libdap Updated for version 3.21.0
libdap4 is an implementation of OPeNDAP's DAP protocol.
D4ParserSax2.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of libdap, A C++ implementation of the OPeNDAP Data
4// Access Protocol.
5
6// Copyright (c) 2012 OPeNDAP, Inc.
7// Author: James Gallagher <jgallagher@opendap.org>
8//
9// This library is free software; you can redistribute it and/or
10// modify it under the terms of the GNU Lesser General Public
11// License as published by the Free Software Foundation; either
12// version 2.1 of the License, or (at your option) any later version.
13//
14// This library is distributed in the hope that it will be useful,
15// but WITHOUT ANY WARRANTY; without even the implied warranty of
16// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17// Lesser General Public License for more details.
18//
19// You should have received a copy of the GNU Lesser General Public
20// License along with this library; if not, write to the Free Software
21// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22//
23// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24
25#include "config.h"
26
27//#define DODS_DEBUG 1
28
29#include <iostream>
30#include <sstream>
31
32#include <cstring>
33#include <cstdarg>
34#include <cassert>
35
36#include <libxml2/libxml/parserInternals.h>
37
38#include "DMR.h"
39
40#include "BaseType.h"
41#include "Array.h"
42#include "D4Group.h"
43#include "D4Attributes.h"
44#include "D4Maps.h"
45#include "D4Enum.h"
46#include "D4BaseTypeFactory.h"
47
48#include "DapXmlNamespaces.h"
49#include "D4ParserSax2.h"
50
51#include "util.h"
52#include "debug.h"
53
54namespace libdap {
55
/**
 * Printable names for the parser states. The table is only read when
 * building debug messages and is indexed with the value returned by
 * get_state(), so the entries are presumably in the same order as the state
 * enumerators declared in D4ParserSax2.h -- keep the two in sync.
 *
 * Both the strings and the pointers are const: nothing may rebind an entry.
 */
static const char *const states[] = {
    "parser_start",

    "inside_dataset",

    // inside_group is the state just after parsing the start of a Group
    // element.
    "inside_group",

    "inside_attribute_container",
    "inside_attribute",
    "inside_attribute_value",
    "inside_other_xml_attribute",

    "inside_enum_def",
    "inside_enum_const",

    "inside_dim_def",

    // This covers Byte, ..., Url, Opaque
    "inside_simple_type",

    // "inside_array",
    "inside_dim",
    "inside_map",

    "inside_constructor",

    "not_dap4_element",

    "parser_unknown",
    "parser_error",
    "parser_fatal_error",

    "parser_end"
};
92
/**
 * Test whether an element name differs from an expected tag.
 *
 * @param name The name to test; may be null.
 * @param tag The tag to compare against; may be null.
 * @return True when the two strings differ. A null pointer only matches
 * another null pointer, so strcmp() is never handed a null argument.
 */
static bool is_not(const char *name, const char *tag)
{
    if (!name || !tag)
        return name != tag;

    return std::strcmp(name, tag) != 0;
}
97
106D4EnumDef *
107D4ParserSax2::enum_def()
108{
109 if (!d_enum_def) d_enum_def = new D4EnumDef;
110
111 return d_enum_def;
112}
113
121D4ParserSax2::dim_def() {
122 if (!d_dim_def) d_dim_def = new D4Dimension;
123
124 return d_dim_def;
125}
126
132void D4ParserSax2::transfer_xml_attrs(const xmlChar **attributes, int nb_attributes)
133{
134 if (!xml_attrs.empty())
135 xml_attrs.clear(); // erase old attributes
136
137 // Make a value using the attribute name and the prefix, namespace URI
138 // and the value. The prefix might be null.
139 unsigned int index = 0;
140 for (int i = 0; i < nb_attributes; ++i, index += 5) {
141 xml_attrs.insert(map<string, XMLAttribute>::value_type(string((const char *)attributes[index]),
142 XMLAttribute(attributes + index + 1)));
143
144 DBG(cerr << "XML Attribute '" << (const char *)attributes[index] << "': "
145 << xml_attrs[(const char *)attributes[index]].value << endl);
146 }
147}
148
155void D4ParserSax2::transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces)
156{
157 // make a value with the prefix and namespace URI. The prefix might be null.
158 for (int i = 0; i < nb_namespaces; ++i) {
159 namespace_table.insert(map<string, string>::value_type(namespaces[i * 2] != 0 ? (const char *)namespaces[i * 2] : "",
160 (const char *)namespaces[i * 2 + 1]));
161 }
162}
163
170bool D4ParserSax2::check_required_attribute(const string & attr)
171{
172 if (xml_attrs.find(attr) == xml_attrs.end()) {
173 dmr_error(this, "Required attribute '%s' not found.", attr.c_str());
174 return false;
175 }
176 else
177 return true;
178}
179
186bool D4ParserSax2::check_attribute(const string & attr)
187{
188 return (xml_attrs.find(attr) != xml_attrs.end());
189}
190
191bool D4ParserSax2::process_dimension_def(const char *name, const xmlChar **attrs, int nb_attributes)
192{
193 if (is_not(name, "Dimension"))
194 return false;
195
196 transfer_xml_attrs(attrs, nb_attributes);
197
198 if (!(check_required_attribute("name") && check_required_attribute("size"))) {
199 dmr_error(this, "The required attribute 'name' or 'size' was missing from a Dimension element.");
200 return false;
201 }
202
203 // This getter (dim_def) allocates a new object if needed.
204 dim_def()->set_name(xml_attrs["name"].value);
205 try {
206 dim_def()->set_size(xml_attrs["size"].value);
207 }
208 catch (Error &e) {
209 dmr_error(this, "%s", e.get_error_message().c_str());
210 return false;
211 }
212
213 return true;
214}
215
233bool D4ParserSax2::process_dimension(const char *name, const xmlChar **attrs, int nb_attributes)
234{
235 if (is_not(name, "Dim"))
236 return false;
237
238 transfer_xml_attrs(attrs, nb_attributes);
239
240 if (check_attribute("size") && check_attribute("name")) {
241 dmr_error(this, "Only one of 'size' and 'name' are allowed in a Dim element, but both were used.");
242 return false;
243 }
244 if (!(check_attribute("size") || check_attribute("name"))) {
245 dmr_error(this, "Either 'size' or 'name' must be used in a Dim element.");
246 return false;
247 }
248
249 if (!top_basetype()->is_vector_type()) {
250 // Make the top BaseType* an array
251 BaseType *b = top_basetype();
252 pop_basetype();
253
254 Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
255 a->set_is_dap4(true);
256 a->add_var_nocopy(b);
257 a->set_attributes_nocopy(b->attributes());
258 // trick: instead of popping b's attributes, copying them and then pushing
259 // a's copy, just move the pointer (but make sure there's only one object that
260 // references that pointer).
261 b->set_attributes_nocopy(0);
262
263 push_basetype(a);
264 }
265
266 assert(top_basetype()->is_vector_type());
267
268 Array *a = static_cast<Array*>(top_basetype());
269 if (check_attribute("size")) {
270
271 a->append_dim_ll(strtoll(xml_attrs["size"].value.c_str(),nullptr,10));
272#if 0
273 a->append_dim(atoi(xml_attrs["size"].value.c_str())); // low budget code for now. jhrg 8/20/13
274#endif
275 return true;
276 }
277 else if (check_attribute("name")) {
278 string name = xml_attrs["name"].value;
279
280 D4Dimension *dim = 0;
281 if (name[0] == '/') // lookup the Dimension in the root group
282 dim = dmr()->root()->find_dim(name);
283 else // get enclosing Group and lookup Dimension there
284 dim = top_group()->find_dim(name);
285
286 if (!dim)
287 throw Error("The dimension '" + name + "' was not found while parsing the variable '" + a->name() + "'.");
288 a->append_dim(dim);
289 return true;
290 }
291
292 return false;
293}
294
295bool D4ParserSax2::process_map(const char *name, const xmlChar **attrs, int nb_attributes)
296{
297 if (is_not(name, "Map"))
298 return false;
299
300 transfer_xml_attrs(attrs, nb_attributes);
301
302 if (!check_attribute("name")) {
303 dmr_error(this, "The 'name' attribute must be used in a Map element.");
304 return false;
305 }
306
307 if (!top_basetype()->is_vector_type()) {
308 // Make the top BaseType* an array
309 BaseType *b = top_basetype();
310 pop_basetype();
311
312 Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
313 a->set_is_dap4(true);
314 a->add_var_nocopy(b);
315 a->set_attributes_nocopy(b->attributes());
316 // trick: instead of popping b's attributes, copying them and then pushing
317 // a's copy, just move the pointer (but make sure there's only one object that
318 // references that pointer).
319 b->set_attributes_nocopy(0);
320
321 push_basetype(a);
322 }
323
324 assert(top_basetype()->is_vector_type());
325
326 Array *a = static_cast<Array*>(top_basetype());
327
328 string map_name = xml_attrs["name"].value;
329 if (xml_attrs["name"].value[0] != '/')
330 map_name = top_group()->FQN() + map_name;
331
332 Array *map_source = 0; // The array variable that holds the data for the Map
333
334 if (map_name[0] == '/') // lookup the Map in the root group
335 map_source = dmr()->root()->find_map_source(map_name);
336 else // get enclosing Group and lookup Map there
337 map_source = top_group()->find_map_source(map_name);
338
339 // Change: If the parser is in 'strict' mode (the default) and the Array named by
340 // the Map cannot be fond, it is an error. If 'strict' mode is false (permissive
341 // mode), then this is not an error. However, the Array referenced by the Map will
342 // be null. This is a change in the parser's behavior to accommodate requests for
343 // Arrays that include Maps that do not also include the Map(s) in the request.
344 // See https://opendap.atlassian.net/browse/HYRAX-98. jhrg 4/13/16
345 if (!map_source && d_strict)
346 throw Error("The Map '" + map_name + "' was not found while parsing the variable '" + a->name() + "'.");
347
348 a->maps()->add_map(new D4Map(map_name, map_source));
349
350 return true;
351}
352
353bool D4ParserSax2::process_group(const char *name, const xmlChar **attrs, int nb_attributes)
354{
355 if (is_not(name, "Group"))
356 return false;
357
358 transfer_xml_attrs(attrs, nb_attributes);
359
360 if (!check_required_attribute("name")) {
361 dmr_error(this, "The required attribute 'name' was missing from a Group element.");
362 return false;
363 }
364
365 BaseType *btp = dmr()->factory()->NewVariable(dods_group_c, xml_attrs["name"].value);
366 if (!btp) {
367 dmr_fatal_error(this, "Could not instantiate the Group '%s'.", xml_attrs["name"].value.c_str());
368 return false;
369 }
370
371 D4Group *grp = static_cast<D4Group*>(btp);
372
373 // Need to set this to get the D4Attribute behavior in the type classes
374 // shared between DAP2 and DAP4. jhrg 4/18/13
375 grp->set_is_dap4(true);
376
377 // link it up and change the current group
378 D4Group *parent = top_group();
379 if (!parent) {
380 dmr_fatal_error(this, "No Group on the Group stack.");
381 return false;
382 }
383
384 grp->set_parent(parent);
385 parent->add_group_nocopy(grp);
386
387 push_group(grp);
388 push_attributes(grp->attributes());
389 return true;
390}
391
398inline bool D4ParserSax2::process_attribute(const char *name, const xmlChar **attrs, int nb_attributes)
399{
400 if (is_not(name, "Attribute"))
401 return false;
402
403 // These methods set the state to parser_error if a problem is found.
404 transfer_xml_attrs(attrs, nb_attributes);
405
406 // add error
407 if (!(check_required_attribute(string("name")) && check_required_attribute(string("type")))) {
408 dmr_error(this, "The required attribute 'name' or 'type' was missing from an Attribute element.");
409 return false;
410 }
411
412 if (xml_attrs["type"].value == "Container") {
413 push_state(inside_attribute_container);
414
415 DBG(cerr << "Pushing attribute container " << xml_attrs["name"].value << endl);
416 D4Attribute *child = new D4Attribute(xml_attrs["name"].value, attr_container_c);
417
418 D4Attributes *tos = top_attributes();
419 // add return
420 if (!tos) {
421 delete child;
422 dmr_fatal_error(this, "Expected an Attribute container on the top of the attribute stack.");
423 return false;
424 }
425
426 tos->add_attribute_nocopy(child);
427 push_attributes(child->attributes());
428 }
429 else if (xml_attrs["type"].value == "OtherXML") {
430 push_state(inside_other_xml_attribute);
431
432 dods_attr_name = xml_attrs["name"].value;
433 dods_attr_type = xml_attrs["type"].value;
434 }
435 else {
436 push_state(inside_attribute);
437
438 dods_attr_name = xml_attrs["name"].value;
439 dods_attr_type = xml_attrs["type"].value;
440 }
441
442 return true;
443}
444
450inline bool D4ParserSax2::process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes)
451{
452 if (is_not(name, "Enumeration"))
453 return false;
454
455 transfer_xml_attrs(attrs, nb_attributes);
456
457 if (!(check_required_attribute("name") && check_required_attribute("basetype"))) {
458 dmr_error(this, "The required attribute 'name' or 'basetype' was missing from an Enumeration element.");
459 return false;
460 }
461
462 Type t = get_type(xml_attrs["basetype"].value.c_str());
463 if (!is_integer_type(t)) {
464 dmr_error(this, "The Enumeration '%s' must have an integer type, instead the type '%s' was used.",
465 xml_attrs["name"].value.c_str(), xml_attrs["basetype"].value.c_str());
466 return false;
467 }
468
469 // This getter allocates a new object if needed.
470 string enum_def_path = xml_attrs["name"].value;
471#if 0
472 // Use FQNs when things are referenced, not when they are defined
473 if (xml_attrs["name"].value[0] != '/')
474 enum_def_path = top_group()->FQN() + enum_def_path;
475#endif
476 enum_def()->set_name(enum_def_path);
477 enum_def()->set_type(t);
478
479 return true;
480}
481
482inline bool D4ParserSax2::process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes)
483{
484 if (is_not(name, "EnumConst"))
485 return false;
486
487 // These methods set the state to parser_error if a problem is found.
488 transfer_xml_attrs(attrs, nb_attributes);
489
490 if (!(check_required_attribute("name") && check_required_attribute("value"))) {
491 dmr_error(this, "The required attribute 'name' or 'value' was missing from an EnumConst element.");
492 return false;
493 }
494
495 istringstream iss(xml_attrs["value"].value);
496 long long value = 0;
497 iss >> skipws >> value;
498 if (iss.fail() || iss.bad()) {
499 dmr_error(this, "Expected an integer value for an Enumeration constant, got '%s' instead.",
500 xml_attrs["value"].value.c_str());
501 }
502 else if (!enum_def()->is_valid_enum_value(value)) {
503 dmr_error(this, "In an Enumeration constant, the value '%s' cannot fit in a variable of type '%s'.",
504 xml_attrs["value"].value.c_str(), D4type_name(d_enum_def->type()).c_str());
505 }
506 else {
507 // unfortunate choice of names... args are 'label' and 'value'
508 enum_def()->add_value(xml_attrs["name"].value, value);
509 }
510
511 return true;
512}
513
519inline bool D4ParserSax2::process_variable(const char *name, const xmlChar **attrs, int nb_attributes)
520{
521 Type t = get_type(name);
522 if (is_simple_type(t)) {
523 process_variable_helper(t, inside_simple_type, attrs, nb_attributes);
524 return true;
525 }
526 else {
527 switch(t) {
528 case dods_structure_c:
529 process_variable_helper(t, inside_constructor, attrs, nb_attributes);
530 return true;
531
532 case dods_sequence_c:
533 process_variable_helper(t, inside_constructor, attrs, nb_attributes);
534 return true;
535
536 default:
537 return false;
538 }
539 }
540}
541
549void D4ParserSax2::process_variable_helper(Type t, ParseState s, const xmlChar **attrs, int nb_attributes)
550{
551 transfer_xml_attrs(attrs, nb_attributes);
552
553 if (check_required_attribute("name")) {
554 BaseType *btp = dmr()->factory()->NewVariable(t, xml_attrs["name"].value);
555 if (!btp) {
556 dmr_fatal_error(this, "Could not instantiate the variable '%s'.", xml_attrs["name"].value.c_str());
557 return;
558 }
559
560 if ((t == dods_enum_c) && check_required_attribute("enum")) {
561 D4EnumDef *enum_def = 0;
562 string enum_path = xml_attrs["enum"].value;
563 if (enum_path[0] == '/')
564 enum_def = dmr()->root()->find_enum_def(enum_path);
565 else
566 enum_def = top_group()->find_enum_def(enum_path);
567
568 if (!enum_def)
569 dmr_fatal_error(this, "Could not find the Enumeration definition '%s'.", enum_path.c_str());
570
571 static_cast<D4Enum*>(btp)->set_enumeration(enum_def);
572 }
573
574 btp->set_is_dap4(true); // see comment above
575 push_basetype(btp);
576
577 push_attributes(btp->attributes());
578
579 push_state(s);
580 }
581}
582
589
595{
596 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
597 parser->d_error_msg = "";
598 parser->char_data = "";
599
600 // Set this in intern_helper so that the loop test for the parser_end
601 // state works for the first iteration. It seems like XMLParseChunk calls this
602 // function on it's first run. jhrg 9/16/13
603 // parser->push_state(parser_start);
604
605 parser->push_attributes(parser->dmr()->root()->attributes());
606
607 if (parser->debug()) cerr << "Parser start state: " << states[parser->get_state()] << endl;
608}
609
613{
614 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
615
616 if (parser->debug()) cerr << "Parser end state: " << states[parser->get_state()] << endl;
617
618 if (parser->get_state() != parser_end)
619 D4ParserSax2::dmr_error(parser, "The document contained unbalanced tags.");
620
621 // If we've found any sort of error, don't make the DMR; intern() will
622 // take care of the error.
623 if (parser->get_state() == parser_error || parser->get_state() == parser_fatal_error)
624 return;
625
626 if (!parser->empty_basetype() || parser->empty_group())
627 D4ParserSax2::dmr_error(parser, "The document did not contain a valid root Group or contained unbalanced tags.");
628
629 parser->pop_group(); // leave the stack 'clean'
630 parser->pop_attributes();
631}
632
646void D4ParserSax2::dmr_start_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI,
647 int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int /*nb_defaulted*/,
648 const xmlChar **attributes)
649{
650 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
651 const char *localname = (const char *) l;
652
653 if (parser->debug()) cerr << "Start element " << localname << " prefix: "<< (prefix?(char *)prefix:"null") << " ns: "<< (URI?(char *)URI:"null")
654 << " (state: " << states[parser->get_state()] << ")" << endl;
655
656 if(parser->get_state() != parser_error){
657 string dap4_ns_name = DapXmlNamspaces::getDapNamespaceString(DAP_4_0);
658 if (parser->debug()) cerr << "dap4_ns_name: " << dap4_ns_name << endl;
659
660 string this_element_ns_name = (URI != 0) ? ((char *)URI) : "";
661 if (parser->debug()) cerr << "this_element_ns_name: " << this_element_ns_name << endl;
662
663 if(this_element_ns_name.compare(dap4_ns_name)){
664 if (parser->debug()) cerr << "Start of non DAP4 element: " << localname << " detected." << endl;
665 parser->push_state(not_dap4_element);
666 // return;
667 }
668 }
669
670
671 switch (parser->get_state()) {
672 case parser_start:
673 if (is_not(localname, "Dataset"))
674 D4ParserSax2::dmr_error(parser, "Expected DMR to start with a Dataset element; found '%s' instead.", localname);
675
676 parser->root_ns = URI ? (const char *) URI : "";
677 parser->transfer_xml_attrs(attributes, nb_attributes);
678
679 if (parser->check_required_attribute(string("name")))
680 parser->dmr()->set_name(parser->xml_attrs["name"].value);
681
682 if (parser->check_attribute("dapVersion"))
683 parser->dmr()->set_dap_version(parser->xml_attrs["dapVersion"].value);
684
685 if (parser->check_attribute("dmrVersion"))
686 parser->dmr()->set_dmr_version(parser->xml_attrs["dmrVersion"].value);
687
688 if (parser->check_attribute("base"))
689 parser->dmr()->set_request_xml_base(parser->xml_attrs["base"].value);
690
691 if (!parser->root_ns.empty())
692 parser->dmr()->set_namespace(parser->root_ns);
693
694 // Push the root Group on the stack
695 parser->push_group(parser->dmr()->root());
696
697 parser->push_state(inside_dataset);
698
699 break;
700
701 // Both inside dataset and inside group can have the same stuff.
702 // The difference is that the Dataset holds the root group, which
703 // must be present; other groups are optional
704 case inside_dataset:
705 case inside_group:
706 if (parser->process_enum_def(localname, attributes, nb_attributes))
707 parser->push_state(inside_enum_def);
708 else if (parser->process_dimension_def(localname, attributes, nb_attributes))
709 parser->push_state(inside_dim_def);
710 else if (parser->process_group(localname, attributes, nb_attributes))
711 parser->push_state(inside_group);
712 else if (parser->process_variable(localname, attributes, nb_attributes))
713 // This will push either inside_simple_type or inside_structure
714 // onto the parser state stack.
715 break;
716 else if (parser->process_attribute(localname, attributes, nb_attributes))
717 // This will push either inside_attribute, inside_attribute_container
718 // or inside_otherxml_attribute onto the parser state stack
719 break;
720 else
721 D4ParserSax2::dmr_error(parser, "Expected an Attribute, Enumeration, Dimension, Group or variable element; found '%s' instead.", localname);
722 break;
723
724 case inside_attribute_container:
725 if (parser->process_attribute(localname, attributes, nb_attributes))
726 break;
727 else
728 D4ParserSax2::dmr_error(parser, "Expected an Attribute element; found '%s' instead.", localname);
729 break;
730
731 case inside_attribute:
732 if (parser->process_attribute(localname, attributes, nb_attributes))
733 break;
734 else if (strcmp(localname, "Value") == 0)
735 parser->push_state(inside_attribute_value);
736 else
737 dmr_error(parser, "Expected an 'Attribute' or 'Value' element; found '%s' instead.", localname);
738 break;
739
740 case inside_attribute_value:
741 // Attribute values are processed by the end element code.
742 break;
743
744 case inside_other_xml_attribute:
745 parser->other_xml_depth++;
746
747 // Accumulate the elements here
748 parser->other_xml.append("<");
749 if (prefix) {
750 parser->other_xml.append((const char *) prefix);
751 parser->other_xml.append(":");
752 }
753 parser->other_xml.append(localname);
754
755 if (nb_namespaces != 0) {
756 parser->transfer_xml_ns(namespaces, nb_namespaces);
757
758 for (map<string, string>::iterator i = parser->namespace_table.begin();
759 i != parser->namespace_table.end(); ++i) {
760 parser->other_xml.append(" xmlns");
761 if (!i->first.empty()) {
762 parser->other_xml.append(":");
763 parser->other_xml.append(i->first);
764 }
765 parser->other_xml.append("=\"");
766 parser->other_xml.append(i->second);
767 parser->other_xml.append("\"");
768 }
769 }
770
771 if (nb_attributes != 0) {
772 parser->transfer_xml_attrs(attributes, nb_attributes);
773 for (XMLAttrMap::iterator i = parser->xml_attr_begin(); i != parser->xml_attr_end(); ++i) {
774 parser->other_xml.append(" ");
775 if (!i->second.prefix.empty()) {
776 parser->other_xml.append(i->second.prefix);
777 parser->other_xml.append(":");
778 }
779 parser->other_xml.append(i->first);
780 parser->other_xml.append("=\"");
781 parser->other_xml.append(i->second.value);
782 parser->other_xml.append("\"");
783 }
784 }
785
786 parser->other_xml.append(">");
787 break;
788
789 case inside_enum_def:
790 // process an EnumConst element
791 if (parser->process_enum_const(localname, attributes, nb_attributes))
792 parser->push_state(inside_enum_const);
793 else
794 dmr_error(parser, "Expected an 'EnumConst' element; found '%s' instead.", localname);
795 break;
796
797 case inside_enum_const:
798 // No content; nothing to do
799 break;
800
801 case inside_dim_def:
802 // No content; nothing to do
803 break;
804#if 0
805 case inside_dimension:
806 // No content.
807 break;
808#endif
809 case inside_dim:
810 // No content.
811 break;
812
813 case inside_map:
814 // No content.
815 break;
816
817 case inside_simple_type:
818 if (parser->process_attribute(localname, attributes, nb_attributes))
819 break;
820 else if (parser->process_dimension(localname, attributes, nb_attributes))
821 parser->push_state(inside_dim);
822 else if (parser->process_map(localname, attributes, nb_attributes))
823 parser->push_state(inside_map);
824 else
825 dmr_error(parser, "Expected an 'Attribute', 'Dim' or 'Map' element; found '%s' instead.", localname);
826 break;
827
828 case inside_constructor:
829 if (parser->process_variable(localname, attributes, nb_attributes))
830 // This will push either inside_simple_type or inside_structure
831 // onto the parser state stack.
832 break;
833 else if (parser->process_attribute(localname, attributes, nb_attributes))
834 break;
835 else if (parser->process_dimension(localname, attributes, nb_attributes))
836 parser->push_state(inside_dim);
837 else if (parser->process_map(localname, attributes, nb_attributes))
838 parser->push_state(inside_map);
839 else
840 D4ParserSax2::dmr_error(parser, "Expected an Attribute, Dim, Map or variable element; found '%s' instead.", localname);
841 break;
842
843 case not_dap4_element:
844 if (parser->debug()) cerr << "Inside non DAP4 element. localname: " << localname << endl;
845 break;
846
847 case parser_unknown:
848 // FIXME?
849 // *** Never used? If so remove/error
850 parser->push_state(parser_unknown);
851 break;
852
853 case parser_error:
854 case parser_fatal_error:
855 break;
856
857 case parser_end:
858 // FIXME Error?
859 break;
860 }
861
862 if (parser->debug()) cerr << "Start element exit state: " << states[parser->get_state()] << endl;
863}
864
865void D4ParserSax2::dmr_end_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI)
866{
867 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
868 const char *localname = (const char *) l;
869
870 if (parser->debug())
871 cerr << "End element " << localname << " (state " << states[parser->get_state()] << ")" << endl;
872
873 switch (parser->get_state()) {
874 case parser_start:
875 dmr_fatal_error(parser, "Unexpected state, inside start state while processing element '%s'.", localname);
876 break;
877
878 case inside_dataset:
879 if (is_not(localname, "Dataset"))
880 D4ParserSax2::dmr_error(parser, "Expected an end Dataset tag; found '%s' instead.", localname);
881
882 parser->pop_state();
883 if (parser->get_state() != parser_start)
884 dmr_fatal_error(parser, "Unexpected state, expected start state.");
885 else {
886 parser->pop_state();
887 parser->push_state(parser_end);
888 }
889 break;
890
891 case inside_group: {
892 if (is_not(localname, "Group"))
893 D4ParserSax2::dmr_error(parser, "Expected an end tag for a Group; found '%s' instead.", localname);
894
895 if (!parser->empty_basetype() || parser->empty_group())
896 D4ParserSax2::dmr_error(parser,
897 "The document did not contain a valid root Group or contained unbalanced tags.");
898
899 parser->pop_group();
900 parser->pop_state();
901 break;
902 }
903
904 case inside_attribute_container:
905 if (is_not(localname, "Attribute"))
906 D4ParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
907
908 parser->pop_state();
909 parser->pop_attributes();
910 break;
911
912 case inside_attribute:
913 if (is_not(localname, "Attribute"))
914 D4ParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
915
916 parser->pop_state();
917 break;
918
919 case inside_attribute_value: {
920 if (is_not(localname, "Value"))
921 D4ParserSax2::dmr_error(parser, "Expected an end value tag; found '%s' instead.", localname);
922
923 parser->pop_state();
924
925 // The old code added more values using the name and type as
926 // indexes to find the correct attribute. Use get() for that
927 // now. Or fix this code to keep a pointer to the to attribute...
928 D4Attributes *attrs = parser->top_attributes();
929 D4Attribute *attr = attrs->get(parser->dods_attr_name);
930 if (!attr) {
931 attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
932 attrs->add_attribute_nocopy(attr);
933 }
934 attr->add_value(parser->char_data);
935
936 parser->char_data = ""; // Null this after use.
937 break;
938 }
939
940 case inside_other_xml_attribute: {
941 if (strcmp(localname, "Attribute") == 0 && parser->root_ns == (const char *) URI) {
942 parser->pop_state();
943
944 // The old code added more values using the name and type as
945 // indexes to find the correct attribute. Use get() for that
946 // now. Or fix this code to keep a pointer to the to attribute...
947 D4Attributes *attrs = parser->top_attributes();
948 D4Attribute *attr = attrs->get(parser->dods_attr_name);
949 if (!attr) {
950 attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
951 attrs->add_attribute_nocopy(attr);
952 }
953 attr->add_value(parser->other_xml);
954
955 parser->other_xml = ""; // Null this after use.
956 }
957 else {
958 if (parser->other_xml_depth == 0) {
959 D4ParserSax2::dmr_error(parser, "Expected an OtherXML attribute to end! Instead I found '%s'",
960 localname);
961 break;
962 }
963 parser->other_xml_depth--;
964
965 parser->other_xml.append("</");
966 if (prefix) {
967 parser->other_xml.append((const char *) prefix);
968 parser->other_xml.append(":");
969 }
970 parser->other_xml.append(localname);
971 parser->other_xml.append(">");
972 }
973 break;
974 }
975
976 case inside_enum_def:
977 if (is_not(localname, "Enumeration"))
978 D4ParserSax2::dmr_error(parser, "Expected an end Enumeration tag; found '%s' instead.", localname);
979 if (!parser->top_group())
981 "Expected a Group to be the current item, while finishing up an Enumeration.");
982 else {
983 // copy the pointer; not a deep copy
984 parser->top_group()->enum_defs()->add_enum_nocopy(parser->enum_def());
985 // Set the enum_def to null; next call to enum_def() will
986 // allocate a new object
987 parser->clear_enum_def();
988 parser->pop_state();
989 }
990 break;
991
992 case inside_enum_const:
993 if (is_not(localname, "EnumConst"))
994 D4ParserSax2::dmr_error(parser, "Expected an end EnumConst tag; found '%s' instead.", localname);
995
996 parser->pop_state();
997 break;
998
999 case inside_dim_def: {
1000 if (is_not(localname, "Dimension"))
1001 D4ParserSax2::dmr_error(parser, "Expected an end Dimension tag; found '%s' instead.", localname);
1002
1003 if (!parser->top_group())
1004 D4ParserSax2::dmr_error(parser,
1005 "Expected a Group to be the current item, while finishing up an Dimension.");
1006
1007 // FIXME Use the Group on the top of the group stack
1008 // copy the pointer; not a deep copy
1009 parser->top_group()->dims()->add_dim_nocopy(parser->dim_def());
1010 //parser->dmr()->root()->dims()->add_dim_nocopy(parser->dim_def());
1011 // Set the dim_def to null; next call to dim_def() will
1012 // allocate a new object. Calling 'clear' is important because
1013 // the cleanup method will free dim_def if it's not null and
1014 // we just copied the pointer in the add_dim_nocopy() call
1015 // above.
1016 parser->clear_dim_def();
1017 parser->pop_state();
1018 break;
1019 }
1020
1021 case inside_simple_type:
1022 if (is_simple_type(get_type(localname))) {
1023 BaseType *btp = parser->top_basetype();
1024 parser->pop_basetype();
1025 parser->pop_attributes();
1026
1027 BaseType *parent = 0;
1028 if (!parser->empty_basetype())
1029 parent = parser->top_basetype();
1030 else if (!parser->empty_group())
1031 parent = parser->top_group();
1032 else {
1033 dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
1034 localname);
1035 delete btp;
1036 parser->pop_state();
1037 break;
1038 }
1039
1040 if (parent->type() == dods_array_c)
1041 static_cast<Array*>(parent)->prototype()->add_var_nocopy(btp);
1042 else
1043 parent->add_var_nocopy(btp);
1044 }
1045 else
1046 D4ParserSax2::dmr_error(parser, "Expected an end tag for a simple type; found '%s' instead.", localname);
1047
1048 parser->pop_state();
1049 break;
1050
1051 case inside_dim:
1052 if (is_not(localname, "Dim"))
1053 D4ParserSax2::dmr_fatal_error(parser, "Expected an end Dim tag; found '%s' instead.", localname);
1054
1055 parser->pop_state();
1056 break;
1057
1058 case inside_map:
1059 if (is_not(localname, "Map"))
1060 D4ParserSax2::dmr_fatal_error(parser, "Expected an end Map tag; found '%s' instead.", localname);
1061
1062 parser->pop_state();
1063 break;
1064
1065 case inside_constructor: {
1066 if (strcmp(localname, "Structure") != 0 && strcmp(localname, "Sequence") != 0) {
1067 D4ParserSax2::dmr_error(parser, "Expected an end tag for a constructor; found '%s' instead.", localname);
1068 return;
1069 }
1070
1071 BaseType *btp = parser->top_basetype();
1072 parser->pop_basetype();
1073 parser->pop_attributes();
1074
1075 BaseType *parent = 0;
1076 if (!parser->empty_basetype())
1077 parent = parser->top_basetype();
1078 else if (!parser->empty_group())
1079 parent = parser->top_group();
1080 else {
1081 dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
1082 localname);
1083 delete btp;
1084 parser->pop_state();
1085 break;
1086 }
1087
1088 // TODO Why doesn't this code mirror the simple_var case and test
1089 // for the parent being an array? jhrg 10/13/13
1090 parent->add_var_nocopy(btp);
1091 parser->pop_state();
1092 break;
1093 }
1094
1095 case not_dap4_element:
1096 if (parser->debug()) cerr << "End of non DAP4 element: " << localname << endl;
1097 parser->pop_state();
1098 break;
1099
1100 case parser_unknown:
1101 parser->pop_state();
1102 break;
1103
1104 case parser_error:
1105 case parser_fatal_error:
1106 break;
1107
1108 case parser_end:
1109 // FIXME Error?
1110 break;
1111 }
1112
1113 if (parser->debug()) cerr << "End element exit state: " << states[parser->get_state()] << endl;
1114}
1115
1119void D4ParserSax2::dmr_get_characters(void * p, const xmlChar * ch, int len)
1120{
1121 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1122
1123 switch (parser->get_state()) {
1124 case inside_attribute_value:
1125 parser->char_data.append((const char *) (ch), len);
1126 DBG(cerr << "Characters: '" << parser->char_data << "'" << endl);
1127 break;
1128
1129 case inside_other_xml_attribute:
1130 parser->other_xml.append((const char *) (ch), len);
1131 DBG(cerr << "Other XML Characters: '" << parser->other_xml << "'" << endl);
1132 break;
1133
1134 default:
1135 break;
1136 }
1137}
1138
1143void D4ParserSax2::dmr_ignoreable_whitespace(void *p, const xmlChar *ch, int len)
1144{
1145 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1146
1147 switch (parser->get_state()) {
1148 case inside_other_xml_attribute:
1149 parser->other_xml.append((const char *) (ch), len);
1150 break;
1151
1152 default:
1153 break;
1154 }
1155}
1156
1162void D4ParserSax2::dmr_get_cdata(void *p, const xmlChar *value, int len)
1163{
1164 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1165
1166 switch (parser->get_state()) {
1167 case inside_other_xml_attribute:
1168 parser->other_xml.append((const char *) (value), len);
1169 break;
1170
1171 case parser_unknown:
1172 break;
1173
1174 default:
1175 D4ParserSax2::dmr_error(parser, "Found a CData block but none are allowed by DAP4.");
1176
1177 break;
1178 }
1179}
1180
1185xmlEntityPtr D4ParserSax2::dmr_get_entity(void *, const xmlChar * name)
1186{
1187 return xmlGetPredefinedEntity(name);
1188}
1189
1200void D4ParserSax2::dmr_fatal_error(void * p, const char *msg, ...)
1201{
1202 va_list args;
1203 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1204
1205 parser->push_state(parser_fatal_error);
1206
1207 va_start(args, msg);
1208 char str[1024];
1209 vsnprintf(str, 1024, msg, args);
1210 va_end(args);
1211
1212 int line = xmlSAX2GetLineNumber(parser->d_context);
1213
1214 if (!parser->d_error_msg.empty()) parser->d_error_msg += "\n";
1215 parser->d_error_msg += "At line " + long_to_string(line) + ": " + string(str);
1216}
1217
1218void D4ParserSax2::dmr_error(void *p, const char *msg, ...)
1219{
1220 va_list args;
1221 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1222
1223 parser->push_state(parser_error);
1224
1225 va_start(args, msg);
1226 char str[1024];
1227 vsnprintf(str, 1024, msg, args);
1228 va_end(args);
1229
1230 int line = xmlSAX2GetLineNumber(parser->d_context);
1231
1232 if (!parser->d_error_msg.empty()) parser->d_error_msg += "\n";
1233 parser->d_error_msg += "At line " + long_to_string(line) + ": " + string(str);
1234}
1236
1240void D4ParserSax2::cleanup_parse()
1241{
1242 bool wellFormed = d_context->wellFormed;
1243 bool valid = d_context->valid;
1244
1245 // d_context->sax = NULL;
1246 xmlFreeParserCtxt(d_context);
1247
1248 delete d_enum_def;
1249 d_enum_def = 0;
1250
1251 delete d_dim_def;
1252 d_dim_def = 0;
1253
1254 // If there's an error, there may still be items on the stack at the
1255 // end of the parse.
1256 while (!btp_stack.empty()) {
1257 delete top_basetype();
1258 pop_basetype();
1259 }
1260
1261 if (!wellFormed)
1262 throw Error("The DMR was not well formed. " + d_error_msg);
1263 else if (!valid)
1264 throw Error("The DMR was not valid." + d_error_msg);
1265 else if (get_state() == parser_error)
1266 throw Error(d_error_msg);
1267 else if (get_state() == parser_fatal_error)
1268 throw InternalErr(d_error_msg);
1269}
1270
1285void D4ParserSax2::intern(istream &f, DMR *dest_dmr, bool debug)
1286{
1287 d_debug = debug;
1288
1289 // Code example from libxml2 docs re: read from a stream.
1290
1291 if (!f.good())
1292 throw Error("Input stream not open or read error");
1293 if (!dest_dmr)
1294 throw InternalErr(__FILE__, __LINE__, "DMR object is null");
1295
1296 d_dmr = dest_dmr; // dump values here
1297#if 0
1298 int line_num = 1;
1299 string line;
1300
1301 // Get the <xml ... ?> line
1302 getline(f, line);
1303 if (line.length() == 0) throw Error("No input found while parsing the DMR.");
1304
1305 if (debug) cerr << "line: (" << line_num << "): " << endl << line << endl << endl;
1306
1307 d_context = xmlCreatePushParserCtxt(&d_dmr_sax_parser, this, line.c_str(), line.length(), "stream");
1308 d_context->validate = true;
1309 push_state(parser_start);
1310
1311 // Get the first line of stuff
1312 getline(f, line);
1313 ++line_num;
1314
1315 if (debug) cerr << "line: (" << line_num << "): " << endl << line << endl << endl;
1316
1317 while (!f.eof() && (get_state() != parser_end)) {
1318 xmlParseChunk(d_context, line.c_str(), line.length(), 0);
1319
1320 // Get the next line
1321 getline(f, line);
1322 ++line_num;
1323
1324 if (debug) cerr << "line: (" << line_num << "): " << endl << line << endl << endl;
1325 }
1326 // This call ends the parse.
1327 xmlParseChunk(d_context, line.c_str(), 0, 1/*terminate*/);
1328
1329#else
1330 int line_num = 1;
1331 string line;
1332
1333 // Get the XML prolog line (looks like: <?xml ... ?> )
1334 getline(f, line);
1335 if (line.length() == 0) throw Error("No input found while parsing the DMR.");
1336
1337 if (debug) cerr << "line: (" << line_num << "): " << endl << line << endl << endl;
1338
1339 d_context = xmlCreatePushParserCtxt(&d_dmr_sax_parser, this, line.c_str(), line.length(), "stream");
1340 d_context->validate = true;
1341 push_state(parser_start);
1342
1343 // Get the first chunk of the stuff
1344 long chunk_count = 0;
1345 long chunk_size = 0;
1346
1347 f.read(d_parse_buffer, D4_PARSE_BUFF_SIZE);
1348 chunk_size=f.gcount();
1349 d_parse_buffer[chunk_size]=0; // null terminate the string. We can do it this way because the buffer is +1 bigger than D4_PARSE_BUFF_SIZE
1350 if (debug) cerr << "chunk: (" << chunk_count++ << "): " << endl << d_parse_buffer << endl << endl;
1351
1352 while(!f.eof() && (get_state() != parser_end)){
1353
1354 xmlParseChunk(d_context, d_parse_buffer, chunk_size, 0);
1355
1356 // There is more to read. Get the next chunk
1357 f.read(d_parse_buffer, D4_PARSE_BUFF_SIZE);
1358 chunk_size=f.gcount();
1359 d_parse_buffer[chunk_size]=0; // null terminate the string. We can do it this way because the buffer is +1 bigger than D4_PARSE_BUFF_SIZE
1360 if (debug) cerr << "chunk: (" << chunk_count++ << "): " << endl << d_parse_buffer << endl << endl;
1361 }
1362
1363 // This call ends the parse.
1364 xmlParseChunk(d_context, d_parse_buffer, chunk_size, 1/*terminate*/);
1365#endif
1366
1367 // This checks that the state on the parser stack is parser_end and throws
1368 // an exception if it's not (i.e., the loop exited with gcount() == 0).
1369 cleanup_parse();
1370}
1371
1386void D4ParserSax2::intern(const string &document, DMR *dest_dmr, bool debug)
1387{
1388 intern(document.c_str(), document.length(), dest_dmr, debug);
1389}
1390
1401void D4ParserSax2::intern(const char *buffer, int size, DMR *dest_dmr, bool debug)
1402{
1403 if (!(size > 0)) return;
1404
1405 d_debug = debug;
1406
1407 // Code example from libxml2 docs re: read from a stream.
1408
1409 if (!dest_dmr) throw InternalErr(__FILE__, __LINE__, "DMR object is null");
1410 d_dmr = dest_dmr; // dump values in dest_dmr
1411
1412 push_state(parser_start);
1413 d_context = xmlCreatePushParserCtxt(&d_dmr_sax_parser, this, buffer, size, "stream");
1414 d_context->validate = true;
1415
1416 // This call ends the parse.
1417 xmlParseChunk(d_context, buffer, 0, 1/*terminate*/);
1418
1419 // This checks that the state on the parser stack is parser_end and throws
1420 // an exception if it's not (i.e., the loop exited with gcount() == 0).
1421 cleanup_parse();
1422}
1423
1424} // namespace libdap
virtual D4Attributes * attributes()
Definition BaseType.cc:596
void add_var_nocopy(BaseType *bt, Part part=nil) override
D4Attribute * get(const string &fqn)
void add_dim_nocopy(D4Dimension *dim)
D4Dimensions * dims()
Get the dimensions defined for this Group.
Definition D4Group.h:84
D4EnumDefs * enum_defs()
Get the enumerations defined for this Group.
Definition D4Group.h:99
void intern(istream &f, DMR *dest_dmr, bool debug=false)
void set_dap_version(const std::string &version_string)
Definition DMR.cc:261
D4Group * root()
Definition DMR.cc:249
void set_namespace(const std::string &ns)
Set the namespace for this DMR.
Definition DMR.h:164
void set_request_xml_base(const std::string &xb)
Definition DMR.h:158
static std::string getDapNamespaceString(DAPVersion version)
A class for error processing.
Definition Error.h:94
A class for software fault reporting.
Definition InternalErr.h:65
static void dmr_start_document(void *parser)
static void dmr_start_element(void *parser, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI, int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int nb_defaulted, const xmlChar **attributes)
static void dmr_ignoreable_whitespace(void *parser, const xmlChar *ch, int len)
static void dmr_get_characters(void *parser, const xmlChar *ch, int len)
static void dmr_end_document(void *parser)
static void dmr_get_cdata(void *parser, const xmlChar *value, int len)
static xmlEntityPtr dmr_get_entity(void *parser, const xmlChar *name)
static void dmr_fatal_error(void *parser, const char *msg,...)
top level DAP object to house generic methods
Definition AISConnect.cc:30
Type
Identifies the data type.
Definition Type.h:94
bool is_simple_type(Type t)
Returns true if the instance is a numeric, string or URL type variable.
Definition util.cc:778
string D4type_name(Type t)
Returns the type of the class instance as a string. Supports all DAP4 types and not the DAP2-only typ...
Definition util.cc:697
bool is_vector_type(Type t)
Returns true if the instance is a vector (i.e., array) type variable.
Definition util.cc:818
bool is_integer_type(Type t)
Definition util.cc:903
ObjectType get_type(const string &value)
Definition mime_util.cc:324