bes Updated for version 3.20.13
DMZ.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of the BES
4
5// Copyright (c) 2021 OPeNDAP, Inc.
6// Author: James Gallagher <jgallagher@opendap.org>
7//
8// This library is free software; you can redistribute it and/or
9// modify it under the terms of the GNU Lesser General Public
10// License as published by the Free Software Foundation; either
11// version 2.1 of the License, or (at your option) any later version.
12//
13// This library is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16// Lesser General Public License for more details.
17//
18// You should have received a copy of the GNU Lesser General Public
19// License along with this library; if not, write to the Free Software
20// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21//
22// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <unordered_set>
#include <vector>
30
31#include <libdap/BaseType.h>
32#include <libdap/Array.h>
33#include <libdap/Type.h>
34#include <libdap/D4Dimensions.h>
35#include <libdap/D4Group.h>
36#include <libdap/D4BaseTypeFactory.h>
37#include <libdap/D4Enum.h>
38#include <libdap/D4EnumDefs.h>
39#include <libdap/D4Attributes.h>
40#include <libdap/D4Maps.h>
41#include <libdap/DMR.h>
42#include <libdap/util.h> // is_simple_type()
43
44#define PUGIXML_NO_XPATH
45#define PUGIXML_HEADER_ONLY
46#include <pugixml.hpp>
47
48#include "url_impl.h" // see bes/http
49#include "DMRpp.h"
50#include "DMZ.h" // this includes the pugixml header
51#include "Chunk.h"
52#include "DmrppCommon.h"
53#include "DmrppArray.h"
54#include "DmrppD4Group.h"
55#include "Base64.h"
56#include "DmrppRequestHandler.h"
57#include "DmrppChunkOdometer.h"
58#include "BESInternalError.h"
59#include "BESDebug.h"
60
61using namespace pugi;
62using namespace std;
63using namespace libdap;
64
65// The pugixml library does not grok namespaces. So, for a tag named 'dmrpp:chunks'
66// if TREAT_NAMESPACES_AS_LITERALS is '1' the parser matches the whole string. If it
67// is '0' the parser only matches the characters after the colon. In both cases the
68// namespace (as XML intends) is not used. Using '1' is a bit more efficient.
69// jhrg 11/2/21
70#define TREAT_NAMESPACES_AS_LITERALS 1
71
// The code can either search for a DAP variable's information in the XML, or it can
73// record that during the parse process. Set this when/if the code does the latter.
74// Using this simplifies the lazy-load process, particularly for the DAP2 DDS and
75// data responses (which have not yet been coded completely). jhrg 11/17/21
76#define USE_CACHED_XML_NODE 1
77
78#define SUPPORT_FILL_VALUE_CHUNKS 1
79
80#define PARSER "dmz"
81#define prolog std::string("DMZ::").append(__func__).append("() - ")
82
83namespace dmrpp {
84
85using shape = std::vector<unsigned long long>;
86
87#if 1
88const std::set<std::string> DMZ::variable_elements{"Byte", "Int8", "Int16", "Int32", "Int64", "UInt8", "UInt16", "UInt32",
89 "UInt64", "Float32", "Float64", "String", "Structure", "Sequence",
90 "Enum", "Opaque"};
91#endif
92
/**
 * @brief Compare an XML element/attribute name (or value) with a key.
 *
 * When TREAT_NAMESPACES_AS_LITERALS is non-zero the two strings must match
 * exactly. Otherwise a match is also reported when the text following the
 * first ':' in \c value equals \c key.
 *
 * @param value The string read from the XML document
 * @param key The string to look for
 * @return True if the strings match as described above
 */
static inline bool is_eq(const char *value, const char *key)
{
#if TREAT_NAMESPACES_AS_LITERALS
    return !strcmp(value, key);
#else
    if (!strcmp(value, key))
        return true;

    // No literal match; retry with whatever follows the first colon.
    const char *after_prefix = strchr(value, ':');
    if (after_prefix == nullptr)
        return false;

    return !strcmp(after_prefix + 1, key);
#endif
}
108
110static inline bool has_dim_nodes(const xml_node &var_node)
111{
112 return var_node.child("Dim"); // just one is enough
113}
114
/**
 * @brief Is \c element_name one of the names held in \c elements_set?
 * @param elements_set The set of names to search
 * @param element_name The name to look for
 * @return True if the name is in the set
 */
static inline bool member_of(const std::set<std::string> &elements_set, const std::string &element_name)
{
    return elements_set.count(element_name) != 0;
}
120
122static inline DmrppCommon *dc(BaseType *btp)
123{
124 auto *dc = dynamic_cast<DmrppCommon*>(btp);
125 if (!dc)
126 throw BESInternalError(string("Expected a BaseType that was also a DmrppCommon instance (")
127 .append((btp) ? btp->name() : "unknown").append(")."), __FILE__, __LINE__);
128 return dc;
129}
130
136DMZ::DMZ(const string &file_name)
137{
138 parse_xml_doc(file_name);
139}
140
145void
146DMZ::parse_xml_doc(const std::string &file_name)
147{
148 std::ifstream stream(file_name);
149
150 // Free memory used by a previously parsed document.
151 d_xml_doc.reset();
152
153 // parse_ws_pcdata_single will include the space when it appears in a <Value> </Value>
154 // DAP Attribute element. jhrg 11/3/21
155 pugi::xml_parse_result result = d_xml_doc.load(stream, pugi::parse_default | pugi::parse_ws_pcdata_single);
156
157 if (!result)
158 throw BESInternalError(string("DMR++ parse error: ").append(result.description()), __FILE__, __LINE__);
159
160 if (!d_xml_doc.document_element())
161 throw BESInternalError("No DMR++ data present.", __FILE__, __LINE__);
162}
163
173void DMZ::process_dataset(DMR *dmr, const xml_node &xml_root)
174{
175 // Process the attributes
176 int required_attrs_found = 0; // there are 1
177 string href_attr;
178 bool href_trusted = false;
179 string dmrpp_version; // empty or holds a value if dmrpp::version is present
180 for (xml_attribute attr = xml_root.first_attribute(); attr; attr = attr.next_attribute()) {
181 if (is_eq(attr.name(), "name")) {
182 ++required_attrs_found;
183 dmr->set_name(attr.value());
184 }
185 else if (is_eq(attr.name(), "dapVersion")) {
186 dmr->set_dap_version(attr.value());
187 }
188 else if (is_eq(attr.name(), "dmrVersion")) {
189 dmr->set_dmr_version(attr.value());
190 }
191 else if (is_eq(attr.name(), "base")) {
192 dmr->set_request_xml_base(attr.value());
193 BESDEBUG(PARSER, prolog << "Dataset xml:base is set to '" << dmr->request_xml_base() << "'" << endl);
194 }
195 // The pugixml library does not use XML namespaces AFAIK. jhrg 11/2/21
196 else if (is_eq(attr.name(), "xmlns")) {
197 dmr->set_namespace(attr.value());
198 }
199 // This code does not use namespaces. By default, we assume the DMR++ elements
200 // all use the namespace prefix 'dmrpp'. jhrg 11/2/21
201 else if (is_eq(attr.name(), "dmrpp:href")) {
202 href_attr = attr.value();
203 }
204 else if (is_eq(attr.name(), "dmrpp:trust")) {
205 href_trusted = is_eq(attr.value(), "true");
206 }
207 else if (is_eq(attr.name(), "dmrpp:version")) {
208 dmrpp_version = attr.value();
209 }
210 // We allow other, non recognized attributes, so there is no 'else' jhrg 10/20/21
211 }
212
213 if (dmrpp_version.empty()) { // old style DMR++, set enable-kludge flag
214 DmrppRequestHandler::d_emulate_original_filter_order_behavior = true;
215 }
216 else {
217 auto dmrpp = dynamic_cast<DMRpp*>(dmr);
218 if (dmrpp) {
219 dmrpp->set_version(dmrpp_version);
220 }
221 }
222
223 if (required_attrs_found != 1)
224 throw BESInternalError("DMR++ XML dataset element missing one or more required attributes.", __FILE__, __LINE__);
225
226 d_dataset_elem_href.reset(new http::url(href_attr, href_trusted));
227}
228
234void DMZ::process_dimension(D4Group *grp, const xml_node &dimension_node)
235{
236 string name_value;
237 string size_value;
238 for (xml_attribute attr = dimension_node.first_attribute(); attr; attr = attr.next_attribute()) {
239 if (is_eq(attr.name(), "name")) {
240 name_value = attr.value();
241 }
242 else if (is_eq(attr.name(), "size")) {
243 size_value = attr.value();
244 }
245 }
246
247 if (name_value.empty() || size_value.empty())
248 throw BESInternalError("The required attribute 'name' or 'size' was missing from a Dimension element.", __FILE__, __LINE__);
249
250 // This getter (dim_def) allocates a new object if needed.
251 try {
252 auto *dimension = new D4Dimension();
253 dimension->set_name(name_value);
254 dimension->set_size(size_value);
255 grp->dims()->add_dim_nocopy(dimension);
256 }
257 catch (Error &e) {
258 throw BESInternalError(e.get_error_message(), __FILE__, __LINE__);
259 }
260}
261
269void DMZ::process_dim(DMR *dmr, D4Group *grp, Array *array, const xml_node &dim_node)
270{
271 assert(array->is_vector_type());
272
273 string name_value;
274 string size_value;
275 for (xml_attribute attr = dim_node.first_attribute(); attr; attr = attr.next_attribute()) {
276 if (is_eq(attr.name(), "name")) {
277 name_value = attr.value();
278 }
279 else if (is_eq(attr.name(), "size")) {
280 size_value = attr.value();
281 }
282 }
283
284 if (name_value.empty() && size_value.empty())
285 throw BESInternalError("Either 'size' or 'name' must be used in a Dim element.", __FILE__, __LINE__);
286 if (!name_value.empty() && !size_value.empty())
287 throw BESInternalError("Only one of 'size' and 'name' are allowed in a Dim element, but both were used.", __FILE__, __LINE__);
288
289 if (!size_value.empty()) {
290 BESDEBUG(PARSER, prolog << "Processing nameless Dim of size: " << stoi(size_value) << endl);
291 array->append_dim(stoi(size_value));
292 }
293 else if (!name_value.empty()) {
294 BESDEBUG(PARSER, prolog << "Processing Dim with named Dimension reference: " << name_value << endl);
295
296 D4Dimension *dim;
297 if (name_value[0] == '/') // lookup the Dimension in the root group
298 dim = dmr->root()->find_dim(name_value);
299 else
300 // get enclosing Group and lookup Dimension there
301 dim = grp->find_dim(name_value);
302
303 if (!dim)
304 throw BESInternalError("The dimension '" + name_value + "' was not found while parsing the variable '" + array->name() + "'.",__FILE__,__LINE__);
305
306 array->append_dim(dim);
307 }
308}
309
310void DMZ::process_map(DMR *dmr, D4Group *grp, Array *array, const xml_node &map_node)
311{
312 assert(array->is_vector_type());
313
314 string name_value;
315 string size_value;
316 for (xml_attribute attr = map_node.first_attribute(); attr; attr = attr.next_attribute()) {
317 if (is_eq(attr.name(), "name")) {
318 name_value = attr.value();
319 }
320 }
321
322 // All map names are FQNs. If we get one that isn't, assume it's within the most current group.
323 if (name_value[0] != '/')
324 name_value = grp->FQN() + name_value;
325
326 // The array variable that holds the data for the Map
327 Array *map_source = dmr->root()->find_map_source(name_value);
328
329 // In the SAX2 parser, we had 'strict' and 'permissive' modes. For Maps, permissive
330 // allowed the DAP variable for a Map to be missing so that users could request just
331 // the data with the maps. I'm implementing that behavior. Below is the original
332 // comment from DmrppParserSAX2.cc. jhrg 11/3/21
333
334 // Change: If the parser is in 'strict' mode (the default) and the Array named by
335 // the Map cannot be fond, it is an error. If 'strict' mode is false (permissive
336 // mode), then this is not an error. However, the Array referenced by the Map will
337 // be null. This is a change in the parser's behavior to accommodate requests for
338 // Arrays that include Maps that do not also include the Map(s) in the request.
339 // See https://opendap.atlassian.net/browse/HYRAX-98. jhrg 4/13/16
340
341 array->maps()->add_map(new D4Map(name_value, map_source));
342}
343
358void DMZ::process_variable(DMR *dmr, D4Group *group, Constructor *parent, const xml_node &var_node)
359{
360 assert(group);
361
362 // Variables are declared using nodes with type names (e.g., <Float32...>)
363 // Variables are arrays if they have one or more <Dim...> child nodes.
364 Type t = get_type(var_node.name());
365
366 assert(t != dods_group_c); // Groups are special and handled elsewhere
367
368 bool is_array_type = has_dim_nodes(var_node);
369 BaseType *btp;
370 if (is_array_type) {
371 btp = add_array_variable(dmr, group, parent, t, var_node);
372 if (t == dods_structure_c || t == dods_sequence_c) {
373 assert(btp->type() == dods_array_c && btp->var()->type() == t);
374 // NB: For an array of a Constructor, add children to the Constructor, not the array
375 parent = dynamic_cast<Constructor*>(btp->var());
376 assert(parent);
377 for (auto child = var_node.first_child(); child; child = child.next_sibling()) {
378 if (member_of(variable_elements, child.name()))
379 process_variable(dmr, group, parent, child);
380 }
381 }
382 }
383 else {
384 btp = add_scalar_variable(dmr, group, parent, t, var_node);
385 if (t == dods_structure_c || t == dods_sequence_c) {
386 assert(btp->type() == t);
387 parent = dynamic_cast<Constructor*>(btp);
388 assert(parent);
389 for (auto child = var_node.first_child(); child; child = child.next_sibling()) {
390 if (member_of(variable_elements, child.name()))
391 process_variable(dmr, group, parent, child);
392 }
393 }
394 }
395
396 dc(btp)->set_xml_node(var_node);
397}
398
406BaseType *DMZ::build_variable(DMR *dmr, D4Group *group, Type t, const xml_node &var_node)
407{
408 assert(dmr->factory());
409
410 string name_value;
411 string enum_value;
412 for (xml_attribute attr = var_node.first_attribute(); attr; attr = attr.next_attribute()) {
413 if (is_eq(attr.name(), "name")) {
414 name_value = attr.value();
415 }
416 if (is_eq(attr.name(), "enum")) {
417 enum_value = attr.value();
418 }
419 }
420
421 if (name_value.empty())
422 throw BESInternalError("The variable 'name' attribute was missing.", __FILE__, __LINE__);
423
424 BaseType *btp = dmr->factory()->NewVariable(t, name_value);
425 if (!btp)
426 throw BESInternalError("Could not instantiate the variable ' "+ name_value +"'.", __FILE__, __LINE__);
427
428 btp->set_is_dap4(true);
429
430 if (t == dods_enum_c) {
431 if (enum_value.empty())
432 throw BESInternalError("The variable ' " + name_value + "' lacks an 'enum' attribute.", __FILE__, __LINE__);
433
434 D4EnumDef *enum_def;
435 if (enum_value[0] == '/')
436 enum_def = dmr->root()->find_enum_def(enum_value);
437 else
438 enum_def = group->find_enum_def(enum_value);
439
440 if (!enum_def)
441 throw BESInternalError("Could not find the Enumeration definition '" + enum_value + "'.", __FILE__, __LINE__);
442
443 dynamic_cast<D4Enum&>(*btp).set_enumeration(enum_def);
444 }
445
446 return btp;
447}
448
459BaseType *DMZ::add_scalar_variable(DMR *dmr, D4Group *group, Constructor *parent, Type t, const xml_node &var_node)
460{
461 assert(group);
462
463 BaseType *btp = build_variable(dmr, group, t, var_node);
464
465 // if parent is non-null, the code should add the new var to a constructor,
466 // else add the new var to the group.
467 if (parent)
468 parent->add_var_nocopy(btp);
469 else
470 group->add_var_nocopy(btp);
471
472 return btp;
473}
474
489BaseType *DMZ::add_array_variable(DMR *dmr, D4Group *group, Constructor *parent, Type t, const xml_node &var_node)
490{
491 assert(group);
492
493 BaseType *btp = build_variable(dmr, group, t, var_node);
494
495 // Transform the scalar to an array
496 auto *array = static_cast<Array*>(dmr->factory()->NewVariable(dods_array_c, btp->name()));
497 array->set_is_dap4(true);
498 array->add_var_nocopy(btp);
499
500 // The SAX parser set up the parse of attributes here. For the thin DMR, we won't
501 // parse those from the DMR now. jhrg 10/21/21
502
503 // Now grab the dimension elements
504 for (auto child = var_node.first_child(); child; child = child.next_sibling()) {
505 if (is_eq(child.name(), "Dim")) {
506 process_dim(dmr, group, array, child);
507 }
508 else if (is_eq(child.name(), "Map")) {
509 process_map(dmr, group, array, child);
510 }
511 }
512
513 if (parent)
514 parent->add_var_nocopy(array);
515 else
516 group->add_var_nocopy(array);
517
518 return array;
519}
520
529void DMZ::process_group(DMR *dmr, D4Group *parent, const xml_node &var_node)
530{
531 string name_value;
532 for (xml_attribute attr = var_node.first_attribute(); attr; attr = attr.next_attribute()) {
533 if (is_eq(attr.name(), "name")) {
534 name_value = attr.value();
535 }
536 }
537
538 if (name_value.empty())
539 throw BESInternalError("The required attribute 'name' was missing from a Group element.", __FILE__, __LINE__);
540
541 BaseType *btp = dmr->factory()->NewVariable(dods_group_c, name_value);
542 if (!btp)
543 throw BESInternalError("Could not instantiate the Group '" + name_value + "'.", __FILE__, __LINE__);
544
545 auto new_group = dynamic_cast<DmrppD4Group*>(btp);
546
547 // Need to set this to get the D4Attribute behavior in the type classes
548 // shared between DAP2 and DAP4. jhrg 4/18/13
549 new_group->set_is_dap4(true);
550
551 // link it up and change the current group
552 new_group->set_parent(parent);
553 parent->add_group_nocopy(new_group);
554
555 // Save the xml_node so that we can later find unprocessed XML without searching
556 new_group->set_xml_node(var_node);
557
558 // Now parse all the child nodes of the Group.
559 // NB: this is the same block of code as in build_thin_dmr(); refactor. jhrg 10/21/21
560 for (auto child = var_node.first_child(); child; child = child.next_sibling()) {
561 if (is_eq(child.name(), "Dimension")) {
562 process_dimension(new_group, child);
563 }
564 else if (is_eq(child.name(), "Group")) {
565 process_group(dmr, new_group, child);
566 }
567 else if (member_of(variable_elements, child.name())) {
568 process_variable(dmr, new_group, nullptr, child);
569 }
570 }
571}
572
579{
580 auto xml_root_node = d_xml_doc.first_child();
581
582 process_dataset(dmr, xml_root_node);
583
584 auto root_group = dmr->root();
585
586 auto *dg = dynamic_cast<DmrppD4Group*>(root_group);
587 if (!dg)
588 throw BESInternalError("Expected the root group to also be an instance of DmrppD4Group.", __FILE__, __LINE__);
589
590 dg->set_xml_node(xml_root_node);
591
592 for (auto child = xml_root_node.first_child(); child; child = child.next_sibling()) {
593 if (is_eq(child.name(), "Dimension")) {
594 process_dimension(dg, child);
595 }
596 else if (is_eq(child.name(), "Group")) {
597 process_group(dmr, dg, child);
598 }
599 // TODO Add EnumDef
600 else if (member_of(variable_elements, child.name())) {
601 process_variable(dmr, dg, nullptr, child);
602 }
603 }
604}
605
614void DMZ::process_attribute(D4Attributes *attributes, const xml_node &dap_attr_node)
615{
616 string name_value;
617 string type_value;
618 for (xml_attribute attr = dap_attr_node.first_attribute(); attr; attr = attr.next_attribute()) {
619 if (is_eq(attr.name(), "name")) {
620 name_value = attr.value();
621 }
622 if (is_eq(attr.name(), "type")) {
623 type_value = attr.value();
624 }
625 }
626
627 if (name_value.empty() || type_value.empty())
628 throw BESInternalError("The required attribute 'name' or 'type' was missing from an Attribute element.", __FILE__, __LINE__);
629
630 if (type_value == "Container") {
631 // Make the new attribute container and add it to current container
632 auto *dap_attr_cont = new D4Attribute(name_value, attr_container_c);
633 attributes->add_attribute_nocopy(dap_attr_cont);
634 // In this call, 'attributes()' will allocate the D4Attributes object
635 // that will hold the container's attributes.
636 // Test to see if there really are child "Attribute" nodes - empty containers
637 // are allowed. jhrg 11/4/21
638 if (dap_attr_node.first_child()) {
639 for (auto attr_node: dap_attr_node.children("Attribute")) {
640 process_attribute(dap_attr_cont->attributes(), attr_node);
641 }
642 }
643 }
644 else if (type_value == "OtherXML") {
645 // TODO Add support for OtherXML
646 }
647 else {
648 // Make the D4Attribute and add it to the D4Attributes attribute container
649 auto *attribute = new D4Attribute(name_value, StringToD4AttributeType(type_value));
650 attributes->add_attribute_nocopy(attribute);
651 // Process one or more Value elements
652 for (auto value_elem = dap_attr_node.first_child(); value_elem; value_elem = value_elem.next_sibling()) {
653 if (is_eq(value_elem.name(), "Value")) {
654 attribute->add_value(value_elem.child_value()); // returns the text of the first data node
655 }
656 }
657 }
658}
659
677void DMZ::build_basetype_chain(BaseType *btp, stack<BaseType*> &bt)
678{
679 auto parent = btp->get_parent();
680 bt.push(btp);
681
682 // The parent must be non-null and not the root group (the root group has no parent).
683 if (parent && !(parent->type() == dods_group_c && parent->get_parent() == nullptr))
684 build_basetype_chain(parent, bt);
685}
686
687xml_node DMZ::get_variable_xml_node_helper(const xml_node &/*parent_node*/, stack<BaseType*> &/*bt*/)
688{
689#if !USE_CACHED_XML_NODE
690 // When we have an array of Structure or Sequence, both the Array and the
691 // Structure BaseType are pushed on the stack. This happens because, for
692 // constructors, other variables reference them as a parent node (while that's
693 // not the case for the cardinal types held by an array). Here we pop the
694 // Array off the stack. A better solution might be to better control what gets
695 // pushed by build_basetype_chain(). jhrg 10/24/21
696 if (bt.top()->type() == dods_array_c && bt.top()->var()->is_constructor_type())
697 bt.pop();
698
699 // The DMR XML stores both scalar and array variables using XML elements
700 // named for the cardinal type. For an array, that is the type of the
701 // element, so we use BaseType->var()->type_name() for an Array.
702 string type_name = bt.top()->type() == dods_array_c ? bt.top()->var()->type_name(): bt.top()->type_name();
703 string var_name = bt.top()->name();
704 bt.pop();
705
706 // Now look for the node with the correct element type and matching name
707 for (auto node = parent_node.child(type_name.c_str()); node; node = node.next_sibling()) {
708 for (xml_attribute attr = node.first_attribute(); attr; attr = attr.next_attribute()) {
709 if (is_eq(attr.name(), "name") && is_eq(attr.value(), var_name.c_str())) {
710 // if this is the last BaseType on the stack, return the node
711 if (bt.empty())
712 return node;
713 else
714 return get_variable_xml_node_helper(node, bt);
715 }
716 }
717 }
718
719 return xml_node(); // return an empty node
720#else
721 return xml_node(); // return an empty node
722#endif
723}
724
731xml_node DMZ::get_variable_xml_node(BaseType *btp)
732{
733#if USE_CACHED_XML_NODE
734 auto node = dc(btp)->get_xml_node();
735 if (node == nullptr)
736 throw BESInternalError(string("The xml_node for '").append(btp->name()).append("' was not recorded."), __FILE__, __LINE__);
737
738 return node;
739#else
740 // load the BaseType objects onto a stack, since we start at the leaf and
741 // go backward using its 'parent' pointer, the order of BaseTypes on the
742 // stack will match the order in the hierarchy of the DOM tree.
743 stack<BaseType*> bt;
744 build_basetype_chain(btp, bt);
745
746 xml_node dataset = d_xml_doc.first_child();
747 if (!dataset || !is_eq(dataset.name(), "Dataset"))
748 throw BESInternalError("No DMR++ has been parsed.", __FILE__, __LINE__);
749
750 auto node = get_variable_xml_node_helper(dataset, bt);
751 return node;
752#endif
753}
754
760
772void
773DMZ::load_attributes(BaseType *btp)
774{
775 if (dc(btp)->get_attributes_loaded())
776 return;
777
778 load_attributes(btp, get_variable_xml_node(btp));
779
780 // TODO Remove redundant
781 dc(btp)->set_attributes_loaded(true);
782
783 switch (btp->type()) {
784 // When we load attributes for an Array, the set_send_p() method
785 // is called for its 'template' variable, but that call fails (and
786 // the attributes are already loaded). This block marks the attributes
787 // as loaded so the 'var_node == nullptr' exception above does not
788 // get thrown. Maybe a better fix would be to mark 'child variables'
789 // as having their attributes loaded. jhrg 11/16/21
790 case dods_array_c: {
791 dc(btp->var())->set_attributes_loaded(true);
792 break;
793 }
794
795 // FIXME There are no tests for this code. The above block for Array
796 // was needed, so it seems likely that this will be too, but ...
797 // jhrg 11/16/21
798 case dods_structure_c:
799 case dods_sequence_c:
800 case dods_grid_c: {
801 auto *c = dynamic_cast<Constructor*>(btp);
802 if (c) {
803 for (auto i = c->var_begin(), e = c->var_end(); i != e; i++) {
804 dc(btp->var())->set_attributes_loaded(true);
805 }
806 break;
807 }
808 }
809
810 default:
811 break;
812 }
813}
814
820void
821DMZ::load_attributes(BaseType *btp, xml_node var_node) const
822{
823 if (dc(btp)->get_attributes_loaded())
824 return;
825
826 // Attributes for this node will be held in the var_node siblings.
827 // NB: Make an explict call to the BaseType implementation in case
828 // the attributes() method is specialized for this DMR++ code to
829 // trigger a lazy-load of the variables' attributes. jhrg 10/24/21
830 // Could also use BaseType::set_attributes(). jhrg
831 auto attributes = btp->BaseType::attributes();
832 for (auto child = var_node.first_child(); child; child = child.next_sibling()) {
833 if (is_eq(child.name(), "Attribute")) {
834 process_attribute(attributes, child);
835 }
836 }
837
838 dc(btp)->set_attributes_loaded(true);
839}
840
845void
846DMZ::load_attributes(Constructor *constructor)
847{
848 load_attributes(constructor, get_variable_xml_node(constructor));
849 for (auto i = constructor->var_begin(), e = constructor->var_end(); i != e; ++i) {
850 // Groups are not allowed inside a Constructor
851 assert((*i)->type() != dods_group_c);
852 load_attributes(*i);
853 }
854}
855
856void
857DMZ::load_attributes(D4Group *group) {
858 // The root group is special; look for its DAP Attributes in the Dataset element
859 if (group->get_parent() == nullptr) {
860 xml_node dataset = d_xml_doc.child("Dataset");
861 if (!dataset)
862 throw BESInternalError("Could not find the 'Dataset' element in the DMR++ XML document.", __FILE__, __LINE__);
863 load_attributes(group, dataset);
864 }
865 else {
866 load_attributes(group, get_variable_xml_node(group));
867 }
868
869 for (auto i = group->var_begin(), e = group->var_end(); i != e; ++i) {
870 // Even though is_constructor_type() returns true for instances of D4Group,
871 // Groups are kept under a separate container from variables because they
872 // have a different function than the Structure and Sequence types (Groups
873 // never hold data).
874 assert((*i)->type() != dods_group_c);
875 load_attributes(*i);
876 }
877
878 for (auto i = group->grp_begin(), e = group->grp_end(); i != e; ++i) {
879 load_attributes(*i);
880 }
881}
882
883void DMZ::load_all_attributes(libdap::DMR *dmr)
884{
885 assert(d_xml_doc != nullptr);
886 load_attributes(dmr->root());
887}
888
890
895
905void
906DMZ::process_compact(BaseType *btp, const xml_node &compact)
907{
908 dc(btp)->set_compact(true);
909
910 auto char_data = compact.child_value();
911 if (!char_data)
912 throw BESInternalError("The dmrpp::compact is missing data values.",__FILE__,__LINE__);
913
914 std::vector <u_int8_t> decoded = base64::Base64::decode(char_data);
915
916 if (btp->type() != dods_array_c)
917 throw BESInternalError("The dmrpp::compact element must be the child of an array variable",__FILE__,__LINE__);
918
919 // We know from the above that this is an Array, so accessing btp->var() is OK.
920 switch (btp->var()->type()) {
921 case dods_array_c:
922 throw BESInternalError("DMR++ document fail: An Array may not be the template for an Array.", __FILE__, __LINE__);
923
924 case dods_byte_c:
925 case dods_char_c:
926 case dods_int8_c:
927 case dods_uint8_c:
928 case dods_int16_c:
929 case dods_uint16_c:
930 case dods_int32_c:
931 case dods_uint32_c:
932 case dods_int64_c:
933 case dods_uint64_c:
934
935 case dods_enum_c:
936
937 case dods_float32_c:
938 case dods_float64_c:
939 btp->val2buf(reinterpret_cast<void *>(decoded.data()));
940 btp->set_read_p(true);
941 break;
942
943 case dods_str_c:
944 case dods_url_c: {
945 std::string str(decoded.begin(), decoded.end());
946 auto *st = static_cast<DmrppArray *>(btp);
947 // Although val2buf() takes a void*, for DAP Str and Url types, it casts
948 // that to std::string*. jhrg 11/4/21
949 st->val2buf(&str);
950 st->set_read_p(true);
951 break;
952 }
953
954 default:
955 throw BESInternalError("Unsupported COMPACT storage variable type in the drmpp handler.", __FILE__, __LINE__);
956 }
957}
958
966void DMZ::process_chunk(DmrppCommon *dc, const xml_node &chunk) const
967{
968 string href;
969 string trust;
970 string offset;
971 string size;
972 string chunk_position_in_array;
973
974 bool href_trusted = false;
975
976 for (xml_attribute attr = chunk.first_attribute(); attr; attr = attr.next_attribute()) {
977 if (is_eq(attr.name(), "href")) {
978 href = attr.value();
979 }
980 else if (is_eq(attr.name(), "trust") || is_eq(attr.name(), "dmrpp:trust")) {
981 href_trusted = is_eq(attr.value(), "true");
982 }
983 else if (is_eq(attr.name(), "offset")) {
984 offset = attr.value();
985 }
986 else if (is_eq(attr.name(), "nBytes")) {
987 size = attr.value();
988 }
989 else if (is_eq(attr.name(), "chunkPositionInArray")) {
990 chunk_position_in_array = attr.value();
991 }
992 }
993
994 if (offset.empty() || size.empty())
995 throw BESInternalError("Both size and offset are required for a chunk node.", __FILE__, __LINE__);
996
997 if (!href.empty()) {
998 shared_ptr<http::url> data_url(new http::url(href, href_trusted));
999 dc->add_chunk(data_url, dc->get_byte_order(), stoull(size), stoull(offset), chunk_position_in_array);
1000 }
1001 else {
1002 dc->add_chunk(d_dataset_elem_href, dc->get_byte_order(), stoull(size), stoull(offset), chunk_position_in_array);
1003 }
1004}
1005
1012void DMZ::process_cds_node(DmrppCommon *dc, const xml_node &chunks)
1013{
1014 for (auto child = chunks.child("dmrpp:chunkDimensionSizes"); child; child = child.next_sibling()) {
1015 if (is_eq(child.name(), "dmrpp:chunkDimensionSizes")) {
1016 string sizes = child.child_value();
1017 dc->parse_chunk_dimension_sizes(sizes);
1018 }
1019 }
1020}
1021
1022static void add_fill_value_information(DmrppCommon *dc, const string &value_string, libdap::Type fv_type)
1023{
1024 dc->set_fill_value_string(value_string);
1025 dc->set_fill_value_type(fv_type);
1026 dc->set_uses_fill_value(true);
1027 }
1028
1029// a 'dmrpp:chunks' node has a chunkDimensionSizes node and then one or more chunks
1030// nodes, and they have to be in that order.
1031void DMZ::process_chunks(DmrppCommon *dc, const xml_node &chunks) const
1032{
1033 for (xml_attribute attr = chunks.first_attribute(); attr; attr = attr.next_attribute()) {
1034 if (is_eq(attr.name(), "compressionType")) {
1035 dc->set_filter(attr.value());
1036 }
1037 else if (is_eq(attr.name(), "fillValue")) {
1038 // Fill values are only supported for Arrays (5/9/22)
1039 auto array = dynamic_cast<libdap::Array*>(dc);
1040 if (!array)
1041 throw BESInternalError("Fill Value chunks are only supported for Arrays.", __FILE__, __LINE__);
1042
1043 add_fill_value_information(dc, attr.value(), array->var()->type());
1044 }
1045 else if (is_eq(attr.name(), "byteOrder")) {
1046 dc->ingest_byte_order(attr.value());
1047 }
1048 }
1049
1050 // Look for the chunksDimensionSizes element - it will not be present for contiguous data
1051 process_cds_node(dc, chunks);
1052
1053 // Chunks for this node will be held in the var_node siblings.
1054 for (auto chunk = chunks.child("dmrpp:chunk"); chunk; chunk = chunk.next_sibling()) {
1055 if (is_eq(chunk.name(), "dmrpp:chunk")) {
1056 process_chunk(dc, chunk);
1057 }
1058 }
1059}
1060
1066vector<unsigned long long> DMZ::get_array_dims(Array *array)
1067{
1068 vector<unsigned long long> array_dim_sizes;
1069 for (auto i= array->dim_begin(), e = array->dim_end(); i != e; ++i) {
1070 array_dim_sizes.push_back(array->dimension_size(i));
1071 }
1072
1073 return array_dim_sizes;
1074}
1075
1086size_t DMZ::logical_chunks(const vector <unsigned long long> &array_dim_sizes, const DmrppCommon *dc)
1087{
1088 auto const& chunk_dim_sizes = dc->get_chunk_dimension_sizes();
1089 if (chunk_dim_sizes.size() != array_dim_sizes.size()) {
1090 ostringstream oss;
1091 oss << "Expected the chunk and array rank to match (chunk: " << chunk_dim_sizes.size() << ", array: "
1092 << array_dim_sizes.size() << ")";
1093 throw BESInternalError(oss.str(), __FILE__, __LINE__);
1094 }
1095
1096 size_t num_logical_chunks = 1;
1097 auto i = array_dim_sizes.begin();
1098 for (auto chunk_dim_size: chunk_dim_sizes) {
1099 auto array_dim_size = *i++;
1100 num_logical_chunks *= (size_t)ceil((float)array_dim_size / (float)chunk_dim_size);
1101 }
1102
1103 return num_logical_chunks;
1104}
1105
1120set< vector<unsigned long long> > DMZ::get_chunk_map(const vector<shared_ptr<Chunk>> &chunks)
1121{
1122 set< vector<unsigned long long> > chunk_map;
1123 for (auto const &chunk: chunks) {
1124 chunk_map.insert(chunk->get_position_in_array());
1125 }
1126
1127 return chunk_map;
1128}
1129
1138void DMZ::process_fill_value_chunks(DmrppCommon *dc, const set<shape> &chunk_map, const shape &chunk_shape,
1139 const shape &array_shape, unsigned long long chunk_size)
1140{
1141 // Use an Odometer to walk over each potential chunk
1142 DmrppChunkOdometer odometer(array_shape, chunk_shape);
1143 do {
1144 const auto &s = odometer.indices();
1145 if (chunk_map.find(s) == chunk_map.end()) {
1146 // Fill Value chunk
1147 // what we need byte order, pia, fill value
1148 dc->add_chunk(dc->get_byte_order(), dc->get_fill_value(), dc->get_fill_value_type(), chunk_size, s);
1149 }
1150 } while (odometer.next());
1151}
1152
1161void DMZ::load_chunks(BaseType *btp)
1162{
1163 if (dc(btp)->get_chunks_loaded())
1164 return;
1165
1166 // goto the DOM tree node for this variable
1167 xml_node var_node = get_variable_xml_node(btp);
1168 if (var_node == nullptr)
1169 throw BESInternalError("Could not find location of variable in the DMR++ XML document.", __FILE__, __LINE__);
1170
1171 // Chunks for this node will be held in the var_node siblings. For a given BaseType, there should
1172 // be only one chunks node xor one chunk node.
1173 int chunks_found = 0;
1174 int chunk_found = 0;
1175 int compact_found = 0;
1176
1177 // Chunked data
1178 auto child = var_node.child("dmrpp:chunks");
1179 if (child) {
1180 chunks_found = 1;
1181 process_chunks(dc(btp), child);
1182 auto array = dynamic_cast<Array*>(btp);
1183 // It's possible to have a chunk, but not have a chunk dimension sizes element
1184 // when there is only one chunk (e.g., with HDF5 Contiguous storage). jhrg 5/5/22
1185 if (array && !dc(btp)->get_chunk_dimension_sizes().empty()) {
1186 auto const &array_shape = get_array_dims(array);
1187 size_t num_logical_chunks = logical_chunks(array_shape, dc(btp));
1188 // do we need to run this code?
1189 if (num_logical_chunks != dc(btp)->get_chunks_size()) {
1190 auto const &chunk_map = get_chunk_map(dc(btp)->get_immutable_chunks());
1191 // Since the variable has some chunks that hold only fill values, add those chunks
1192 // to the vector of chunks.
1193 auto const &chunk_shape = dc(btp)->get_chunk_dimension_sizes();
1194 unsigned long long chunk_size_bytes = array->var()->width(); // start with the element size in bytes
1195 for (auto dim_size: chunk_shape)
1196 chunk_size_bytes *= dim_size;
1197 process_fill_value_chunks(dc(btp), chunk_map, dc(btp)->get_chunk_dimension_sizes(),
1198 array_shape, chunk_size_bytes);
1199 }
1200 }
1201 // If both chunks and chunk_dimension_sizes are empty, this is contiguous storage
1202 // with nothing but fill values. Make a single chunk that can hold the fill values.
1203 else if (array && dc(btp)->get_immutable_chunks().empty()) {
1204 auto const &array_shape = get_array_dims(array);
1205 // Since there is one chunk, the chunk size and array size are one and the same.
1206 unsigned long long array_size_bytes = 1;
1207 for (auto dim_size: array_shape)
1208 array_size_bytes *= dim_size;
1209 // array size above is in _elements_, multiply by the element width to get bytes
1210 array_size_bytes *= array->var()->width();
1211 // Position in array is 0, 0, ..., 0 were the number of zeros is the number of array dimensions
1212 shape pia(0,array_shape.size());
1213 auto dcp = dc(btp);
1214 dcp->add_chunk(dcp->get_byte_order(), dcp->get_fill_value(), dcp->get_fill_value_type(), array_size_bytes, pia);
1215 }
1216 }
1217
1218 // Contiguous data
1219 auto chunk = var_node.child("dmrpp:chunk");
1220 if (chunk) {
1221 chunk_found = 1;
1222 process_chunk(dc(btp), chunk);
1223 }
1224
1225 auto compact = var_node.child("dmrpp:compact");
1226 if (compact) {
1227 compact_found = 1;
1228 process_compact(btp, compact);
1229 }
1230
1231 // Here we (optionally) check that exactly one of the three types of node was found
1232 if (DmrppRequestHandler::d_require_chunks) {
1233 int elements_found = chunks_found + chunk_found + compact_found;
1234 if (elements_found != 1) {
1235 ostringstream oss;
1236 oss << "Expected chunk, chunks or compact information in the DMR++ data. Found " << elements_found
1237 << " types of nodes.";
1238 throw BESInternalError(oss.str(), __FILE__, __LINE__);
1239 }
1240 }
1241
1242 dc(btp)->set_chunks_loaded(true);
1243}
1244
1246
1247} // namespace dmrpp
exception thrown if internal error encountered
DMZ()=default
Build a DMZ without simultaneously parsing an XML document.
virtual void load_chunks(libdap::BaseType *btp)
Load the chunk information into a variable.
Definition: DMZ.cc:1161
void parse_xml_doc(const std::string &filename)
Build the DOM tree for a DMR++ XML document.
Definition: DMZ.cc:146
virtual void build_thin_dmr(libdap::DMR *dmr)
populate the DMR instance as a 'thin DMR'
Definition: DMZ.cc:578
virtual void set_fill_value_string(const std::string &fv)
Set the fill value (using a string)
Definition: DmrppCommon.h:214
virtual libdap::Type get_fill_value_type() const
Definition: DmrppCommon.h:226
virtual std::string get_fill_value() const
Definition: DmrppCommon.h:223
virtual void set_uses_fill_value(bool ufv)
Set the uses_fill_value property.
Definition: DmrppCommon.h:211
virtual void parse_chunk_dimension_sizes(const std::string &chunk_dim_sizes_string)
Set the dimension sizes for a chunk.
Definition: DmrppCommon.cc:128
virtual unsigned long add_chunk(std::shared_ptr< http::url > d_data_url, const std::string &byte_order, unsigned long long size, unsigned long long offset, const std::string &position_in_array)
Adds a chunk to the vector of chunk refs (byteStreams) and returns the size of the chunks' internal vector.
Definition: DmrppCommon.cc:208
void set_filter(const std::string &value)
Set the value of the filters property.
Definition: DmrppCommon.cc:102
virtual void ingest_byte_order(const std::string &byte_order_string)
Parses the text content of the XML element chunks:byteOrder.
Definition: DmrppCommon.cc:177
virtual const std::vector< unsigned long long > & get_chunk_dimension_sizes() const
The chunk dimension sizes held in a const vector.
Definition: DmrppCommon.h:195
virtual void set_fill_value_type(libdap::Type t)
Set the libdap data type to use with the fill value.
Definition: DmrppCommon.h:217
void set_compact(bool value)
Set the value of the compact property.
Definition: DmrppCommon.h:163
Type
Type of JSON value.
Definition: rapidjson.h:664