bes Updated for version 3.20.13
NCMLParser.cc
1
2// This file is part of the "NcML Module" project, a BES module designed
3// to allow NcML files to be used as a wrapper to add
4// AIS to existing datasets of any format.
5//
6// Copyright (c) 2009 OPeNDAP, Inc.
7// Author: Michael Johnson <m.johnson@opendap.org>
8//
9// For more information, please also see the main website: http://opendap.org/
10//
11// This library is free software; you can redistribute it and/or
12// modify it under the terms of the GNU Lesser General Public
13// License as published by the Free Software Foundation; either
14// version 2.1 of the License, or (at your option) any later version.
15//
16// This library is distributed in the hope that it will be useful,
17// but WITHOUT ANY WARRANTY; without even the implied warranty of
18// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19// Lesser General Public License for more details.
20//
21// You should have received a copy of the GNU Lesser General Public
22// License along with this library; if not, write to the Free Software
23// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24//
25// Please see the files COPYING and COPYRIGHT for more information on the LGPL.
26//
27// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
29#include "config.h"
30#include "NCMLParser.h" // ncml_module
31
32#include "AggregationElement.h" // ncml_module
33#include "AggregationUtil.h" // agg_util
34#include <BESConstraintFuncs.h>
35#include <BESDataDDSResponse.h>
36#include <BESDDSResponse.h>
37#include <BESDebug.h>
38#include <BESStopWatch.h>
39#include "DDSLoader.h" // ncml_module
40#include "DimensionElement.h" // ncml_module
41#include <libdap/AttrTable.h> // libdap
42#include <libdap/BaseType.h> // libdap
43#include <libdap/DAS.h> // libdap
44#include <libdap/DDS.h> // libdap
45//#include <libdap/mime_util.h>
46#include <libdap/Structure.h> // libdap
47#include <map>
48#include <memory>
49#include "NCMLDebug.h" // ncml_module
50#include "NCMLElement.h" // ncml_module
51#include "NCMLUtil.h" // ncml_module
52#include "NetcdfElement.h" // ncml_module
53#include "OtherXMLParser.h" // ncml_module
54#include <libdap/parser.h> // libdap for the type checking...
55#include "SaxParserWrapper.h" // ncml_module
56#include <sstream>
57
58// For extra debug spew for now.
59#define DEBUG_NCML_PARSER_INTERNALS 1
60
61using namespace agg_util;
62
63namespace ncml_module {
64
65// From the DAP 2 guide....
66static const unsigned int MAX_DAP_STRING_SIZE = 32767;
67
68// Consider filling this with a compilation flag.
69/* static */bool NCMLParser::sThrowExceptionOnUnknownElements = true;
70
71// An attribute or variable with type "Structure" will match this string.
72const string NCMLParser::STRUCTURE_TYPE("Structure");
73
74// Just cuz I hate magic -1. Used in _currentParseLine
75static const int NO_CURRENT_PARSE_LINE_NUMBER = -1;
76
78// Helper class.
79AttrTableLazyPtr::AttrTableLazyPtr(const NCMLParser& parser, AttrTable* pAT/*=0*/) :
80 _parser(parser), _pAttrTable(pAT), _loaded(pAT)
81{
82}
83
84AttrTableLazyPtr::~AttrTableLazyPtr()
85{
86 _pAttrTable = 0;
87 _loaded = false;
88}
89
90AttrTable*
92{
93 if (!_loaded) {
94 const_cast<AttrTableLazyPtr*>(this)->loadAndSetAttrTable();
95 }
96 return _pAttrTable;
97}
98
99void AttrTableLazyPtr::set(AttrTable* pAT)
100{
101 _pAttrTable = pAT;
102 if (pAT) {
103 _loaded = true;
104 }
105 else {
106 _loaded = false;
107 }
108}
109
111{
112 // force it to load next get().
113 _pAttrTable = 0;
114 _loaded = false;
115}
116
117void AttrTableLazyPtr::loadAndSetAttrTable()
118{
119 set(0);
120 NetcdfElement* pDataset = _parser.getCurrentDataset();
121 if (pDataset) {
122 // The lazy load actually occurs in here
123 DDS* pDDS = pDataset->getDDS();
124 if (pDDS) {
125 set(&(pDDS->get_attr_table()));
126 _loaded = true;
127 }
128 }
129}
130
133
135 _filename(""), _loader(loader), _responseType(DDSLoader::eRT_RequestDDX), _response(0), _rootDataset(0), _currentDataset(
136 0), _pVar(0), _pCurrentTable(*this, 0), _elementStack(), _scope(), _namespaceStack(), _pOtherXMLParser(0), _currentParseLine(
137 NO_CURRENT_PARSE_LINE_NUMBER)
138{
139 BESDEBUG("ncml", "Created NCMLParser." << endl);
140}
141
142NCMLParser::~NCMLParser()
143{
144 // clean other stuff up
145 cleanup();
146}
147
148unique_ptr<BESDapResponse> NCMLParser::parse(const string& ncmlFilename, DDSLoader::ResponseType responseType)
149{
150 // Parse into a newly created object.
151 unique_ptr<BESDapResponse> response = DDSLoader::makeResponseForType(responseType);
152
153 // Parse into the response. We still got it in the unique_ptr in this scope, so we're safe
154 // on exception since the unique_ptr in this func will cleanup the memory.
155 parseInto(ncmlFilename, responseType, response.get());
156
157 // Relinquish it to the caller
158 return response;
159}
160
161void NCMLParser::parseInto(const string& ncmlFilename, DDSLoader::ResponseType responseType, BESDapResponse* response)
162{
163 BESStopWatch sw2;
164 if (BESDebug::IsSet(TIMING_LOG_KEY)) sw2.start("NCMLParser::parseInto", ncmlFilename);
165
166 VALID_PTR(response);
167 NCML_ASSERT_MSG(DDSLoader::checkResponseIsValidType(responseType, response),
168 "NCMLParser::parseInto: got wrong response object for given type.");
169
170 _responseType = responseType;
171 _response = response;
172
173 if (parsing()) {
174 THROW_NCML_INTERNAL_ERROR("Illegal Operation: NCMLParser::parse called while already parsing!");
175 }
176
177 BESDEBUG("ncml", "Beginning NcML parse of file=" << ncmlFilename << endl);
178
179 // In case we care.
180 _filename = ncmlFilename;
181
182 // Invoke the libxml sax parser
183 SaxParserWrapper parser(*this);
184
185 parser.parse(ncmlFilename);
186
187 // Prepare for a new parse, making sure it's all cleaned up (with the exception of the _ddsResponse
188 // which where's about to send off)
189 resetParseState();
190
191 // we're done with it.
192 _response = 0;
193}
194
196{
197 return !_filename.empty();
198}
199
201{
202 return _currentParseLine;
203}
204
207{
208 return _namespaceStack;
209}
210
211void NCMLParser::onStartDocument()
212{
213 BESDEBUG("ncml", "onStartDocument." << endl);
214}
215
216void NCMLParser::onEndDocument()
217{
218 BESDEBUG("ncml", "onEndDocument." << endl);
219}
220
221void NCMLParser::onStartElement(const std::string& name, const XMLAttributeMap& attrs)
222{
223 // If we have a proxy set for OtherXML, pass calls there.
224 if (isParsingOtherXML()) {
225 VALID_PTR(_pOtherXMLParser);
226 _pOtherXMLParser->onStartElement(name, attrs);
227 }
228 else // Otherwise do the standard NCML parse
229 {
230 processStartNCMLElement(name, attrs);
231 }
232}
233
234// Local helper for below...
235// Sees whether we are closing the element on top
236// of the NCMLElement stack and that we're not parsing
237// OtherXML, or if we are that its depth is now zero.
238static bool shouldStopOtherXMLParse(NCMLElement* top, const string& closingElement, OtherXMLParser& rProxyParser)
239{
240 // If the stack top element name is the same as the element we are closing...
241 // and the parse depth is 0, then we're done.
242 // We MUST check the parse depth in case the other XML has an Attribute in it!
243 // We want to be sure we're closing the right one.
244 if (top->getTypeName() == closingElement && rProxyParser.getParseDepth() == 0) {
245 return true;
246 }
247 else // we're not done.
248 {
249 return false;
250 }
251}
252
253void NCMLParser::onEndElement(const std::string& name)
254{
255 NCMLElement* elt = getCurrentElement();
256 VALID_PTR(elt);
257
258 // First, handle the OtherXML proxy parsing case
259 if (isParsingOtherXML()) {
260 VALID_PTR(_pOtherXMLParser);
261 // If we're closing the element that caused the OtherXML parse...
262 if (shouldStopOtherXMLParse(elt, name, *_pOtherXMLParser)) {
263 // Then we want to clear the proxy from this and
264 // call the end on the top of the element stack.
265 // We assume it has access to the OtherXML parser
266 // and will use the data.
267 _pOtherXMLParser = 0;
268 processEndNCMLElement(name);
269 }
270 else {
271 // Pass through to proxy
272 _pOtherXMLParser->onEndElement(name);
273 }
274 }
275 else // Do the regular NCMLElement call.
276 {
277 // Call the regular NCMLElement end element.
278 processEndNCMLElement(name);
279 }
280}
281
282void NCMLParser::onStartElementWithNamespace(const std::string& localname, const std::string& prefix,
283 const std::string& uri, const XMLAttributeMap& attributes, const XMLNamespaceMap& namespaces)
284{
285 // If we have a proxy set for OtherXML, pass calls there.
286 if (isParsingOtherXML()) {
287 VALID_PTR(_pOtherXMLParser);
288 _pOtherXMLParser->onStartElementWithNamespace(localname, prefix, uri, attributes, namespaces);
289 }
290 else // Otherwise do the standard NCML parse
291 // but keep the namespaces on the stack. We don't do this for OtherXML.
292 {
293 _namespaceStack.push(namespaces);
294 processStartNCMLElement(localname, attributes);
295 }
296}
297
298void NCMLParser::onEndElementWithNamespace(const std::string& localname, const std::string& prefix,
299 const std::string& uri)
300{
301 NCMLElement* elt = getCurrentElement();
302 VALID_PTR(elt);
303
304 // First, handle the OtherXML proxy parsing case
305 if (isParsingOtherXML()) {
306 VALID_PTR(_pOtherXMLParser);
307 // If we're closing the element that caused the OtherXML parse...
308 if (shouldStopOtherXMLParse(elt, localname, *_pOtherXMLParser)) {
309 // Then we want to clear the proxy from this and
310 // call the end on the top of the element stack.
311 // We assume it has access to the OtherXML parser
312 // and will use the data.
313 _pOtherXMLParser = 0;
314 processEndNCMLElement(localname);
315 }
316 else {
317 // Pass through to proxy
318 _pOtherXMLParser->onEndElementWithNamespace(localname, prefix, uri);
319 }
320 }
321 else // Do the regular NCMLElement call.
322 {
323 // Call the regular NCMLElement end element.
324 processEndNCMLElement(localname);
325 _namespaceStack.pop();
326 }
327}
328
329void NCMLParser::onCharacters(const std::string& content)
330{
331 // If we're parsing OtherXML, send the call to the proxy.
332 if (isParsingOtherXML()) {
333 VALID_PTR(_pOtherXMLParser);
334 _pOtherXMLParser->onCharacters(content);
335 }
336 else // Standard NCML parse
337 {
338 // If we got an element on the stack, hand it off. Otherwise, do nothing.
339 NCMLElement* elt = getCurrentElement();
340 if (elt) {
341 elt->handleContent(content);
342 }
343 }
344}
345
346void NCMLParser::onParseWarning(std::string msg)
347{
348 // TODO We may want to make a flag for considering warnings errors as well.
349 BESDEBUG("ncml", "PARSE WARNING: LibXML msg={" << msg << "}. Attempting to continue parse." << endl);
350}
351
352void NCMLParser::onParseError(std::string msg)
353{
354 // Pretty much have to give up on malformed XML.
355 THROW_NCML_PARSE_ERROR(getParseLineNumber(), "libxml SAX2 parser error! msg={" + msg + "} Terminating parse!");
356}
357
359{
360 _currentParseLine = line;
361 // BESDEBUG("ncml", "******** Now parsing line: " << line << endl);
362}
363
365// Non-public Implementation
366
367bool NCMLParser::isScopeAtomicAttribute() const
368{
369 return (!_scope.empty()) && (_scope.topType() == ScopeStack::ATTRIBUTE_ATOMIC);
370}
371
372bool NCMLParser::isScopeAttributeContainer() const
373{
374 return (!_scope.empty()) && (_scope.topType() == ScopeStack::ATTRIBUTE_CONTAINER);
375}
376
377bool NCMLParser::isScopeSimpleVariable() const
378{
379 return (!_scope.empty()) && (_scope.topType() == ScopeStack::VARIABLE_ATOMIC);
380}
381
382bool NCMLParser::isScopeCompositeVariable() const
383{
384 return (!_scope.empty()) && (_scope.topType() == ScopeStack::VARIABLE_CONSTRUCTOR);
385}
386
387bool NCMLParser::isScopeVariable() const
388{
389 return (isScopeSimpleVariable() || isScopeCompositeVariable());
390}
391
392bool NCMLParser::isScopeGlobal() const
393{
394 return withinNetcdf() && _scope.empty();
395}
396
397// TODO Clean up these next two calls with a parser state or something....
398// Dynamic casting all the time isn't super fast or clean if not needed...
399bool NCMLParser::isScopeNetcdf() const
400{
401 // see if the last thing parsed was <netcdf>
402 return (!_elementStack.empty() && dynamic_cast<NetcdfElement*>(_elementStack.back()));
403}
404
405bool NCMLParser::isScopeAggregation() const
406{
407 // see if the last thing parsed was <netcdf>
408 return (!_elementStack.empty() && dynamic_cast<AggregationElement*>(_elementStack.back()));
409}
410
411bool NCMLParser::withinNetcdf() const
412{
413 return _currentDataset != 0;
414}
415
416bool NCMLParser::withinVariable() const
417{
418 return withinNetcdf() && _pVar;
419}
420
422NCMLParser::getDDSLoader() const
423{
424 return _loader;
425}
426
427NetcdfElement*
428NCMLParser::getCurrentDataset() const
429{
430 return _currentDataset;
431}
432
433NetcdfElement*
434NCMLParser::getRootDataset() const
435{
436 return _rootDataset;
437}
438
439DDS*
440NCMLParser::getDDSForCurrentDataset() const
441{
442 NetcdfElement* dataset = getCurrentDataset();
443 NCML_ASSERT_MSG(dataset, "getDDSForCurrentDataset() called when we're not processing a <netcdf> location!");
444 return dataset->getDDS();
445}
446
447void NCMLParser::pushCurrentDataset(NetcdfElement* dataset)
448{
449 VALID_PTR(dataset);
450 // The first one we get is the root It's special!
451 // We tell it to use the top level response object for the
452 // parser, since that's what ultimately is returned
453 // and we don't want the root making its own we need to copy.
454 bool thisIsRoot = !_rootDataset;
455 if (thisIsRoot) {
456 _rootDataset = dataset;
457 VALID_PTR(_response);
458 _rootDataset->borrowResponseObject(_response);
459 }
460 else {
461 addChildDatasetToCurrentDataset(dataset);
462 }
463
464 // Also invalidates the AttrTable so it gets cached again.
465 setCurrentDataset(dataset);
466
467 // TODO: What do we do with the scope stack for a nested dataset?!
468}
469
470void NCMLParser::popCurrentDataset(NetcdfElement* dataset)
471{
472 if (dataset && dataset != _currentDataset) {
473 THROW_NCML_INTERNAL_ERROR(
474 "NCMLParser::popCurrentDataset(): the dataset we expect on the top of the stack is not correct!");
475 }
476
477 dataset = getCurrentDataset();
478 VALID_PTR(dataset);
479
480 // If it's the root, we're done and need to clear up the state.
481 if (dataset == _rootDataset) {
482 _rootDataset->unborrowResponseObject(_response);
483 _rootDataset = 0;
484 setCurrentDataset(0);
485 }
486 else {
487 // If it's not the root, it should have a parent, so go get it and make that the new current.
488 NetcdfElement* parentDataset = dataset->getParentDataset();
489 NCML_ASSERT_MSG(parentDataset, "NCMLParser::popCurrentDataset() got non-root dataset, but it had no parent!!");
490 setCurrentDataset(parentDataset);
491 }
492}
493
494void NCMLParser::setCurrentDataset(NetcdfElement* dataset)
495{
496 if (dataset) {
497 // Make sure it's state is ready to go with operations before making it current
498 NCML_ASSERT(dataset->isValid());
499 _currentDataset = dataset;
500 // We don't set the current attr table, rather it is lazy eval
501 // from getCurrentAttrTable() only if called. This call tells it to do that.
502 _pCurrentTable.invalidate();
503
504 // UNLESS it's the root dataset, which we want to force to load
505 // since a passthrough file will generate an empty metadata set otherwise
506 // since the table is never requested.
507 if (_currentDataset == _rootDataset) {
508 // Force it to cache so we actually laod the metadata for the root set.
509 // Chidl sets are aggregations so we don't load those unless needed.
510 _pCurrentTable.set(_pCurrentTable.get());
511 }
512 }
513 else {
514 BESDEBUG("ncml", "NCMLParser::setCurrentDataset(): setting to NULL..." << endl);
515 _currentDataset = 0;
516 _pCurrentTable.invalidate();
517 }
518}
519
520void NCMLParser::addChildDatasetToCurrentDataset(NetcdfElement* dataset)
521{
522 VALID_PTR(dataset);
523
524 AggregationElement* agg = _currentDataset->getChildAggregation();
525 if (!agg) {
526 THROW_NCML_INTERNAL_ERROR(
527 "NCMLParser::addChildDatasetToCurrentDataset(): current dataset has no aggregation element! We can't add it!");
528 }
529
530 // This will add as strong ref to dataset from agg (child) and a weak to agg from dataset (parent)
531 agg->addChildDataset(dataset);
532
533 // Force the dataset to create an internal response object for the request type we're processing
534 dataset->createResponseObject(_responseType);
535}
536
537bool NCMLParser::parsingDataRequest() const
538{
539 const BESDataDDSResponse* const pDataDDSResponse = dynamic_cast<const BESDataDDSResponse* const >(_response);
540 return (pDataDDSResponse);
541}
542
543void NCMLParser::loadLocation(const std::string& location, agg_util::DDSLoader::ResponseType responseType,
544 BESDapResponse* response)
545{
546 VALID_PTR(response);
547 _loader.loadInto(location, responseType, response);
548}
549
550void NCMLParser::resetParseState()
551{
552 _filename = "";
553 _pVar = 0;
554 _pCurrentTable.set(0);
555
556 _scope.clear();
557
558 // Not that this matters...
559 _responseType = DDSLoader::eRT_RequestDDX;
560
561 // We never own the memory in this, so just clear it.
562 _response = 0;
563
564 // We don't own these either.
565 _rootDataset = 0;
566 _currentDataset = 0;
567
568 // Cleanup any memory in the _elementStack
569 clearElementStack();
570
571 _namespaceStack.clear();
572
573 // just in case
574 _loader.cleanup();
575
576 // In case we had one, null it. The setter is in charge of the memory.
577 _pOtherXMLParser = 0;
578}
579
580bool NCMLParser::isNameAlreadyUsedAtCurrentScope(const std::string& name)
581{
582 return (getVariableInCurrentVariableContainer(name) || attributeExistsAtCurrentScope(name));
583}
584
585BaseType*
586NCMLParser::getVariableInCurrentVariableContainer(const string& name)
587{
588 return getVariableInContainer(name, _pVar);
589}
590
591BaseType*
592NCMLParser::getVariableInContainer(const string& varName, BaseType* pContainer)
593{
594 // BaseType::btp_stack varContext;
595 if (pContainer) {
596 // @@@ Old code... recurses and uses dots as field separators... Not good.
597 //return pContainer->var(varName, varContext);
598 // It has to be a Constructor!
599 Constructor* pCtor = dynamic_cast<Constructor*>(pContainer);
600 if (!pCtor) {
601 BESDEBUG("ncml",
602 "WARNING: NCMLParser::getVariableInContainer: " "Expected a BaseType of subclass Constructor, but didn't get it!" << endl);
603 return 0;
604 }
605 else {
607 }
608 }
609 else {
610 return getVariableInDDS(varName);
611 }
612}
613
614// Not that this should take a fully qualified one too, but without a scoping operator (.) it will
615// just search the top level variables.
616BaseType*
617NCMLParser::getVariableInDDS(const string& varName)
618{
619 // BaseType::btp_stack varContext;
620 // return getDDSForCurrentDataset()->var(varName, varContext);
621 DDS* pDDS = getDDSForCurrentDataset();
622 if (pDDS) {
624 }
625 else {
626 return 0;
627 }
628}
629
630void NCMLParser::addCopyOfVariableAtCurrentScope(BaseType& varTemplate)
631{
632 // make sure the name is free
633 if (isNameAlreadyUsedAtCurrentScope(varTemplate.name())) {
634 THROW_NCML_PARSE_ERROR(getParseLineNumber(), "NCMLParser::addNewVariableAtCurrentScope:"
635 " Cannot add variable since a variable or attribute of the same name exists at current scope."
636 " Name= " + varTemplate.name());
637 }
638
639 // Also an internal error if the caller tries it.
640 if (!(isScopeCompositeVariable() || isScopeGlobal())) {
641 THROW_NCML_INTERNAL_ERROR(
642 "NCMLParser::addNewVariableAtCurrentScope: current scope not valid for adding variable. Scope="
643 + getTypedScopeString());
644 }
645
646 // OK, we know we can add it now. But to what?
647 if (_pVar) // Constructor variable
648 {
649 NCML_ASSERT_MSG(_pVar->is_constructor_type(), "Expected _pVar is a container type!");
650 _pVar->add_var(&varTemplate);
651 }
652 else // Top level DDS for current dataset
653 {
654 BESDEBUG("ncml",
655 "Adding new variable to DDS top level. Variable name=" << varTemplate.name() << " and typename=" << varTemplate.type_name() << endl);
656 DDS* pDDS = getDDSForCurrentDataset();
657 pDDS->add_var(&varTemplate);
658 }
659}
660
661void NCMLParser::deleteVariableAtCurrentScope(const string& name)
662{
663 if (!(isScopeCompositeVariable() || isScopeGlobal())) {
664 THROW_NCML_INTERNAL_ERROR(
665 "NCMLParser::deleteVariableAtCurrentScope called when we do not have a variable container at current scope!");
666 }
667
668 if (_pVar) // In container?
669 {
670 // Given interfaces, unfortunately it needs to be a Structure or we can't do this operation.
671 Structure* pVarContainer = dynamic_cast<Structure*>(_pVar);
672 if (!pVarContainer) {
673 THROW_NCML_PARSE_ERROR(getParseLineNumber(),
674 "NCMLParser::deleteVariableAtCurrentScope called with _pVar not a "
675 "Structure class variable! "
676 "We can only delete variables from top DDS or within a Structure now. scope="
677 + getTypedScopeString());
678 }
679 // First, make sure it exists so we can warn if not. The call fails silently.
680 BaseType* pToBeNuked = pVarContainer->var(name);
681 if (!pToBeNuked) {
682 THROW_NCML_PARSE_ERROR(getParseLineNumber(),
683 "Tried to remove variable from a Structure, but couldn't find the variable with name=" + name
684 + "at scope=" + getScopeString());
685 }
686 // Silently fails, so assume it worked.
687 pVarContainer->del_var(name);
688 }
689 else // Global
690 {
691 // we better have a DDS if we get here!
692 DDS* pDDS = getDDSForCurrentDataset();
693 VALID_PTR(pDDS);
694 pDDS->del_var(name);
695 }
696}
697
698BaseType*
699NCMLParser::getCurrentVariable() const
700{
701 return _pVar;
702}
703
704void NCMLParser::setCurrentVariable(BaseType* pVar)
705{
706 _pVar = pVar;
707 if (pVar) // got a variable
708 {
709 setCurrentAttrTable(&(pVar->get_attr_table()));
710 }
711 else if (getDDSForCurrentDataset()) // null pvar but we have a dds, use global table
712 {
713 DDS* dds = getDDSForCurrentDataset();
714 setCurrentAttrTable(&(dds->get_attr_table()));
715 }
716 else // just clear it out, no context
717 {
718 setCurrentAttrTable(0);
719 }
720}
721
722bool NCMLParser::typeCheckDAPVariable(const BaseType& var, const string& expectedType)
723{
724 // Match all types.
725 if (expectedType.empty()) {
726 return true;
727 }
728 else {
729 // If the type specifies a Structure, it better be a Constructor type.
730 if (expectedType == STRUCTURE_TYPE) {
731 // Calls like is_constructor_type really should be const...
732 BaseType& varSemanticConst = const_cast<BaseType&>(var);
733 return varSemanticConst.is_constructor_type();
734 }
735 else {
736 return (var.type_name() == expectedType);
737 }
738 }
739}
740
741AttrTable*
742NCMLParser::getCurrentAttrTable() const
743{
744 // will load the DDS of current dataset if required.
745 // The end result of calling AttrTableLazyPtr::get() is that the NCMLParser
746 // field '_pAttrTable' points to the DDS' AttrTable.
747 return _pCurrentTable.get();
748}
749
750void NCMLParser::setCurrentAttrTable(AttrTable* pAT)
751{
752 _pCurrentTable.set(pAT);
753}
754
755AttrTable*
756NCMLParser::getGlobalAttrTable() const
757{
758 AttrTable* pAT = 0;
759 DDS* pDDS = getDDSForCurrentDataset();
760 if (pDDS) {
761 pAT = &(pDDS->get_attr_table());
762 }
763 return pAT;
764}
765
766bool NCMLParser::attributeExistsAtCurrentScope(const string& name) const
767{
768 // Lookup the given attribute in the current table.
769 AttrTable::Attr_iter attr;
770 bool foundIt = findAttribute(name, attr);
771 return foundIt;
772}
773
774bool NCMLParser::findAttribute(const string& name, AttrTable::Attr_iter& attr) const
775{
776 AttrTable* pAT = getCurrentAttrTable();
777 if (pAT) {
778 attr = pAT->simple_find(name);
779 return (attr != pAT->attr_end());
780 }
781 else {
782 return false;
783 }
784}
785
786int NCMLParser::tokenizeAttrValues(vector<string>& tokens, const string& values, const string& dapAttrTypeName,
787 const string& separator)
788{
789 // Convert the type string into a DAP AttrType to be sure
790 AttrType dapType = String_to_AttrType(dapAttrTypeName);
791 if (dapType == Attr_unknown) {
792 THROW_NCML_PARSE_ERROR(getParseLineNumber(),
793 "Attempting to tokenize attribute value failed since"
794 " we found an unknown internal DAP type=" + dapAttrTypeName
795 + " for the current fully qualified attribute=" + _scope.getScopeString());
796 }
797
798 // If we're valid type, tokenize us according to type.
799 int numTokens = tokenizeValuesForDAPType(tokens, values, dapType, separator);
800 if (numTokens == 0 && ((dapType == Attr_string) || (dapType == Attr_url) || (dapType == Attr_other_xml))) {
801 tokens.push_back(""); // 0 tokens will cause a problem later, so push empty string!
802 }
803
804 // Now type check the tokens are valid strings for the type.
806
807#if DEBUG_NCML_PARSER_INTERNALS
808
809 if (separator != NCMLUtil::WHITESPACE) {
810 BESDEBUG("ncml", "Got non-default separators for tokenize. separator=\"" << separator << "\"" << endl);
811 }
812
813 string msg;
814 for (unsigned int i = 0; i < tokens.size(); i++) {
815 if (i > 0) {
816 msg += ",";
817 }
818 msg += "\"";
819 msg += tokens[i];
820 msg += "\"";
821 }
822 BESDEBUG("ncml", "Tokenize got " << numTokens << " tokens:\n" << msg << endl);
823
824#endif // DEBUG_NCML_PARSER_INTERNALS
825
826 return numTokens;
827}
828
829int NCMLParser::tokenizeValuesForDAPType(vector<string>& tokens, const string& values, AttrType dapType,
830 const string& separator)
831{
832 tokens.resize(0); // Start empty.
833 int numTokens = 0;
834
835 if (dapType == Attr_unknown) {
836 // Do out best to recover....
837 BESDEBUG("ncml",
838 "Warning: tokenizeValuesForDAPType() got unknown DAP type! Attempting to continue..." << endl);
839 tokens.push_back(values);
840 numTokens = 1;
841 }
842 else if (dapType == Attr_container) {
843 // Not supposed to have values, just push empty string....
844 BESDEBUG("ncml", "Warning: tokenizeValuesForDAPType() got container type, we should not have values!" << endl);
845 tokens.push_back("");
846 numTokens = 1;
847 }
848 else if (dapType == Attr_string) {
849 // Don't use whitespace as default separator for strings.
850 // If they explicitly set it, then fine.
851 // We don't trim strings either. All whitespace, trailing or leading, is left.
852 numTokens = NCMLUtil::tokenize(values, tokens, separator);
853 }
854 else // For all other atomic types, do a split on separator
855 {
856 // Use whitespace as default if sep not set
857 string sep = ((separator.empty()) ? (NCMLUtil::WHITESPACE) : (separator));
858 numTokens = NCMLUtil::tokenize(values, tokens, sep);
859 NCMLUtil::trimAll(tokens);
860 }
861 return numTokens;
862}
863
865
// Used below to convert NcML data type to a DAP data type.
typedef std::map<std::string, std::string> TypeConverter;

// If true, we allow the specification of a DAP scalar type
// in a location expecting an NcML type.
static const bool ALLOW_DAP_TYPES_AS_NCML_TYPES = true;

/*
 * Build the NcML-type -> DAP-type lookup table.
 *
 * The caller owns the returned heap object. Within this file it is only
 * called by getTypeConverter(), which keeps the table in a smart pointer so
 * it is released at program exit instead of being leaked.
 *
 * Ncml DataType:
 <xsd:enumeration value="char"/>
 <xsd:enumeration value="byte"/>
 <xsd:enumeration value="short"/>
 <xsd:enumeration value="int"/>
 <xsd:enumeration value="long"/>
 <xsd:enumeration value="float"/>
 <xsd:enumeration value="double"/>
 <xsd:enumeration value="String"/>
 <xsd:enumeration value="string"/>
 <xsd:enumeration value="Structure"/>
 */
static TypeConverter* makeTypeConverter()
{
    // Held in a unique_ptr while being filled so an exception during
    // insertion cannot leak the half-built table.
    std::unique_ptr<TypeConverter> ptc(new TypeConverter());
    TypeConverter& tc = *ptc;

    // NcML to DAP conversions
    tc["char"] = "Byte"; // char is a C char, let's use a Byte and special parse it as a char not numeric
    tc["byte"] = "Int16"; // Since NcML byte's can be signed, we must promote them to not lose the sign bit.
    tc["short"] = "Int16";
    tc["int"] = "Int32";
    tc["long"] = "Int32"; // not sure of this one
    tc["float"] = "Float32";
    tc["double"] = "Float64";
    tc["string"] = "String"; // allow lower case.
    tc["String"] = "String";
    tc["Structure"] = "Structure";
    tc["structure"] = "Structure"; // allow lower case for this as well

    // If we allow DAP types to be specified directly,
    // then make them be passthroughs in the converter...
    if (ALLOW_DAP_TYPES_AS_NCML_TYPES) {
        tc["Byte"] = "Byte"; // DAP Byte can fit in Byte tho, unlike NcML "byte"!
        tc["Int16"] = "Int16";
        tc["UInt16"] = "UInt16";
        tc["Int32"] = "Int32";
        tc["UInt32"] = "UInt32";
        tc["Float32"] = "Float32";
        tc["Float64"] = "Float64";
        // allow both url cases due to old bug where "Url" is returned in dds rather then DAP2 spec "URL"
        tc["Url"] = "URL";
        tc["URL"] = "URL";
        tc["OtherXML"] = "OtherXML"; // Pass it through
    }

    return ptc.release();
}

// Singleton accessor for the conversion table.
// The table is built once, on first use. Using a function-local static for
// that makes the one-time initialization safe under concurrent first calls
// (C++11 and later) and lets the owning pointer free the table at exit.
static const TypeConverter& getTypeConverter()
{
    static const std::unique_ptr<const TypeConverter> singleton(makeTypeConverter());
    return *singleton;
}
934
935#if 0 // Unused right now... might be later, but I hate compiler warnings.
936// Is the given type a DAP type?
937static bool isDAPType(const string& type)
938{
939 return (String_to_AttrType(type) != Attr_unknown);
940}
941#endif // 0
942
943/* static */
944string NCMLParser::convertNcmlTypeToCanonicalType(const string& ncmlType)
945{
946
947#if 0
948 // OLD WAY - Disallows attributes that do not specify type
949 NCML_ASSERT_MSG(!daType.empty(), "Logic error: convertNcmlTypeToCanonicalType disallows empty() input.");
950#endif
951
952 // NEW WAY - If the attribute does not specify a type them the type is defaulted to "string"
953 string daType = ncmlType;
954 if(daType.empty())
955 daType = "string";
956
957 const TypeConverter& tc = getTypeConverter();
958 TypeConverter::const_iterator it = tc.find(daType);
959
960 if (it == tc.end()) {
961 return ""; // error condition
962 }
963 else {
964 return it->second;
965 }
966}
967
968void NCMLParser::checkDataIsValidForCanonicalTypeOrThrow(const string& type, const vector<string>& tokens) const
969{
970 /* Byte
971 Int16
972 UInt16
973 Int32
974 UInt32
975 Float32
976 Float64
977 String
978 URL
979 OtherXML
980 */
981 bool valid = true;
982 vector<string>::const_iterator it;
983 vector<string>::const_iterator endIt = tokens.end();
984 for (it = tokens.begin(); it != endIt; ++it) {
985 if (type == "Byte") {
986 valid &= check_byte(it->c_str());
987 }
988 else if (type == "Int16") {
989 valid &= check_int16(it->c_str());
990 }
991 else if (type == "UInt16") {
992 valid &= check_uint16(it->c_str());
993 }
994 else if (type == "Int32") {
995 valid &= check_int32(it->c_str());
996 }
997 else if (type == "UInt32") {
998 valid &= check_uint32(it->c_str());
999 }
1000 else if (type == "Float32") {
1001 valid &= check_float32(it->c_str());
1002 }
1003 else if (type == "Float64") {
1004 valid &= check_float64(it->c_str());
1005 }
1006 // Doh! The DAP2 specifies case as "URL" but internally libdap uses "Url" Allow both...
1007 else if (type == "URL" || type == "Url" || type == "String") {
1008 // TODO the DAP call check_url is currently a noop. do we want to check for well-formed URL?
1009 // This isn't an NcML type now, so straight up NcML users might enter URL as String anyway.
1010 valid &= (it->size() <= MAX_DAP_STRING_SIZE);
1011 if (!valid) {
1012 std::stringstream msg;
1013 msg << "Invalid Value: The " << type << " attribute value (not shown) exceeded max string length of "
1014 << MAX_DAP_STRING_SIZE << " at scope=" << _scope.getScopeString() << endl;
1015 THROW_NCML_PARSE_ERROR(getParseLineNumber(), msg.str());
1016 }
1017
1018 valid &= NCMLUtil::isAscii(*it);
1019 if (!valid) {
1020 THROW_NCML_PARSE_ERROR(getParseLineNumber(),
1021 "Invalid Value: The " + type + " attribute value (not shown) has an invalid non-ascii character.");
1022 }
1023 }
1024
1025 // For OtherXML, there's nothing to check so just say it's OK.
1026 // The SAX parser checks it for wellformedness already,
1027 // but ultimately it's just an arbitrary string...
1028 else if (type == "OtherXML") {
1029 valid &= true;
1030 }
1031
1032 else {
1033 // We probably shouldn't get here, but...
1034 THROW_NCML_INTERNAL_ERROR("checkDataIsValidForCanonicalType() got unknown data type=" + type);
1035 }
1036
1037 // Early throw so we know which token it was.
1038 if (!valid) {
1039 THROW_NCML_PARSE_ERROR(getParseLineNumber(),
1040 "Invalid Value given for type=" + type + " with value=" + (*it)
1041 + " was invalidly formed or out of range" + _scope.getScopeString());
1042 }
1043 }
1044 // All is good if we get here.
1045}
1046
1047void NCMLParser::clearAllAttrTables(DDS* dds)
1048{
1049 if (!dds) {
1050 return;
1051 }
1052
1053 // Blow away the global attribute table.
1054 dds->get_attr_table().erase();
1055
1056 // Hit all variables, recursing on containers.
1057 for (DDS::Vars_iter it = dds->var_begin(); it != dds->var_end(); ++it) {
1058 // this will clear not only *it's table, but it's children if it's composite.
1059 clearVariableMetadataRecursively(*it);
1060 }
1061}
1062
1063void NCMLParser::clearVariableMetadataRecursively(BaseType* var)
1064{
1065 VALID_PTR(var);
1066 // clear the table
1067 var->get_attr_table().erase();
1068
1069 if (var->is_constructor_type()) {
1070 Constructor *compositeVar = dynamic_cast<Constructor*>(var);
1071 if (!compositeVar) {
1072 THROW_NCML_INTERNAL_ERROR(
1073 "clearVariableMetadataRecursively: Unexpected cast error on dynamic_cast<Constructor*>");
1074 }
1075 for (Constructor::Vars_iter it = compositeVar->var_begin(); it != compositeVar->var_end(); ++it) {
1076 clearVariableMetadataRecursively(*it);
1077 }
1078 }
1079}
1080
1081void NCMLParser::enterScope(const string& name, ScopeStack::ScopeType type)
1082{
1083 _scope.push(name, type);
1084 BESDEBUG("ncml", "Entering scope: " << _scope.top().getTypedName() << endl);
1085 BESDEBUG("ncml", "New scope=\"" << _scope.getScopeString() << "\"" << endl);
1086}
1087
1088void NCMLParser::exitScope()
1089{
1090 NCML_ASSERT_MSG(!_scope.empty(), "Logic Error: Scope Stack Underflow!");
1091 BESDEBUG("ncml", "Exiting scope " << _scope.top().getTypedName() << endl);
1092 _scope.pop();
1093 BESDEBUG("ncml", "New scope=\"" << _scope.getScopeString() << "\"" << endl);
1094}
1095
1096void NCMLParser::printScope() const
1097{
1098 BESDEBUG("ncml", "Scope=\"" << _scope.getScopeString() << "\"" << endl);
1099}
1100
1101string NCMLParser::getScopeString() const
1102{
1103 return _scope.getScopeString();
1104}
1105
1106string NCMLParser::getTypedScopeString() const
1107{
1108 return _scope.getTypedScopeString();
1109}
1110
1111int NCMLParser::getScopeDepth() const
1112{
1113 return _scope.size();
1114}
1115void NCMLParser::pushElement(NCMLElement* elt)
1116{
1117 VALID_PTR(elt);
1118 _elementStack.push_back(elt);
1119 elt->ref(); // up the count!
1120}
1121
1122void NCMLParser::popElement()
1123{
1124 NCMLElement* elt = _elementStack.back();
1125 _elementStack.pop_back();
1126
1127 // Keep the toString around if we plan to nuke him
1128 string infoOnDeletedDude = ((elt->getRefCount() == 1) ? (elt->toString()) : (string("")));
1129
1130 // Drop the ref count. If that forced a delete, print out the saved string.
1131 if (elt->unref() == 0) {
1132 BESDEBUG("ncml:memory",
1133 "NCMLParser::popElement: ref count hit 0 so we deleted element=" << infoOnDeletedDude << endl);
1134 }
1135}
1136
1137NCMLElement*
1138NCMLParser::getCurrentElement() const
1139{
1140 if (_elementStack.empty()) {
1141 return 0;
1142 }
1143 else {
1144 return _elementStack.back();
1145 }
1146}
1147
1148void NCMLParser::clearElementStack()
1149{
1150 while (!_elementStack.empty()) {
1151 NCMLElement* elt = _elementStack.back();
1152 _elementStack.pop_back();
1153 // unref() them... The Factory will take care of dangling memory...
1154 elt->unref();
1155 }
1156 _elementStack.resize(0);
1157}
1158
1159void NCMLParser::processStartNCMLElement(const std::string& name, const XMLAttributeMap& attrs)
1160{
1161 // Store it in a shared ptr in case this function exceptions before we store it in the element stack.
1162 RCPtr<NCMLElement> elt = _elementFactory.makeElement(name, attrs, *this);
1163
1164 // If we actually created an element of the given type name
1165 if (elt.get()) {
1166 elt->handleBegin();
1167 // tell the container to push the raw element, which will also ref() it on success
1168 // otherwise ~RCPtr will unref() to 0 and thus nuke it on exception.
1169 pushElement(elt.get());
1170 }
1171 else // Unknown element...
1172 {
1173 if (sThrowExceptionOnUnknownElements) {
1174 THROW_NCML_PARSE_ERROR(getParseLineNumber(),
1175 "Unknown element type=" + name + " found in NcML parse with scope=" + _scope.getScopeString());
1176 }
1177 else {
1178 BESDEBUG("ncml", "Start of <" << name << "> element. Element unsupported, ignoring." << endl);
1179 }
1180 }
1181}
1182
1183void NCMLParser::processEndNCMLElement(const std::string& name)
1184{
1185 NCMLElement* elt = getCurrentElement();
1186 VALID_PTR(elt);
1187
1188 // If it matches the one on the top of the stack, then process and pop.
1189 if (elt->getTypeName() == name) {
1190 elt->handleEnd();
1191 popElement(); // handles delete
1192 }
1193 else // the names don't match, so just ignore it.
1194 {
1195 BESDEBUG("ncml", "End of <" << name << "> element unsupported currently, ignoring." << endl);
1196 }
1197}
1198
1199const DimensionElement*
1200NCMLParser::getDimensionAtLexicalScope(const string& dimName) const
1201{
1202 const DimensionElement* ret = 0;
1203 if (getCurrentDataset()) {
1204 ret = getCurrentDataset()->getDimensionInFullScope(dimName);
1205 }
1206 return ret;
1207}
1208
1209string NCMLParser::printAllDimensionsAtLexicalScope() const
1210{
1211 string ret("");
1212 NetcdfElement* dataset = getCurrentDataset();
1213 while (dataset) {
1214 ret += dataset->printDimensions();
1215 dataset = dataset->getParentDataset();
1216 }
1217 return ret;
1218}
1219
1220void NCMLParser::enterOtherXMLParsingState(OtherXMLParser* pOtherXMLParser)
1221{
1222 BESDEBUG("ncml", "Entering state for parsing OtherXML!" << endl);
1223 _pOtherXMLParser = pOtherXMLParser;
1224}
1225
1226bool NCMLParser::isParsingOtherXML() const
1227{
1228 return _pOtherXMLParser;
1229}
1230
1231void NCMLParser::cleanup()
1232{
1233 // The only memory we own is the _response, which is in an unique_ptr so will
1234 // either be returned to caller in parse() and cleared, or else
1235 // delete'd by our dtor via unique_ptr
1236
1237 // All other objects point into _response temporarily, so nothing to destroy there.
1238
1239 // Just for completeness.
1240 resetParseState();
1241}
1242
1243} // namespace ncml_module
1244
Represents an OPeNDAP DAP response object within the BES.
Represents an OPeNDAP DataDDS DAP2 data object within the BES.
static bool IsSet(const std::string &flagName)
see if the debug context flagName is set to true
Definition: BESDebug.h:168
virtual bool start(std::string name)
Definition: BESStopWatch.cc:67
static libdap::BaseType * getVariableNoRecurse(const libdap::DDS &dds, const std::string &name)
static std::unique_ptr< BESDapResponse > makeResponseForType(ResponseType type)
Definition: DDSLoader.cc:431
void loadInto(const std::string &location, ResponseType type, BESDapResponse *pResponse)
Load a DDX or DataDDS response into the given pResponse object, which must be non-null.
Definition: DDSLoader.cc:148
static bool checkResponseIsValidType(ResponseType type, BESDapResponse *pResponse)
Definition: DDSLoader.cc:470
void cleanup()
restore dhi to clean state
Definition: DDSLoader.cc:256
A reference to an RCObject which automatically ref() and deref() on creation and destruction.
Definition: RCObject.h:284
void set(AttrTable *pAT)
Definition: NCMLParser.cc:99
AttrTable * get() const
Definition: NCMLParser.cc:91
RCPtr< NCMLElement > makeElement(const std::string &eltTypeName, const XMLAttributeMap &attrs, NCMLParser &parser)
Definition: NCMLElement.cc:117
Base class for NcML element concrete classes.
Definition: NCMLElement.h:61
virtual void handleContent(const std::string &content)
Definition: NCMLElement.cc:165
virtual const std::string & getTypeName() const =0
virtual void onParseWarning(std::string msg)
Definition: NCMLParser.cc:346
virtual void onEndElement(const std::string &name)
Definition: NCMLParser.cc:253
const XMLNamespaceStack & getXMLNamespaceStack() const
Definition: NCMLParser.cc:206
virtual void onParseError(std::string msg)
Definition: NCMLParser.cc:352
int getParseLineNumber() const
Definition: NCMLParser.cc:200
virtual void onStartElement(const std::string &name, const XMLAttributeMap &attrs)
Definition: NCMLParser.cc:221
static string convertNcmlTypeToCanonicalType(const string &ncmlType)
Definition: NCMLParser.cc:944
virtual void onStartElementWithNamespace(const std::string &localname, const std::string &prefix, const std::string &uri, const XMLAttributeMap &attributes, const XMLNamespaceMap &namespaces)
Definition: NCMLParser.cc:282
void checkDataIsValidForCanonicalTypeOrThrow(const string &type, const vector< string > &tokens) const
Make sure the given tokens are valid for the listed type. For example, makes sure floats are well for...
Definition: NCMLParser.cc:968
void parseInto(const string &ncmlFilename, agg_util::DDSLoader::ResponseType responseType, BESDapResponse *response)
Same as parse, but the response object to parse into is passed down by the caller rather than created...
Definition: NCMLParser.cc:161
static const string STRUCTURE_TYPE
Definition: NCMLParser.h:577
virtual void onEndElementWithNamespace(const std::string &localname, const std::string &prefix, const std::string &uri)
Definition: NCMLParser.cc:298
std::unique_ptr< BESDapResponse > parse(const std::string &ncmlFilename, agg_util::DDSLoader::ResponseType type)
Parse the NcML filename, returning a newly allocated DDS response containing the underlying dataset t...
Definition: NCMLParser.cc:148
virtual void onCharacters(const std::string &content)
Definition: NCMLParser.cc:329
NCMLParser(agg_util::DDSLoader &loader)
Create a structure that can parse an NCML filename and return a transformed response of requested t...
Definition: NCMLParser.cc:134
virtual void setParseLineNumber(int line)
Definition: NCMLParser.cc:358
static int tokenize(const std::string &str, std::vector< std::string > &tokens, const std::string &delimiters=" \t")
Definition: NCMLUtil.cc:58
static const std::string WHITESPACE
Definition: NCMLUtil.h:80
static void trimAll(std::vector< std::string > &tokens, const std::string &trimChars=WHITESPACE)
Definition: NCMLUtil.cc:127
static bool isAscii(const std::string &str)
Definition: NCMLUtil.cc:94
Concrete class for NcML <netcdf> element.
Definition: NetcdfElement.h:64
void unborrowResponseObject(BESDapResponse *pResponse)
const DimensionElement * getDimensionInFullScope(const std::string &name) const
virtual const libdap::DDS * getDDS() const
void borrowResponseObject(BESDapResponse *pResponse)
AggregationElement * getChildAggregation() const
virtual void onStartElementWithNamespace(const std::string &localname, const std::string &prefix, const std::string &uri, const XMLAttributeMap &attributes, const XMLNamespaceMap &namespaces)
virtual void onEndElement(const std::string &name)
virtual void onStartElement(const std::string &name, const XMLAttributeMap &attrs)
virtual void onCharacters(const std::string &content)
virtual void onEndElementWithNamespace(const std::string &localname, const std::string &prefix, const std::string &uri)
Wrapper for libxml SAX parser C callbacks into C++.
bool parse(const std::string &ncmlFilename)
Do a SAX parse of the ncmlFilename and pass the calls to wrapper parser.
std::string getTypedScopeString() const
Definition: ScopeStack.cc:109
std::string getScopeString() const
Definition: ScopeStack.cc:96
Helper class for temporarily hijacking an existing dhi to load a DDX response for one particular file...
NcML Parser for adding/modifying/removing metadata (attributes) to existing local datasets using NcML...