bes Updated for version 3.20.13
AggregationElement.cc
1
2// This file is part of the "NcML Module" project, a BES module designed
3// to allow NcML files to be used as a wrapper to add
4// AIS to existing datasets of any format.
5//
6// Copyright (c) 2009 OPeNDAP, Inc.
7// Author: Michael Johnson <m.johnson@opendap.org>
8//
9// For more information, please also see the main website: http://opendap.org/
10//
11// This library is free software; you can redistribute it and/or
12// modify it under the terms of the GNU Lesser General Public
13// License as published by the Free Software Foundation; either
14// version 2.1 of the License, or (at your option) any later version.
15//
16// This library is distributed in the hope that it will be useful,
17// but WITHOUT ANY WARRANTY; without even the implied warranty of
18// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19// Lesser General Public License for more details.
20//
21// You should have received a copy of the GNU Lesser General Public
22// License along with this library; if not, write to the Free Software
23// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24//
25// Please see the files COPYING and COPYRIGHT for more information on the LGPL.
26//
27// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
29
30#include "config.h"
31
32#include <sstream>
33#include <fstream>
34#include <sys/stat.h>
35
36#include "AggregationElement.h"
37#include "AggMemberDatasetUsingLocationRef.h" // agg_util
38#include "AggMemberDatasetSharedDDSWrapper.h" // agg_util
39#include "AggregationUtil.h" // agg_util
40#include "ArrayAggregateOnOuterDimension.h" // agg_util
41#include "ArrayJoinExistingAggregation.h" // agg_util
42#include "GridAggregateOnOuterDimension.h" // agg_util
43#include "GridJoinExistingAggregation.h" // agg_util
44#include "AggMemberDatasetDimensionCache.h"
45
46#include <libdap/AttrTable.h> // libdap
47#include <libdap/Array.h> // libdap
48#include <libdap/AttrTable.h> // libdap
49#include "DDSAccessInterface.h" // agg_util
50#include "Dimension.h" // agg_util
51#include "DimensionElement.h"
52#include <libdap/Grid.h> // libdap
53#include "MyBaseTypeFactory.h"
54#include "NCMLBaseArray.h"
55#include "NCMLDebug.h"
56#include "NCMLParser.h"
57#include "NetcdfElement.h"
58#include "ScanElement.h"
59#include "BESDebug.h"
60#include "BESStopWatch.h"
61
64using agg_util::AMDList;
69
70using namespace std;
71
72namespace ncml_module {
73const string AggregationElement::_sTypeName = "aggregation";
74
75const vector<string> AggregationElement::_sValidAttrs = getValidAttributes();
76
77AggregationElement::AggregationElement() :
78 NCMLElement(0), _type(""), _dimName(""), _recheckEvery(""), _parent(0), _datasets(), _scanners(), _aggVars(), _gotVariableAggElement(
79 false), _wasAggregatedMapAddedForJoinExistingGrid(false), _coordinateAxisType("")
80{
81}
82
83AggregationElement::AggregationElement(const AggregationElement& proto) :
84 RCObjectInterface(), NCMLElement(proto), _type(proto._type), _dimName(proto._dimName), _recheckEvery(
85 proto._recheckEvery), _parent(proto._parent) // my parent is the same too... is this safe without a true weak reference?
86 , _datasets() // deep copy below
87 , _scanners() // deep copy below
88 , _aggVars(proto._aggVars), _gotVariableAggElement(false), _wasAggregatedMapAddedForJoinExistingGrid(false), _coordinateAxisType(
89 "")
90{
91 // Deep copy all the datasets and add them to me...
92 // This is potentially expensive in memory for large datasets, so let's tell someone.
93 if (!proto._datasets.empty()) {
94 BESDEBUG("ncml",
95 "WARNING: AggregationElement copy ctor is deep copying all contained datasets! This might be memory and time intensive!");
96 }
97
98 // Clone the actual members
99 _datasets.reserve(proto._datasets.size());
100 for (vector<NetcdfElement*>::const_iterator it = proto._datasets.begin(); it != proto._datasets.end(); ++it) {
101 const NetcdfElement* elt = (*it);
102 addChildDataset(elt->clone());
103 }
104 NCML_ASSERT(_datasets.size() == proto._datasets.size());
105
106 _scanners.reserve(proto._scanners.size());
107 for (vector<ScanElement*>::const_iterator it = proto._scanners.begin(); it != proto._scanners.end(); ++it) {
108 const ScanElement* elt = (*it);
109 addScanElement(elt->clone());
110 }
111 NCML_ASSERT(_scanners.size() == proto._scanners.size());
112}
113
114AggregationElement::~AggregationElement()
115{
116 BESDEBUG("ncml:memory", "~AggregationElement called...");
117 _type = "";
118 _dimName = "";
119 _recheckEvery = "";
120 _parent = 0;
121 _wasAggregatedMapAddedForJoinExistingGrid = false;
122
123 // Release strong references to the contained netcdfelements....
124 while (!_datasets.empty()) {
125 NetcdfElement* elt = _datasets.back();
126 _datasets.pop_back();
127 elt->unref(); // Will be deleted if the last strong reference
128 }
129
130 // And the scan elements
131 while (!_scanners.empty()) {
132 ScanElement* elt = _scanners.back();
133 _scanners.pop_back();
134 elt->unref(); // Will be deleted if the last strong reference
135 }
136}
137
138const string&
140{
141 return _sTypeName;
142}
143
146{
147 return new AggregationElement(*this);
148}
149
151{
152 _type = attrs.getValueForLocalNameOrDefault("type", "");
153 _dimName = attrs.getValueForLocalNameOrDefault("dimName", "");
154 _recheckEvery = attrs.getValueForLocalNameOrDefault("recheckEvery", "");
155
156 // default is to print errors and throw which we want.
157 validateAttributes(attrs, _sValidAttrs);
158}
159
161{
162#if 0
163 BESStopWatch sw;
164 if (BESISDEBUG( TIMING_LOG_KEY ))
165 sw.start("AggregationElement::handleBegin", "");
166#endif
167
168 NCML_ASSERT(!getParentDataset());
169
170 // Check that the immediate parent element is netcdf since we cannot put an aggregation anywhere else.
171 if (!_parser->isScopeNetcdf()) {
172 THROW_NCML_PARSE_ERROR(_parser->getParseLineNumber(),
173 "Got an <aggregation> = " + toString()
174 + " at incorrect parse location. They can only be direct children of <netcdf>. Scope="
175 + _parser->getScopeString());
176 }
177
178 NetcdfElement* dataset = _parser->getCurrentDataset();
179 NCML_ASSERT_MSG(dataset,
180 "We expected a non-noll current dataset while processing AggregationElement::handleBegin() for " + toString());
181 // If the enclosing dataset already has an aggregation, this is a parse error.
182 if (dataset->getChildAggregation()) {
183 THROW_NCML_PARSE_ERROR(_parser->getParseLineNumber(),
184 "Got <aggregation> = " + toString() + " but the enclosing dataset = " + dataset->toString()
185 + " already had an aggregation set! There can be only one!");
186 }
187 // Set me as the aggregation for the current dataset.
188 // This will set my parent and also ref() me.
189 dataset->setChildAggregation(this);
190}
191
192void AggregationElement::handleContent(const string& content)
193{
194 // Aggregations do not specify content!
195 if (!NCMLUtil::isAllWhitespace(content)) {
196 THROW_NCML_PARSE_ERROR(_parser->getParseLineNumber(),
197 "Got non-whitespace for content and didn't expect it. Element=" + toString() + " content=\"" + content
198 + "\"");
199 }
200}
201
203{
204#if 1
205 BESStopWatch sw;
206 if (BESDebug::IsSet(TIMING_LOG_KEY)) sw.start("AggregationElement::handleEnd", "");
207#endif
208 // Handle the actual processing!!
209 BESDEBUG("ncml", "AggregationElement::handleEnd() - Processing the aggregation!!" << endl);
210
211 if (isUnionAggregation()) {
212 BESDEBUG("ncml2", "AggregationElement::handleEnd() - isUnionAggregation" << endl);
213 processUnion();
214 }
215 else if (isJoinNewAggregation()) {
216 BESDEBUG("ncml2", "AggregationElement::handleEnd() - isJoinNewAggregation" << endl);
217 processJoinNew();
218 }
219 else if (isJoinExistingAggregation()) {
220 BESDEBUG("ncml2", "AggregationElement::handleEnd() - isJoinExistingAggregation" << endl);
221 processJoinExisting();
222 }
223 else if (_type == "forecastModelRunCollection" || _type == "forecastModelSingleRunCollection") {
224 THROW_NCML_PARSE_ERROR(_parser->getParseLineNumber(),
225 "Sorry, we do not implement the forecastModelRunCollection aggregations in this version of the NCML Module!");
226 }
227 else {
228 THROW_NCML_PARSE_ERROR(_parser->getParseLineNumber(),
229 "Unknown aggregation type=" + _type + " at scope=" + _parser->getScopeString());
230 }
231}
232
234{
235 return "<" + _sTypeName + " type=\"" + _type + "\"" + printAttributeIfNotEmpty("dimName", _dimName)
236 + printAttributeIfNotEmpty("recheckEvery", _recheckEvery) + ">";
237}
238
239bool AggregationElement::isJoinNewAggregation() const
240{
241 return (_type == "joinNew");
242}
243
244bool AggregationElement::isUnionAggregation() const
245{
246 return (_type == "union");
247}
248
249bool AggregationElement::isJoinExistingAggregation() const
250{
251 return (_type == "joinExisting");
252}
253
255{
256 VALID_PTR(pDataset);
257 BESDEBUG("ncml", "AggregationElement: adding child dataset: " << pDataset->toString() << endl);
258
259 // Add as a strong reference.
260 pDataset->ref();
261 _datasets.push_back(pDataset);
262
263 // also set a weak reference to us as the parent
264 pDataset->setParentAggregation(this);
265}
266
268{
269 if (isAggregationVariable(name)) {
270 THROW_NCML_PARSE_ERROR(_parser->getParseLineNumber(),
271 "Tried to add an aggregation variable twice: name=" + name + " at scope=" + _parser->getScopeString());
272 }
273 else {
274 _aggVars.push_back(name);
275 BESDEBUG("ncml", "Added aggregation variable name=" + name << endl);
276 }
277}
278
279bool AggregationElement::isAggregationVariable(const string& name) const
280{
281 bool ret = false;
282 AggVarIter endIt = endAggVarIter();
283 AggVarIter it = beginAggVarIter();
284 for (; it != endIt; ++it) {
285 if (name == *it) {
286 ret = true;
287 break;
288 }
289 }
290 return ret;
291}
292
293string AggregationElement::printAggregationVariables() const
294{
295 string ret("{ ");
296 AggVarIter endIt = endAggVarIter();
297 AggVarIter it = beginAggVarIter();
298 for (; it != endIt; ++it) {
299 ret += *it;
300 ret += " ";
301 }
302 ret += "}";
303 return ret;
304}
305
306AggregationElement::AggVarIter AggregationElement::beginAggVarIter() const
307{
308 return _aggVars.begin();
309}
310
311AggregationElement::AggVarIter AggregationElement::endAggVarIter() const
312{
313 return _aggVars.end();
314}
315
317{
318 return _gotVariableAggElement;
319}
320
322{
323 _gotVariableAggElement = true;
324}
325
327{
328 VALID_PTR(pScanner);
329 _scanners.push_back(pScanner);
330 pScanner->ref(); // strong ref
331 pScanner->setParent(this); // weak ref.
332}
333
335{
336 BESDEBUG("ncml", "AggregationElement::processParentDatasetComplete() called..." << endl);
337
338 if (_type == "joinNew") {
339 processParentDatasetCompleteForJoinNew();
340 }
341 else if (_type == "joinExisting") {
342 processParentDatasetCompleteForJoinExisting();
343 }
344}
345
348
351{
352 NetcdfElement* ret = getParentDataset();
353 _parent = parent;
354 return ret;
355}
356
357void AggregationElement::processUnion()
358{
359 BESDEBUG("ncml", "Processing a union aggregation..." << endl);
360
361 // Merge all the dimensions... For now, it is a parse error if a dimension
362 // with the same name exists but has a different size.
363 // Since DAP2 doesn't have dimensions, we can't do this in agg_util, but
364 // have to do it here.
365 mergeDimensions();
366
367 // Merge the attributes and variables in all the DDS's into our parent DDS....
368 vector<const DDS*> datasetsInOrder;
369 // NOTE WELL: this will LOAD ALL DDX's, but there's no choice for union.
370 // This doesn't load data, just the metadata!
371 collectDatasetsInOrder(datasetsInOrder);
372 DDS* pUnion = 0;
373 if (getParentDataset()) {
374 pUnion = getParentDataset()->getDDS();
375 }
376 AggregationUtil::performUnionAggregation(pUnion, datasetsInOrder);
377}
378
379void AggregationElement::processJoinNew()
380{
381 BESStopWatch sw;
382 if (BESDebug::IsSet(TIMING_LOG_KEY)) sw.start("AggregationElement::processJoinNew", "");
383
384 // This will run any child <scan> elements to prepare them.
385 processAnyScanElements();
386
387 BESDEBUG("ncml",
388 "AggregationElement::processJoinNew() - beginning joinNew on the following aggVars=" + printAggregationVariables() << endl);
389
390 // Union the dimensions of the child sets so they're available
391 BESDEBUG("ncml", "Merging dimensions from children into aggregated dataset..." << endl);
392 mergeDimensions();
393
394 // For now we will explicitly create the new dimension for lookups.
395 unsigned int newDimSize = _datasets.size(); // ASSUMES we find an aggVar in EVERY dataset!
396 getParentDataset()->addDimension(new DimensionElement(agg_util::Dimension(_dimName, newDimSize)));
397
398 // We need at least one dataset, so warn.
399 if (_datasets.empty()) {
400 THROW_NCML_PARSE_ERROR(line(), "In joinNew aggregation we cannot have zero datasets specified!");
401 }
402
403 // This is where the output variables go
404 DDS* pAggDDS = getParentDataset()->getDDS();
405 // The first dataset acts as the template for the remainder
406 DDS* pTemplateDDS = _datasets[0]->getDDS();
407 NCML_ASSERT_MSG(pTemplateDDS, "AggregationElement::processJoinNew() - NULL template dataset!");
408
409 // First, union the template's global attribute table into the output's table.
410 AggregationUtil::unionAttrsInto(&(pAggDDS->get_attr_table()), pTemplateDDS->get_attr_table());
411
412 // Then perform the aggregation for each variable...
413 // TODO REFACTOR OPTIMIZE We loop on variables, not the datasets.
414 // It might be more efficient to do all vars for each dataset
415 vector<string>::const_iterator endIt = _aggVars.end();
416 for (vector<string>::const_iterator it = _aggVars.begin(); it != endIt; ++it) {
417 const string& varName = *it;
418 BESDEBUG("ncml",
419 "AggregationElement::processJoinNew() - Aggregating with joinNew on variable=" << varName << "..." << endl);
420 processJoinNewOnAggVar(pAggDDS, varName, *pTemplateDDS);
421 }
422
423 // Union any non-aggregated variables from the template dataset into the aggregated dataset
424 // Because we want the joinExistingaggregation to build up the Coordinate Variables (CVs)
425 // in the order they are declared in the NCML file, we need to track the current position
426 // where the last one was inserted. We can do that with a field in the AggregationUtil
427 // class. Here we reset that field so that it starts at position 0. 12.13.11 jhrg
428 AggregationUtil::resetCVInsertionPosition();
429
430 // Union any non-aggregated variables from the template dataset into the aggregated dataset
431 AggregationUtil::unionAllVariablesInto(pAggDDS, *pTemplateDDS, /*add_at_top = */true);
432}
433
434#if 0
435// This function was used previously, but not now.
436// Leaving it in case we need it, but commented out
437// to deal with -werror compilation.
438
439/* File local helper for next function */
440static bool
441doAllScannersSpecifyNCoords(const vector<ScanElement*>& scanners)
442{
443 bool success = true;
444 for (vector<ScanElement*>::const_iterator it = scanners.begin();
445 it != scanners.end();
446 ++it)
447 {
448 VALID_PTR(*it);
449 if ((*it)->ncoords().empty())
450 {
451 success = false;
452 break;
453 }
454 }
455 return success;
456}
457#endif // 0
458
459void AggregationElement::processJoinExisting()
460{
461 BESDEBUG("ncml:2", "Called AggregationElement::processJoinExisting()...");
462
463 // Merge any scans into _datasets
464 processAnyScanElements();
465
466 // We need at least one dataset or it's an error
467 if (_datasets.empty()) {
468 THROW_NCML_PARSE_ERROR(line(), "In joinExisting aggregation we cannot have zero datasets specified!");
469 }
470
471 // We need to know the size of the joinExisting dimension
472 // for all granule datasets.
473 // Make sure that we either get them from:
474 // 1) ncoords specified
475 // 2) Dimension cache file previously created
476 // 3) Load them the slow way and cache the result
477 AMDList granuleList;
478 granuleList.reserve(_datasets.size());
479 fillDimensionCacheForJoinExistingDimension(granuleList, _dimName);
480
481 // Figure out the cardinality of the aggregated dimension
482 // and add it into the parent dataset's scope for lookups.
483 addNewDimensionForJoinExisting(granuleList);
484
485 // Union any declared dimensions of the child sets so they're available,
486 // but be carefuly to skip the join dimension since we already created it
487 // new ourselves with the post-aggregation value!
488 BESDEBUG("ncml:2", "Merging dimensions from children into aggregated dataset..." << endl);
489 mergeDimensions(true, _dimName);
490
491 // This is where the output variables go
492 DDS* pAggDDS = getParentDataset()->getDDS();
493
494 // The first dataset acts as the template
495 DDS* pTemplateDDS = _datasets[0]->getDDS();
496 NCML_ASSERT_MSG(pTemplateDDS, "AggregationElement::processJoinExisting(): NULL template dataset!");
497
498 // First, union the template's global attribute table into the output's table.
499 AggregationUtil::unionAttrsInto(&(pAggDDS->get_attr_table()), pTemplateDDS->get_attr_table());
500
501 // Fills in the _aggVars list properly.
502 decideWhichVariablesToJoinExist(*pTemplateDDS);
503
504 // For each variable in the to-be-aggregated list, create the
505 // aggregation variable in the output based on the granule list.
506 vector<string>::const_iterator endIt = _aggVars.end();
507 for (vector<string>::const_iterator it = _aggVars.begin(); it != endIt; ++it) {
508 const string& varName = *it;
509 BESDEBUG("ncml", "Aggregating with joinExisting on variable=" << varName << "..." << endl);
510 processJoinExistingOnAggVar(pAggDDS, varName, *pTemplateDDS);
511 }
512
513 // Union in the remaining unaggregated variables from the template DDS
514 // since they are likely to be coordinate variables.
515 // Handle variableAgg properly.
516 unionAddAllRequiredNonAggregatedVariablesFrom(*pTemplateDDS);
517}
518
519void AggregationElement::unionAddAllRequiredNonAggregatedVariablesFrom(const DDS& templateDDS)
520{
521 // Union any non-aggregated variables from the template dataset into the aggregated dataset
522 // Because we want the joinExistingaggregation to build up the Coordinate Variables (CVs)
523 // in the order they are declared in the NCML file, we need to track the current position
524 // where the last one was inserted. We can do that with a field in the AggregationUtil
525 // class. Here we reset that field so that it starts at position 0. 12.13.11 jhrg
526 AggregationUtil::resetCVInsertionPosition();
527
528 // If we didn't get a variable agg for a joinExisting, then union them all.
529 if (isJoinExistingAggregation()) {
530 if (!gotVariableAggElement()) {
531 AggregationUtil::unionAllVariablesInto(getParentDataset()->getDDS(), templateDDS, /*add_at_top = */true);
532 }
533 else {
534 // THROW ONLY IF A GRID since we need to implement the path that handles maps
535 }
536 } // if isJoinExistingAggregation
537
538 else if (isJoinNewAggregation())
539 // joinNew requires the list of vars, so for this one just union them all in as well.
540 {
541 AggregationUtil::unionAllVariablesInto(getParentDataset()->getDDS(), templateDDS, /*add_at_top = */true);
542 }
543}
544
545void AggregationElement::decideWhichVariablesToJoinExist(const DDS& templateDDS)
546{
547 // If they were not specified by hand, then discover them.
548 if (_aggVars.empty()) {
549 BESDEBUG("ncml",
550 "Searching the the template DDS for variables with outer " "dimension matching the join dimension = " << _dimName << " in order to add them to the aggregation output list." << endl);
551
552 // the prototype (first dataset) will define the set of vars to be aggregated.
553 // Note: the c.v. dim(dim) _must_ exist, either in all datasets or in the agg itself.
554 vector<string> matchingVars;
555 findVariablesWithOuterDimensionName(matchingVars, templateDDS, _dimName);
556 for (vector<string>::const_iterator it = matchingVars.begin(); it != matchingVars.end(); ++it) {
558 }
559 }
560 else // make sure the listed ones are valid
561 {
562 BESDEBUG("ncml",
563 "joinExist aggregation had variableAgg specified... " "Validating these variables have outer dimension named " << _dimName << endl);
564
565 for (vector<string>::const_iterator it = _aggVars.begin(); it != _aggVars.end(); ++it) {
566 BaseType* pVar = AggregationUtil::findVariableAtDDSTopLevel(templateDDS, *it);
567
568 // First, it must exist!
569 if (!pVar) {
570 std::ostringstream msg;
571 msg << "Error validating the variableAgg list. The variable named " << *it
572 << " was not found in the top-level DDS!";
573 THROW_NCML_PARSE_ERROR(line(), msg.str());
574 }
575
576 // Next see that it can be aggregated
577 Array* pArray = AggregationUtil::getAsArrayIfPossible(pVar);
578 if (!pArray) {
579 std::ostringstream msg;
580 msg << "The declared variableAgg aggregation variable named " << *it
581 << " was not of a type able to be aggregated!";
582 THROW_NCML_PARSE_ERROR(line(), msg.str());
583 }
584
585 // Make sure the dimension name matches.
586 if (pArray->dimension_name(pArray->dim_begin()) != _dimName) {
587 std::ostringstream msg;
588 msg << "The declared variableAgg variable named " << *it << " did not match the outer dimension name "
589 << _dimName << " for this joinExisting aggregation!";
590 THROW_NCML_PARSE_ERROR(line(), msg.str());
591 }
592
593 // Otherwise, it's good, so let the log know.
594 std::ostringstream msg;
595 msg << "The variable named " << *it << " is a valid joinExisting variable. Will be added to output.";
596 BESDEBUG("ncml", msg.str() << endl);
597 } // for loop over user-declared variableAgg list.
598 }
599}
600
601//
602void AggregationElement::fillDimensionCacheForJoinExistingDimension(AMDList& granuleList,
603 const std::string& /* aggDimName */)
604{
605 // First, run down the dataset list (which has been expanded with scanners)
606 // and create the AMD list for them.
607 // for each entry in _dataset
608 vector<NetcdfElement*>::iterator endIt = _datasets.end();
609 for (vector<NetcdfElement*>::iterator it = _datasets.begin(); it != endIt; ++it) {
610 granuleList.push_back((*it)->getAggMemberDataset());
611 }
612
613 // Second, see if there is an ncoords for each of the datasets,
614 // and if so, for each one add it to the cache in the AMD.
615 if (doesFirstGranuleSpecifyNcoords()) {
616 // If so, check they all do or it's a user error.
617 if (!doAllGranulesSpecifyNcoords()) {
618 THROW_NCML_PARSE_ERROR(-1, "In a joinExisting aggregation we found that the first "
619 "granule specified an ncoords but not all of the others "
620 "did. Either all or none of them should have ncoords specified.");
621 }
622 // otherwise we're good, seed the cache from the ncoords
623 else {
624 seedDimensionCacheFromUserSpecs(granuleList);
625 }
626 }
627 else // look for cached dimension file or load dimensionalities from granules
628 {
629 BESStopWatch sw;
630 if (BESDebug::IsSet(TIMING_LOG_KEY)) sw.start("LOAD_AGGREGATION_DIMENSIONS_CACHE", "");
631
633
634 AMDList::iterator endIt = granuleList.end();
635 for (AMDList::iterator it = granuleList.begin(); it != endIt; ++it) {
636 AggMemberDataset *amd = (*it).get();
637 if(aggDimCache) {
638 BESDEBUG("ncml", "AggregationElement::fillDimensionCacheForJoinExistingDimension() - Loading dimension cache for: " << (*it)->getLocation() << "..." << endl);
639 aggDimCache->loadDimensionCache(amd);
640 }
641 else {
642 BESDEBUG("ncml", "AggregationElement::fillDimensionCacheForJoinExistingDimension() - " <<
643 "WARNING NcML Dimension Caching is not configured or is not working! Loading dimensions from DDS for dataset: " <<
644 (*it)->getLocation() << "" << endl);
646 }
647 }
648 }
649}
650
651
652
653
654
655bool AggregationElement::doesFirstGranuleSpecifyNcoords() const
656{
657 if (_datasets.size() > 0) {
658 return _datasets.at(0)->hasNcoords();
659 }
660 else {
661 return false;
662 }
663}
664
665bool AggregationElement::doAllGranulesSpecifyNcoords() const
666{
667 bool success = true;
668 vector<NetcdfElement*>::const_iterator endIt = _datasets.end();
669 for (vector<NetcdfElement*>::const_iterator it = _datasets.begin(); it != endIt; ++it) {
670 success = success && (*it)->hasNcoords();
671 if (!success) {
672 break;
673 }
674 }
675 return success;
676}
677
678void AggregationElement::seedDimensionCacheFromUserSpecs(agg_util::AMDList& rGranuleList) const
679{
680 NCML_ASSERT(_datasets.size() == rGranuleList.size());
681
682 vector<NetcdfElement*>::const_iterator datasetIt;
683 AMDList::iterator amdIt;
684 for (datasetIt = _datasets.begin(), amdIt = rGranuleList.begin(); datasetIt != _datasets.end();
685 ++datasetIt, ++amdIt) {
686 // Make sure the attribute exists or warn the author
687 const NetcdfElement* pDataset = *datasetIt;
688 if (!pDataset->hasNcoords()) {
689 // This is an assumption of the
690 THROW_NCML_INTERNAL_ERROR("Expected netcdf element member of a joinExisting "
691 "aggregation to have the ncoords attribute specified "
692 "but it did not.");
693 }
694 unsigned int ncoords = pDataset->getNcoordsAsUnsignedInt();
695 RCPtr<AggMemberDataset> pAMD = *amdIt;
696 VALID_PTR(pAMD.get());
698 dim.name = _dimName;
699 dim.size = ncoords;
700 pAMD->setDimensionCacheFor(dim, true);
701
702 NCML_ASSERT_MSG((pAMD->isDimensionCached(dim.name) && pAMD->getCachedDimensionSize(dim.name) == dim.size),
703 "Dimension cache bug");
704 }
705 // make sure they stayed in sync
706 NCML_ASSERT(amdIt == rGranuleList.end());
707}
708
709
710// For now, just count up the ncoords...
711void AggregationElement::addNewDimensionForJoinExisting(const agg_util::AMDList& rGranuleList)
712{
713 // Sum up the cardinalities from AMD's
714 unsigned int aggDimSize = 0;
715 for (AMDList::const_iterator it = rGranuleList.begin(); it != rGranuleList.end(); ++it) {
716 NCML_ASSERT((*it)->isDimensionCached(_dimName));
717 aggDimSize += (*it)->getCachedDimensionSize(_dimName);
718 }
719
720 // Error if the dimension exists in the output local scope already
721 NCML_ASSERT(getParentDataset());
722 NCML_ASSERT_MSG(!(getParentDataset()->getDimensionInLocalScope(_dimName)),
723 "AggregationElement::addNewDimensionForJoinExisting() found a dimension "
724 "named " + _dimName + " already but did not expect it!");
725
726 // Otherwise, create and add it in.
727 getParentDataset()->addDimension(new DimensionElement(agg_util::Dimension(_dimName, aggDimSize)));
728
729 // And tell the world at large
730 ostringstream oss;
731 oss << "Added joinExisting aggregation dimension "
732 " name=" << _dimName << " with aggregated size= " << aggDimSize;
733 BESDEBUG("ncml:2", oss.str());
734}
735
736void AggregationElement::findVariablesWithOuterDimensionName(vector<string>& oMatchingVars, const DDS& templateDDS,
737 const string& outerDimName) const
738{
739 for (DDS::Vars_iter it = const_cast<DDS&>(templateDDS).var_begin(); it != const_cast<DDS&>(templateDDS).var_end();
740 ++it) {
741 Array* pArray = AggregationUtil::getAsArrayIfPossible(*it);
742 // Only if it's an array or a grid data array
743 if (pArray && outerDimName == pArray->dimension_name(pArray->dim_begin())) {
744 oMatchingVars.push_back(pArray->name());
745 }
746 }
747}
748
749void AggregationElement::getParamsForJoinAggOnVariable(JoinAggParams* pOutParams, const DDS& /*aggOutputDDS*/,
750 const std::string& varName, const DDS& templateDDS)
751{
752 VALID_PTR(pOutParams);
753
754 // Look up the template variable.
755 pOutParams->_pAggVarTemplate = AggregationUtil::getVariableNoRecurse(templateDDS, varName);
756 if (!(pOutParams->_pAggVarTemplate)) {
757 THROW_NCML_PARSE_ERROR(line(),
758 " We could not find a template for the specified aggregation variable=" + varName
759 + " so we cannot continue the aggregation.");
760 }
761
762 // Dimension must exist already
763 const DimensionElement* pDim = getParentDataset()->getDimensionInLocalScope(_dimName);
764 NCML_ASSERT_MSG(pDim, "Didn't find a DimensionElement with the aggregation dimName=" + _dimName);
765 pOutParams->_pAggDim = &(pDim->getDimension());
766
767#if 0
768 // I don't follow the logic here. I think we should be able to add attributes to
769 // variables that already exist. This may be intended to protect against removing
770 // the variable on which the aggregation is performed 'over' (e.g., time) with a
771 // different variable. But it has the affect of also prohibiting that addition of
772 // an attribute on that variable. I'm removing it for now. jhrg 10/17/11
773
774 // Be sure the name isn't taken in the output DDS.
775 BaseType* pExists = AggregationUtil::getVariableNoRecurse(aggOutputDDS, varName);
776 NCML_ASSERT_MSG(!pExists,
777 "Failed since the name of the new variable to add (name="
778 + varName
779 + ") already exists in the "
780 " output aggregation DDS! What happened?!");
781#endif
782
783 // Get a vector of lazy loaders
784 // We will transfer AGM ownership to the calls so do not need to delete them.
785 collectAggMemberDatasets(pOutParams->_memberDatasets);
786}
787
788void AggregationElement::processJoinNewOnAggVar(DDS* pAggDDS, const std::string& varName, const DDS& templateDDS)
789{
790 BESStopWatch sw;
791 if (BESDebug::IsSet(TIMING_LOG_KEY)) sw.start("AggregationElement::processJoinNewOnAggVar", "");
792
793 // Get the params we need to factory the actual aggregation subclass
794 JoinAggParams joinAggParams;
795 getParamsForJoinAggOnVariable(&joinAggParams, // output
796 *pAggDDS, varName, templateDDS);
797
798 // Factory out the proper subtype
799 BaseType* pAggVarTemplate = joinAggParams._pAggVarTemplate;
800 if (pAggVarTemplate->type() == dods_array_c) {
801 processAggVarJoinNewForArray(*pAggDDS, *(static_cast<Array*>(pAggVarTemplate)), *(joinAggParams._pAggDim),
802 joinAggParams._memberDatasets);
803 }
804 else if (pAggVarTemplate->type() == dods_grid_c) {
805 processAggVarJoinNewForGrid(*pAggDDS, *(static_cast<Grid*>(pAggVarTemplate)), *(joinAggParams._pAggDim),
806 joinAggParams._memberDatasets);
807 }
808 else {
809 THROW_NCML_PARSE_ERROR(line(),
810 "Got an aggregation variable not of type Array or Grid, but of: " + pAggVarTemplate->type_name()
811 + " which we cannot aggregate!");
812 }
813 // Nothing else to do for this var until the call to processParentDataset() is complete.
814}
815
816void AggregationElement::processJoinExistingOnAggVar(DDS* pAggDDS, const std::string& varName, const DDS& templateDDS)
817{
818
819 BESStopWatch sw;
820 if (BESDebug::IsSet(TIMING_LOG_KEY)) sw.start("AggregationElement::processJoinExistingOnAggVar", "");
821
822 // Get the params we need to factory the actual aggregation subclass
823 JoinAggParams joinAggParams;
824 getParamsForJoinAggOnVariable(&joinAggParams, // output
825 *pAggDDS, varName, templateDDS);
826
827 // Factory out the proper subtype
828 BaseType* pAggVarTemplate = joinAggParams._pAggVarTemplate;
829 if (pAggVarTemplate->type() == dods_array_c) {
830 processAggVarJoinExistingForArray(*pAggDDS, *(static_cast<Array*>(pAggVarTemplate)), *(joinAggParams._pAggDim),
831 joinAggParams._memberDatasets);
832 }
833 else if (pAggVarTemplate->type() == dods_grid_c) {
834 processAggVarJoinExistingForGrid(*pAggDDS, *(static_cast<Grid*>(pAggVarTemplate)), *(joinAggParams._pAggDim),
835 joinAggParams._memberDatasets);
836 }
837 else {
838 THROW_NCML_PARSE_ERROR(line(),
839 "Got an aggregation variable not of type Array or Grid, but of: " + pAggVarTemplate->type_name()
840 + " which we cannot aggregate!");
841 }
842 // Nothing else to do for this var until the call to processParentDataset() is complete.
843}
844
845void AggregationElement::processAggVarJoinNewForArray(DDS& aggDDS, const libdap::Array& arrayTemplate,
846 const agg_util::Dimension& dim, const AMDList& memberDatasets)
847{
848 BESStopWatch sw;
849 if (BESDebug::IsSet(TIMING_LOG_KEY)) sw.start("AggregationElement::processJoinExistingOnAggVar", "");
850
851 // Use the basic array getter to read adn get from top level DDS.
852 unique_ptr<agg_util::ArrayGetterInterface> arrayGetter(new agg_util::TopLevelArrayGetter());
853
854 unique_ptr<ArrayAggregateOnOuterDimension> pAggArray(
855 new ArrayAggregateOnOuterDimension(arrayTemplate, memberDatasets, std::move(arrayGetter), dim));
856
857 // Make sure we xfer ownership of contained dumb ptr.
858 NCML_ASSERT_MSG(!(arrayGetter.get()), "Expected unique_ptr owner xfer, failed!");
859
860 // This will copy, unique_ptr will clear the prototype.
861 // NOTE: add_var() makes a copy.
862 // OPTIMIZE change to add_var_no_copy when it exists.
863 BESDEBUG("ncml",
864 "Adding new ArrayAggregateOnOuterDimension with name=" << arrayTemplate.name() << " to aggregated dataset!" << endl);
865
866 // Replaced the copy version of DDS::add_var() with the nocopy version. This saves
867 // a deep copy, but more importantly, is a workaround for a memory issue in the
868 // ArrayAggregateOnOuterDimension or ArrayAggreagtionBase copy constructor, which
869 // triggers a memory error deep in libdap::Array::Array(const Array&). See similar
870 // changes below. This and related changes fix HYRAX-803. jhrg 8/3/18
871
872 aggDDS.add_var_nocopy(pAggArray.release());
873}
874
875void AggregationElement::processAggVarJoinNewForGrid(DDS& aggDDS, const Grid& gridTemplate,
876 const agg_util::Dimension& dim, const AMDList& memberDatasets)
877{
878 BESStopWatch sw;
879 if (BESDebug::IsSet(TIMING_LOG_KEY)) sw.start("AggregationElement::processAggVarJoinNewForGrid", "");
880
881 unique_ptr<GridAggregateOnOuterDimension> pAggGrid(
882 new GridAggregateOnOuterDimension(gridTemplate, dim, memberDatasets, _parser->getDDSLoader()));
883
884 // This will copy, unique_ptr will clear the prototype.
885 // OPTIMIZE change to add_var_no_copy when it exists.
886 BESDEBUG("ncml",
887 "Adding new GridAggregateOnOuterDimension with name=" << gridTemplate.name() << " to aggregated dataset!" << endl);
888
889 aggDDS.add_var_nocopy(pAggGrid.release());
890}
891
892void AggregationElement::processAggVarJoinExistingForArray(DDS& aggDDS, const libdap::Array& arrayTemplate,
893 const agg_util::Dimension& dim, const AMDList& memberDatasets)
894{
895
896 BESStopWatch sw;
897 if (BESDebug::IsSet(TIMING_LOG_KEY)) sw.start("AggregationElement::processAggVarJoinExistingForArray", "");
898
899 // Use the basic array getter to read adn get from top level DDS.
900 unique_ptr<agg_util::ArrayGetterInterface> arrayGetter(new agg_util::TopLevelArrayGetter());
901
902 unique_ptr<ArrayJoinExistingAggregation> pAggArray(
903 new ArrayJoinExistingAggregation(arrayTemplate, memberDatasets, std::move(arrayGetter),
904 dim));
905
906 // Make sure we xfer ownership of contained dumb ptr.
907 NCML_ASSERT_MSG(!(arrayGetter.get()), "Expected unique_ptr owner xfer, failed!");
908
909 // This will copy, unique_ptr will clear the prototype.
910 // NOTE: add_var() makes a copy.
911 // OPTIMIZE change to add_var_no_copy when it exists.
912 BESDEBUG("ncml",
913 "Adding new ArrayJoinExistingAggregation with name=" << arrayTemplate.name() << " to aggregated dataset!" << endl);
914
915 aggDDS.add_var_nocopy(pAggArray.release());
916}
917
918void AggregationElement::processAggVarJoinExistingForGrid(DDS& aggDDS, const Grid& gridTemplate,
919 const agg_util::Dimension& dim, const AMDList& memberDatasets)
920{
921
922 BESStopWatch sw;
923 if (BESDebug::IsSet(TIMING_LOG_KEY)) sw.start("AggregationElement::processAggVarJoinExistingForGrid", "");
924
925 unique_ptr<GridJoinExistingAggregation> pAggGrid(
926 new GridJoinExistingAggregation(gridTemplate, memberDatasets, _parser->getDDSLoader(), dim));
927
928 BESDEBUG("ncml",
929 "Adding new GridJoinExistingAggregation with name=" << gridTemplate.name() << " to aggregated dataset!" << endl);
930
931 aggDDS.add_var_nocopy(pAggGrid.release());
932}
933
934void AggregationElement::processParentDatasetCompleteForJoinNew()
935{
936 BESStopWatch sw;
937 if (BESDebug::IsSet(TIMING_LOG_KEY)) sw.start("AggregationElement::processParentDatasetCompleteForJoinNew", "");
938
939 NetcdfElement* pParentDataset = getParentDataset();
940 VALID_PTR(pParentDataset);
941 DDS* pParentDDS = pParentDataset->getDDS();
942 VALID_PTR(pParentDDS);
943
944 const DimensionElement* pDim = getParentDataset()->getDimensionInLocalScope(_dimName);
945 NCML_ASSERT_MSG(pDim, " AggregationElement::processParentDatasetCompleteForJoinNew(): "
946 " didn't find a DimensionElement with the joinNew dimName=" + _dimName);
947 const agg_util::Dimension& dim = pDim->getDimension();
948
949 // See if there's an explicit or placeholder c.v. for this dimension name
950 BaseType* pBT = AggregationUtil::getVariableNoRecurse(*pParentDDS, dim.name);
951 Array* pCV = 0; // this will be a ptr to the actual (new or existing) c.v. in the *pParentDDS.
952
953 // If name totally unused, we need to create a new c.v. and add it.
954 if (!pBT) {
955 pCV = createAndAddCoordinateVariableForNewDimension(*pParentDDS, dim);
956 NCML_ASSERT_MSG(pCV, "processParentDatasetCompleteForJoinNew(): "
957 "failed to create a new coordinate variable for dim=" + dim.name);
958 }
959 else // name exists: either it's explicit or deferred.
960 {
961 // See if the var we found with the dimension name is
962 // in the deferred variable list for the parent dataset:
963 VariableElement* pVarElt = pParentDataset->findVariableElementForLibdapVar(pBT);
964 // If not, then we expect explicit values so just validate it's a proper c.v. for
965 // the aggregation (the dim) and set pCV to it if so.
966 if (!pVarElt) {
967 // will throw if not valid since we send true.
968 pCV = ensureVariableIsProperNewCoordinateVariable(pBT, dim, true);
969 VALID_PTR(pCV);
970 }
971 else // it was deferred, need to do some special work...
972 {
973 pCV = processDeferredCoordinateVariable(pBT, dim);
974 VALID_PTR(pCV);
975 }
976 }
977
978 // OK, either pCV is valid or we've unwound out by this point.
979 // If a coordinate axis type was specified, we need to add it now.
980 //
981 // This fiddles with the attribute for the CV. jhrg 10/17/11
982 if (!_coordinateAxisType.empty()) {
983 addCoordinateAxisType(*pCV, _coordinateAxisType);
984 }
985
986 // For each aggVar:
987 // If it's a Grid, add the coordinate variable as a new map vector.
988 // If it's an Array, do nothing -- we already added the CV as a sibling to the aggvar
989 AggVarIter it;
990 AggVarIter endIt = endAggVarIter();
991 for (it = beginAggVarIter(); it != endIt; ++it) {
992 const string& aggVar = *it;
993 BaseType* pBT = AggregationUtil::getVariableNoRecurse(*pParentDDS, aggVar);
995 if (pGrid) {
996 // Add the given map to the Grid as a copy
997 pGrid->prepend_map(pCV, true);
998 }
999 }
1000}
1001
1002void AggregationElement::processParentDatasetCompleteForJoinExisting()
1003{
1004 BESStopWatch sw;
1005 if (BESDebug::IsSet(TIMING_LOG_KEY)) sw.start("AggregationElement::processParentDatasetCompleteForJoinExisting", "");
1006
1007 NetcdfElement* pParentDataset = getParentDataset();
1008 VALID_PTR(pParentDataset);
1009 DDS* pAggDDS = pParentDataset->getDDS();
1010 VALID_PTR(pAggDDS);
1011
1012 const DimensionElement* pDim = getParentDataset()->getDimensionInLocalScope(_dimName);
1013 NCML_ASSERT_MSG(pDim, " Didn't find a DimensionElement with the joinExisting dimName=" + _dimName);
1014 const agg_util::Dimension& dim = pDim->getDimension();
1015
1016 // See if there's an explicit or placeholder c.v. for this dimension name
1017 BaseType* pDimNameVar = AggregationUtil::getVariableNoRecurse(*pAggDDS, dim.name);
1018
1019 bool placeholderExists = false;
1020 Array* pCV = 0; // this will be a ptr to the actual (new or existing) c.v. in the *pParentDDS.
1021 // If the c.v. exists, then process it further.
1022 if (pDimNameVar) {
1023 // See if the var we found with the dimension name is
1024 // in the deferred variable list for the parent dataset:
1025 VariableElement* pVarElt = pParentDataset->findVariableElementForLibdapVar(pDimNameVar);
1026 // If not, then we expect explicit values so just validate it's a proper c.v. for
1027 // the aggregation (the dim) and set pCV to it if so.
1028 if (!pVarElt) {
1029 // will throw if not valid since we send true.
1030 pCV = ensureVariableIsProperNewCoordinateVariable(pDimNameVar, dim, true);
1031 VALID_PTR(pCV);
1032 placeholderExists = false;
1033 }
1034 else // it was deferred, need to do some special work below...
1035 {
1036 //pCV = processDeferredCoordinateVariable(pDimNameVar, dim);
1037 placeholderExists = true;
1038 }
1039 }
1040
1041 // For the scope of the next loop, this will be filled
1042 // with a new aggregated map variable when we didn't find the first Grid
1043 // and then pCV will refer to it until the function end.
1044 // If created, it will be used as the map vector for all Grid's.
1045 unique_ptr<ArrayJoinExistingAggregation> pNewMap;
1046
1047 // For each aggVar:
1048 // If it's a Grid, add the coordinate variable as a new map vector
1049 // since we left it out in the actual Grid until aggregated.
1050 // If it's an Array, do nothing
1051 auto endIt = endAggVarIter();
1052 for (auto it = beginAggVarIter(); it != endIt; ++it) {
1053 const string& aggVar = *it;
1054 BaseType* pAggVar = AggregationUtil::getVariableNoRecurse(*pAggDDS, aggVar);
1055
1056 // HACK TODO clean this downcast later when we refactor this file.
1057 GridJoinExistingAggregation* pGrid = dynamic_cast<GridJoinExistingAggregation*>(pAggVar);
1058 if (pGrid) {
1059 // If we don't find it, but we're the first Grid, then assume it's in the Grid maps
1060 // and create it. Will be reused by other Grid's.
1061 // We also do this if it was a placeholder since we need to replace it!
1062 if (!pCV || placeholderExists) {
1063 pNewMap = pGrid->makeAggregatedOuterMapVector();
1064 VALID_PTR(pNewMap.get());
1065
1066 // If there was a placeholder, we need to
1067 // grab its metadata as a changeset and replace
1068 // the variable in the DDS with the new one.
1069 if (placeholderExists) {
1070 processPlaceholderCoordinateVariableForJoinExisting(*pDimNameVar, pNewMap.get());
1071 }
1072
1073 // this will make a copy, so the unique_ptr is ok.
1074 AggregationUtil::addOrReplaceVariableForName(pAggDDS, *(pNewMap.get()));
1075
1076 // Use the new one as the coordinate variable for the maps below
1077 pCV = pNewMap.get();
1078 }
1079
1080 // It MUST exist for a Grid since we have to add it for completeness.
1081 NCML_ASSERT_MSG(pCV, "Expected a coordinate variable since a Grid exists... what happened?");
1082
1083 // Add the given map to the Grid as a copy
1084 pGrid->prepend_map(pCV, true);
1085 }
1086 }
1087}
1088
1089void AggregationElement::processPlaceholderCoordinateVariableForJoinExisting(const libdap::BaseType& placeholderVar,
1090 libdap::Array* pNewVar)
1091{
1092 VALID_PTR(pNewVar);
1093
1094 // Make sure the types of the placeholder scalar and created array match or the author goofed
1095 BaseType* pNewEltProto = pNewVar->var();
1096 VALID_PTR(pNewEltProto);
1097 if (placeholderVar.type() != pNewEltProto->type()) {
1098 THROW_NCML_PARSE_ERROR(line(),
1099 " We expected the type of the placeholder coordinate variable to be the same "
1100 " as that created by the aggregation. Expected type=" + pNewEltProto->type_name()
1101 + +" but placeholder has type=" + placeholderVar.type_name()
1102 + " Please make sure these match in the input file!");
1103 }
1104
1105 // Pull the metadata into the new c.v. from the placeholder
1106 AggregationUtil::gatherMetadataChangesFrom(pNewVar, placeholderVar);
1107
1108 // Let the validation know that we got values for the original value and to remove the entry
1109 // since we're about to delete the pointer to pBT!
1110 getParentDataset()->setVariableGotValues(const_cast<BaseType*>(&placeholderVar), true);
1111}
1112
1114{
1115 _coordinateAxisType = cat;
1116}
1117
1118const std::string&
1120{
1121 return _coordinateAxisType;
1122}
1123
1124libdap::Array*
1125AggregationElement::ensureVariableIsProperNewCoordinateVariable(libdap::BaseType* pBT, const agg_util::Dimension& dim,
1126 bool throwOnInvalidCV) const
1127{
1128 VALID_PTR(pBT);
1129 Array* pArrRet = 0;
1130
1131 // If 1D array with name == dim....
1132 if (AggregationUtil::couldBeCoordinateVariable(pBT)) {
1133 // Ensure the dimensionalities match
1134 Array* pArr = static_cast<Array*>(pBT);
1135 if (pArr->length() == static_cast<int>(dim.size)) {
1136 // OK, it's a valid return value.
1137 pArrRet = pArr;
1138 }
1139 else // Dimensionality mismatch, exception or return NULL.
1140 {
1141 ostringstream oss;
1142 oss << string("In the aggregation for dimension=") << dim.name
1143 << ": The coordinate variable we found does NOT have the same dimensionality as the"
1144 "aggregated dimension! We expected dimensionality=" << dim.size
1145 << " but the coordinate variable had dimensionality=" << pArr->length();
1146 BESDEBUG("ncml", oss.str() << endl);
1147 if (throwOnInvalidCV) {
1148 THROW_NCML_PARSE_ERROR(line(), oss.str());
1149 }
1150 }
1151 }
1152
1153 else // Name exists, but not a coordinate variable, then exception or return null.
1154 {
1155 std::ostringstream msg;
1156 msg << "Aggregation found a variable matching aggregated dimension name=" << dim.name
1157 << " but it was not a coordinate variable. "
1158 " It must be a 1D array whose dimension name is the same as its name. ";
1159 BESDEBUG("ncml", "AggregationElement::ensureVariableIsProperNewCoordinateVariable: " + msg.str() << endl);
1160 if (throwOnInvalidCV) {
1161 THROW_NCML_PARSE_ERROR(line(), msg.str())
1162 }
1163 }
1164 // Return valid Array or null on failures.
1165 return pArrRet;
1166}
1167
1168libdap::Array*
1169AggregationElement::findMatchingCoordinateVariable(const DDS& dds, const agg_util::Dimension& dim,
1170 bool throwOnInvalidCV/*=true*/) const
1171{
1172 BaseType* pBT = AggregationUtil::getVariableNoRecurse(dds, dim.name);
1173
1174 // Name doesn't exist, just NULL. We'll have to create it from scratch
1175 if (!pBT) {
1176 return 0;
1177 }
1178
1179 return ensureVariableIsProperNewCoordinateVariable(pBT, dim, throwOnInvalidCV);
1180}
1181
1194libdap::Array*
1195AggregationElement::processDeferredCoordinateVariable(libdap::BaseType* pBT, const agg_util::Dimension& dim)
1196{
1197 VALID_PTR(pBT);
1198
1199 BESDEBUG("ncml",
1200 "Processing the placeholder coordinate variable (no values) for the " "current aggregation to add placeholder metadata to the generated values..." << endl);
1201
1202 // Generate the c.v. as if we had no placeholder since pBT will be a scalar (shape cannot
1203 // be defined on it by ncml spec defn).
1204 // @OPTIMIZE try to refactor this to avoid unnecessary copies.
1205 unique_ptr<Array> pNewArrCV = createCoordinateVariableForNewDimension(dim);
1206 NCML_ASSERT_MSG(pNewArrCV.get(), " createCoordinateVariableForNewDimension()"
1207 " returned null.");
1208
1209 // Make sure the types of the placeholder scalar and created array match or the author goofed
1210 BaseType* pNewEltProto = pNewArrCV->var();
1211 VALID_PTR(pNewEltProto);
1212 if (pBT->type() != pNewEltProto->type()) {
1213 THROW_NCML_PARSE_ERROR(line(),
1214 " We expected the type of the placeholder coordinate variable to be the same "
1215 " as that created by the aggregation. Expected type=" + pNewEltProto->type_name()
1216 + +" but placeholder has type=" + pBT->type_name()
1217 + " Please make sure these match in the input file!");
1218 }
1219
1220 // Let the validation know that we got values for the original value and to remove the entry
1221 // since we're about to delete the pointer to pBT!
1222 getParentDataset()->setVariableGotValues(pBT, true);
1223
1224 // Copy the entire AttrTable tree (recursively) from the place holder into the new variable
1225 pNewArrCV->get_attr_table() = pBT->get_attr_table();
1226
1227 // Delete the placeholder
1228 DDS* pDDS = getParentDataset()->getDDS();
1229 VALID_PTR(pDDS);
1230 pDDS->del_var(pBT->name());
1231
1232 // Add the new one, which will copy it (argh! we need to fix this in libdap!)
1233 // OPTIMIZE use non copy add when available.
1234 BESDEBUG("ncml", "Adding CV: " << pNewArrCV->name() << endl);
1235#if 0
1236 pDDS->add_var(pNewArrCV.get()); // use raw ptr for the copy.
1237#endif
1238 pDDS->add_var_nocopy(pNewArrCV.release());
1239
1240 // Pull out the copy we just added and hand it back
1241 Array* pArrCV = static_cast<Array*>(AggregationUtil::getVariableNoRecurse(*pDDS, dim.name));
1242 VALID_PTR(pArrCV);
1243 return pArrCV;
1244}
1245
1246unique_ptr<libdap::Array> AggregationElement::createCoordinateVariableForNewDimension(
1247 const agg_util::Dimension& dim) const
1248{
1249 // Get the netcdf@coordValue or use the netcdf@location (or auto generate if empty() ).
1250 NCML_ASSERT(_datasets.size() > 0);
1251 bool hasCoordValue = !(_datasets[0]->coordValue().empty());
1252 if (hasCoordValue) {
1253 return createCoordinateVariableForNewDimensionUsingCoordValue(dim);
1254 }
1255 else {
1256 return createCoordinateVariableForNewDimensionUsingLocation(dim);
1257 }
1258}
1259
1260libdap::Array*
1261AggregationElement::createAndAddCoordinateVariableForNewDimension(DDS& dds, const agg_util::Dimension& dim)
1262{
1263 unique_ptr<libdap::Array> pNewCV = createCoordinateVariableForNewDimension(dim);
1264
1265 // Make sure it did it
1266 NCML_ASSERT_MSG(pNewCV.get(),
1267 "AgregationElement::createCoordinateVariableForNewDimension() failed to create a coordinate variable!");
1268
1269 // Add it to the DDS, which will make a copy
1270 // (change this when we add noncopy add_var to DDS)
1271 //
1272 // Fix. This will append the variable to the DDS; we need these CVs to be
1273 // prefixes to the Grids (so that old versions of the netCDF library will
1274 // recognize them). jhrg 10/17/11
1275 BESDEBUG("ncml2", "AggregationElement::createAndAddCoordinateVariableForNewDimension: " << pNewCV->name());
1276#if 0
1277 dds.add_var(pNewCV.get());
1278#else
1279 // This provides a way to remember where the last CV was inserted and adds
1280 // this one after it. That provides the behavior that all of the CVs are
1281 // added at the beginning of the DDS but in the order they appear in the NCML.
1282 // That will translate into a greater chance of success for users, I think ...
1283 //
1284 // See also similar code in AggregationUtil::addCopyOfVariableIfNameIsAvailable.
1285 // jhrg 10/17/11
1286 static int last_added = 0;
1287 DDS::Vars_iter pos = dds.var_begin();
1288 for (int i = 0; i < last_added; ++i)
1289 ++pos;
1290
1291 dds.insert_var(pos, pNewCV.get());
1292 ++last_added;
1293#endif
1294 // Grab the copy back out and set to our expected result.
1295 Array* pCV = static_cast<Array*>(AggregationUtil::getVariableNoRecurse(dds, dim.name));
1296
1297 NCML_ASSERT_MSG(pCV, "Logic Error: tried to add a new coordinate variable while processing joinNew"
1298 " but we couldn't locate it!");
1299 return pCV;
1300}
1301
1302unique_ptr<libdap::Array> AggregationElement::createCoordinateVariableForNewDimensionUsingCoordValue(
1303 const agg_util::Dimension& dim) const
1304{
1305 NCML_ASSERT(_datasets.size() > 0);
1306 NCML_ASSERT_MSG(_datasets.size() == dim.size, "Logic error: Number of datasets doesn't match dimension!");
1307 // Use first dataset to define the proper type
1308 double doubleVal = 0;
1309 if (_datasets[0]->getCoordValueAsDouble(doubleVal)) {
1310 return createCoordinateVariableForNewDimensionUsingCoordValueAsDouble(dim);
1311 }
1312 else {
1313 return createCoordinateVariableForNewDimensionUsingCoordValueAsString(dim);
1314 }
1315}
1316
1317unique_ptr<libdap::Array> AggregationElement::createCoordinateVariableForNewDimensionUsingCoordValueAsDouble(
1318 const agg_util::Dimension& dim) const
1319{
1320 vector<dods_float64> coords;
1321 coords.reserve(dim.size);
1322 double doubleVal = 0;
1323 // Use the index rather than iterator so we can use it in debug output...
1324 for (unsigned int i = 0; i < _datasets.size(); ++i) {
1325 const NetcdfElement* pDataset = _datasets[i];
1326 if (!pDataset->getCoordValueAsDouble(doubleVal)) {
1327 THROW_NCML_PARSE_ERROR(line(),
1328 "In creating joinNew coordinate variable from coordValue, expected a coordValue of type double"
1329 " but failed! coordValue=" + pDataset->coordValue() + " which was in the dataset location="
1330 + pDataset->location() + " with title=\"" + pDataset->title() + "\"");
1331 }
1332 else // we got our value fine, so add it
1333 {
1334 coords.push_back(static_cast<dods_float64>(doubleVal));
1335 }
1336 }
1337
1338 // If we got here, we have the array of coords.
1339 // So we need to make the proper array, fill it in, and return it.
1340 unique_ptr<Array> pNewCV = MyBaseTypeFactory::makeArrayTemplateVariable("Array<Float64>", dim.name, true);
1341 NCML_ASSERT_MSG(pNewCV.get(), "createCoordinateVariableForNewDimensionUsingCoordValueAsDouble: failed to create"
1342 " the new Array<Float64> for variable: " + dim.name);
1343 pNewCV->append_dim(dim.size, dim.name);
1344 pNewCV->set_value(coords, coords.size()); // this will set the length correctly.
1345 return pNewCV;
1346}
1347
1348unique_ptr<libdap::Array> AggregationElement::createCoordinateVariableForNewDimensionUsingCoordValueAsString(
1349 const agg_util::Dimension& dim) const
1350{
1351 // I feel suitably dirty for cut and pasting this.
1352 vector<string> coords;
1353 coords.reserve(dim.size);
1354 for (unsigned int i = 0; i < _datasets.size(); ++i) {
1355 const NetcdfElement* pDataset = _datasets[i];
1356 if (pDataset->coordValue().empty()) {
1357 int parseLine = line();
1358 THROW_NCML_PARSE_ERROR(parseLine,
1359 "In creating joinNew coordinate variable from coordValue, expected a coordValue of type string"
1360 " but it was empty! dataset location=" + pDataset->location() + " with title=\"" + pDataset->title()
1361 + "\"");
1362 }
1363 else // we got our value fine, so add it
1364 {
1365 coords.push_back(pDataset->coordValue());
1366 }
1367 }
1368 // If we got here, we have the array of coords.
1369 // So we need to make the proper array, fill it in, and return it.
1370 unique_ptr<Array> pNewCV = MyBaseTypeFactory::makeArrayTemplateVariable("Array<String>", dim.name, true);
1371 NCML_ASSERT_MSG(pNewCV.get(), "createCoordinateVariableForNewDimensionUsingCoordValueAsString: failed to create"
1372 " the new Array<String> for variable: " + dim.name);
1373 pNewCV->append_dim(dim.size, dim.name);
1374 pNewCV->set_value(coords, coords.size()); // this will set the length correctly.
1375 return pNewCV;
1376}
1377
1378unique_ptr<libdap::Array> AggregationElement::createCoordinateVariableForNewDimensionUsingLocation(
1379 const agg_util::Dimension& dim) const
1380{
1381 // I feel suitably dirty for cut and pasting this.
1382 vector<string> coords;
1383 coords.reserve(dim.size);
1384 for (unsigned int i = 0; i < _datasets.size(); ++i) {
1385 const NetcdfElement* pDataset = _datasets[i];
1386 string location("");
1387 if (pDataset->location().empty()) {
1388 std::ostringstream oss;
1389 oss << "Virtual_Dataset_" << i;
1390 location = oss.str();
1391 }
1392 else // we got our value fine, so add it
1393 {
1394 location = pDataset->location();
1395 }
1396 coords.push_back(location);
1397 }
1398 // If we got here, we have the array of coords.
1399 // So we need to make the proper array, fill it in, and return it.
1400 unique_ptr<Array> pNewCV = MyBaseTypeFactory::makeArrayTemplateVariable("Array<String>", dim.name, true);
1401 NCML_ASSERT_MSG(pNewCV.get(),
1402 "createCoordinateVariableForNewDimensionUsingCoordValueUsingLocation: failed to create"
1403 " the new Array<String> for variable: " + dim.name);
1404
1405 pNewCV->append_dim(dim.size, dim.name);
1406 pNewCV->set_value(coords, coords.size());
1407 return pNewCV;
1408}
1409
1410void AggregationElement::collectDatasetsInOrder(vector<const DDS*>& ddsList) const
1411{
1412 ddsList.resize(0);
1413 ddsList.reserve(_datasets.size());
1414 vector<NetcdfElement*>::const_iterator endIt = _datasets.end();
1415 vector<NetcdfElement*>::const_iterator it;
1416 for (it = _datasets.begin(); it != endIt; ++it) {
1417 const NetcdfElement* elt = *it;
1418 VALID_PTR(elt);
1419 const DDS* pDDS = elt->getDDS();
1420 VALID_PTR(pDDS);
1421 ddsList.push_back(pDDS);
1422 }
1423}
1424
1425void AggregationElement::collectAggMemberDatasets(AMDList& rMemberDatasets) const
1426{
1427 rMemberDatasets.resize(0);
1428 rMemberDatasets.reserve(_datasets.size());
1429
1430 for (vector<NetcdfElement*>::const_iterator it = _datasets.begin(); it != _datasets.end(); ++it) {
1431 VALID_PTR(*it);
1432 RCPtr<AggMemberDataset> pAGM((*it)->getAggMemberDataset());
1433 VALID_PTR(pAGM.get());
1434
1435 // Push down the ncoords hint if it was given
1436 if (!((*it)->ncoords().empty()) && !_dimName.empty()) {
1437 if (!(pAGM->isDimensionCached(_dimName))) {
1438 unsigned int ncoords = (*it)->getNcoordsAsUnsignedInt();
1439 pAGM->setDimensionCacheFor(agg_util::Dimension(_dimName, ncoords), false);
1440 }
1441 }
1442
1443 // don't need to ref(), the RCPtr copy ctor in the vector elt
1444 // takes care of it when we push_back()
1445 rMemberDatasets.push_back(pAGM);
1446 }
1447}
1448
1449void AggregationElement::processAnyScanElements()
1450{
1451 if (_scanners.size() > 0) {
1452 BESDEBUG("ncml", "Started to process " << _scanners.size() << " scan elements..." << endl);
1453 }
1454
1455 vector<ScanElement*>::iterator it;
1456 vector<ScanElement*>::iterator endIt = _scanners.end();
1457 vector<NetcdfElement*> scannedDatasets;
1458 for (it = _scanners.begin(); it != endIt; ++it) {
1459 BESDEBUG("ncml", "Processing scan element = " << (*it)->toString() << " ..." << endl);
1460
1461 // Run the scanner to get the scanned datasets.
1462 // These will be sorted, so maintain order.
1463 (*it)->getDatasetList(scannedDatasets);
1464
1465 // Add the datasets using the parser call to
1466 // set the data up correctly,
1467 // then unref() and remove them from the temp array
1468 vector<NetcdfElement*>::iterator datasetIt;
1469 vector<NetcdfElement*>::iterator datasetEndIt = scannedDatasets.end();
1470 for (datasetIt = scannedDatasets.begin(); datasetIt != datasetEndIt; ++datasetIt) {
1471 // this will ref() it and make sure we can load it.
1472 _parser->addChildDatasetToCurrentDataset(*datasetIt);
1473 // so we unref() it afterwards because we're dumping the temp array
1474 (*datasetIt)->unref();
1475 }
1476 // we're done with it and they're all unref().
1477 scannedDatasets.clear();
1478 }
1479}
1480
1481void AggregationElement::mergeDimensions(bool checkDimensionMismatch/*=true*/, const std::string& dimToSkip/*=""*/)
1482{
1483 NetcdfElement* pParent = getParentDataset();
1484 // For each dataset in the children....
1485 vector<NetcdfElement*>::const_iterator datasetsEndIt = _datasets.end();
1486 vector<NetcdfElement*>::const_iterator datasetsIt;
1487 for (datasetsIt = _datasets.begin(); datasetsIt != datasetsEndIt; ++datasetsIt) {
1488 // Check each dimension in it compared to the parent
1489 const NetcdfElement* dataset = *datasetsIt;
1490 VALID_PTR(dataset);
1491 const vector<DimensionElement*>& dimensions = dataset->getDimensionElements();
1492 vector<DimensionElement*>::const_iterator dimEndIt = dimensions.end();
1493 vector<DimensionElement*>::const_iterator dimIt;
1494 for (dimIt = dimensions.begin(); dimIt != dimEndIt; ++dimIt) {
1495 const DimensionElement* pDim = *dimIt;
1496 VALID_PTR(pDim);
1497 // Skip if asked to do so
1498 if (!dimToSkip.empty() && (pDim->name() == dimToSkip)) {
1499 continue;
1500 }
1501 // Otherwise continue to look it up
1502 const DimensionElement* pUnionDim = pParent->getDimensionInLocalScope(pDim->name());
1503 if (pUnionDim) {
1504 // We'll check the dimensions match no matter what, but only warn unless we're told to check
1505 if (!pUnionDim->checkDimensionsMatch(*pDim)) {
1506 string msg = string("The union aggregation already had a dimension=") + pUnionDim->toString()
1507 + " but we found another with different cardinality: " + pDim->toString()
1508 + " This is likely an error and could cause a later exception.";
1509 BESDEBUG("ncml", "WARNING: " + msg);
1510 if (checkDimensionMismatch) {
1511 THROW_NCML_PARSE_ERROR(_parser->getParseLineNumber(),
1512 msg + " Scope=" + _parser->getScopeString());
1513 }
1514 }
1515 }
1516 else // if not in the union already, we want to add it!
1517 {
1518 // this will up the ref count for it so when child dataset dies, we're good.
1519 BESDEBUG("ncml",
1520 "Dimension name=" << pDim->name() << " was not found in the union yet, so adding it. The full elt is: " << pDim->toString() << endl);
1521 pParent->addDimension(const_cast<DimensionElement*>(pDim));
1522 }
1523 }
1524 }
1525}
1526
1527#define COORDINATE_AXIS_TYPE_ATTR "_CoordinateAxisType"
1528void AggregationElement::addCoordinateAxisType(libdap::Array& rCV, const std::string& cat)
1529{
1530 AttrTable& rAT = rCV.get_attr_table();
1531 AttrTable::Attr_iter foundIt = rAT.simple_find(COORDINATE_AXIS_TYPE_ATTR);
1532 // preexists, then delete it and we'll replace with the new
1533 if (foundIt != rAT.attr_end()) {
1534 rAT.del_attr(COORDINATE_AXIS_TYPE_ATTR);
1535 }
1536
1537 BESDEBUG("ncml3",
1538 "Adding attribute to the aggregation variable " << rCV.name() << " Attr is " << COORDINATE_AXIS_TYPE_ATTR << " = " << cat << endl);
1539
1540 // Either way, now we can add it.
1541 rAT.append_attr(COORDINATE_AXIS_TYPE_ATTR, "String", cat);
1542}
1543
1544vector<string> AggregationElement::getValidAttributes()
1545{
1546 vector<string> attrs;
1547 attrs.push_back("type");
1548 attrs.push_back("dimName");
1549 attrs.push_back("recheckEvery");
1550 return attrs;
1551}
1552
1553
1554}
1555
1556// namespace ncml_module
static bool IsSet(const std::string &flagName)
see if the debug context flagName is set to true
Definition: BESDebug.h:168
virtual bool start(std::string name)
Definition: BESStopWatch.cc:67
static AggMemberDatasetDimensionCache * get_instance()
virtual void fillDimensionCacheByUsingDDS()=0
unique_ptr< ArrayJoinExistingAggregation > makeAggregatedOuterMapVector() const
virtual int ref() const
Definition: RCObject.cc:71
virtual int unref() const
Definition: RCObject.cc:78
A reference to an RCObject which automatically ref() and deref() on creation and destruction.
Definition: RCObject.h:284
const std::string & getAggregationVariableCoordinateAxisType() const
void setAggregationVariableCoordinateAxisType(const std::string &cat)
void addChildDataset(NetcdfElement *pDataset)
NetcdfElement * setParentDataset(NetcdfElement *parent)
Private Impl.
void addScanElement(ScanElement *pScanner)
bool isAggregationVariable(const string &name) const
virtual void setAttributes(const XMLAttributeMap &attrs)
virtual const string & getTypeName() const
void addAggregationVariable(const string &name)
virtual AggregationElement * clone() const
static std::unique_ptr< libdap::Array > makeArrayTemplateVariable(const std::string &type, const std::string &name, bool addTemplateVar)
Base class for NcML element concrete classes.
Definition: NCMLElement.h:61
virtual bool validateAttributes(const XMLAttributeMap &attrs, const std::vector< std::string > &validAttrs, std::vector< std::string > *pInvalidAttrs=0, bool printInvalid=true, bool throwOnError=true)
Definition: NCMLElement.cc:174
static std::string printAttributeIfNotEmpty(const std::string &attrName, const std::string &attrValue)
Definition: NCMLElement.cc:212
int getParseLineNumber() const
Definition: NCMLParser.cc:200
static bool isAllWhitespace(const std::string &str)
Definition: NCMLUtil.cc:105
Concrete class for NcML <netcdf> element.
Definition: NetcdfElement.h:64
void addDimension(DimensionElement *dim)
virtual const libdap::DDS * getDDS() const
virtual NetcdfElement * clone() const
void setChildAggregation(AggregationElement *agg, bool throwIfExists=true)
const DimensionElement * getDimensionInLocalScope(const std::string &name) const
void setVariableGotValues(libdap::BaseType *pVarToValidate, bool removeEntry)
virtual std::string toString() const
void setParentAggregation(AggregationElement *parent)
AggregationElement * getChildAggregation() const
virtual ScanElement * clone() const
Definition: ScanElement.cc:136
void setParent(AggregationElement *pParent)
Definition: ScanElement.cc:124
const std::string getValueForLocalNameOrDefault(const std::string &localname, const std::string &defVal="") const
Definition: XMLHelpers.cc:181
NcML Parser for adding/modifying/removing metadata (attributes) to existing local datasets using NcML...