bes Updated for version 3.20.13
CmrCatalog.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2//
3// CMRCatalog.cc
4//
5// This file is part of BES cmr_module
6//
7// Copyright (c) 2018 OPeNDAP, Inc.
8// Author: Nathan Potter <ndp@opendap.org>
9//
10// This library is free software; you can redistribute it and/or
11// modify it under the terms of the GNU Lesser General Public
12// License as published by the Free Software Foundation; either
13// version 2.1 of the License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful,
16// but WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23//
24// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25// Please read the full copyright statement in the file COPYRIGHT_URI.
26//
27
28#include "config.h"
29
30#include <sys/types.h>
31#include <sys/stat.h>
32#include <dirent.h>
33
34#include <cstring>
35#include <cerrno>
36
37#include <sstream>
38#include <cassert>
39
40#include <memory>
41#include <algorithm>
42
43#include "BESUtil.h"
44#include "BESCatalogUtils.h"
45#include "BESCatalogEntry.h"
46
47#include "CatalogNode.h"
48#include "CatalogItem.h"
49
50#include "BESInfo.h"
51#include "BESContainerStorageList.h"
52#include "BESFileContainerStorage.h"
53#include "BESLog.h"
54
55#include "BESInternalError.h"
56#include "BESForbiddenError.h"
57#include "BESNotFoundError.h"
58#include "BESSyntaxUserError.h"
59
60#include "TheBESKeys.h"
61#include "BESDebug.h"
62
63#include "CmrApi.h"
64#include "CmrNames.h"
65#include "CmrCatalog.h"
66
67using namespace bes;
68using namespace std;
69
70#define prolog std::string("CmrCatalog::").append(__func__).append("() - ")
71
72namespace cmr {
73
85CmrCatalog::CmrCatalog(const std::string &name /* = “CMR” */) : BESCatalog(name) {
86 bool found = false;
87 TheBESKeys::TheKeys()->get_values(CMR_COLLECTIONS, d_collections, found);
88 if(!found){
89 throw BESInternalError(string("The CMR module must define at least one collection name using the key; '")+CMR_COLLECTIONS
90 +"'", __FILE__, __LINE__);
91 }
92
93 found = false;
94 TheBESKeys::TheKeys()->get_values(CMR_FACETS, d_facets, found);
95 if(!found){
96 throw BESInternalError(string("The CMR module must define at least one facet name using the key; '")+CMR_COLLECTIONS
97 +"'", __FILE__, __LINE__);
98 }
99}
100
101CmrCatalog::~CmrCatalog()
102{
103}
105CmrCatalog::get_node(const string &path) const
106{
107 return get_node_NEW(path);
108}
109
110
112CmrCatalog::get_node_NEW(const string &ppath) const
113{
114 string path = BESUtil::normalize_path(ppath,true, false);
115 vector<string> path_elements = BESUtil::split(path);
116 BESDEBUG(MODULE, prolog << "path: '" << path << "' path_elements.size(): " << path_elements.size() << endl);
117
118 string epoch_time = BESUtil::get_time(0,false);
119
120 CmrApi cmrApi;
121 bes::CatalogNode *node;
122
123 if(path_elements.empty()){
124 node = new CatalogNode("/");
125 node->set_lmt(epoch_time);
126 node->set_catalog_name(CMR_CATALOG_NAME);
127 for(size_t i=0; i<d_collections.size() ; i++){
128 CatalogItem *collection = new CatalogItem();
129 collection->set_name(d_collections[i]);
130 collection->set_type(CatalogItem::node);
131 node->add_node(collection);
132 }
133 }
134 else {
135 for(size_t i=0; i< path_elements.size() ;i++){
136 if(path_elements[i]=="-")
137 path_elements[i] = "";
138 }
139
140 string collection = path_elements[0];
141 BESDEBUG(MODULE, prolog << "Checking for collection: " << collection << " d_collections.size(): " << d_collections.size() << endl);
142 bool valid_collection = false;
143 for(size_t i=0; i<d_collections.size() && !valid_collection ; i++){
144 if(collection == d_collections[i])
145 valid_collection = true;
146 }
147 if(!valid_collection){
148 throw BESNotFoundError("The CMR catalog does not contain a collection named '"+collection+"'",__FILE__,__LINE__);
149 }
150 BESDEBUG(MODULE, prolog << "Collection " << collection << " is valid." << endl);
151 if(path_elements.size() >1){
152 string facet = path_elements[1];
153 bool valid_facet = false;
154 for(size_t i=0; i<d_facets.size() && !valid_facet ; i++){
155 if(facet == d_facets[i])
156 valid_facet = true;
157 }
158 if(!valid_facet){
159 throw BESNotFoundError("The CMR collection '"+collection+"' does not contain a facet named '"+facet+"'",__FILE__,__LINE__);
160 }
161
162 if(facet=="temporal"){
163 BESDEBUG(MODULE, prolog << "Found Temporal Facet"<< endl);
164 node = new CatalogNode(path);
165 node->set_lmt(epoch_time);
166 node->set_catalog_name(CMR_CATALOG_NAME);
167
168
169 switch( path_elements.size()){
170
171 case 2: // The path ends at temporal facet, so we need the year nodes.
172 {
173 vector<string> years;
174
175 BESDEBUG(MODULE, prolog << "Getting year nodes for collection: " << collection<< endl);
176 cmrApi.get_years(collection, years);
177 for(size_t i=0; i<years.size() ; i++){
178 CatalogItem *collection = new CatalogItem();
179 collection->set_type(CatalogItem::node);
180 collection->set_name(years[i]);
181 collection->set_is_data(false);
182 collection->set_lmt(epoch_time);
183 collection->set_size(0);
184 node->add_node(collection);
185 }
186 }
187 break;
188
189 case 3: // The path ends at years facet, so we need the month nodes.
190 {
191 string year = path_elements[2];
192 string month("");
193 string day("");
194 vector<string> months;
195
196 BESDEBUG(MODULE, prolog << "Getting month nodes for collection: " << collection << " year: " << year << endl);
197 cmrApi.get_months(collection, year, months);
198 for(size_t i=0; i<months.size() ; i++){
199 CatalogItem *collection = new CatalogItem();
200 collection->set_type(CatalogItem::node);
201 collection->set_name(months[i]);
202 collection->set_is_data(false);
203 collection->set_lmt(epoch_time);
204 collection->set_size(0);
205 node->add_node(collection);
206 }
207 }
208 break;
209
210 case 4: // The path ends at months facet, so we need the day nodes.
211 {
212 string year = path_elements[2];
213 string month = path_elements[3];
214 string day("");
215 vector<string> days;
216
217 BESDEBUG(MODULE, prolog << "Getting day nodes for collection: " << collection << " year: " << year << " month: " << month << endl);
218 cmrApi.get_days(collection, year, month, days);
219 for(size_t i=0; i<days.size() ; i++){
220 CatalogItem *collection = new CatalogItem();
221 collection->set_type(CatalogItem::node);
222 collection->set_name(days[i]);
223 collection->set_is_data(false);
224 collection->set_lmt(epoch_time);
225 collection->set_size(0);
226 node->add_node(collection);
227 }
228 }
229 break;
230
231 case 5: // The path ends at the days facet, so we need the granule nodes.
232 {
233 string year = path_elements[2];
234 string month = path_elements[3];
235 string day = path_elements[4];
236 BESDEBUG(MODULE, prolog << "Getting granule leaves for collection: " << collection << " year: " << year << " month: " << month << " day: " << day << endl);
237 vector<Granule *> granules;
238 cmrApi.get_granules(collection, year, month, day, granules);
239 for(size_t i=0; i<granules.size() ; i++){
240 node->add_leaf(granules[i]->getCatalogItem(get_catalog_utils()));
241 }
242 }
243 break;
244
245 case 6: // Looks like they are trying to get a particular granule...
246 {
247 string year = path_elements[2];
248 string month = path_elements[3];
249 string day = path_elements[4];
250 string granule_id = path_elements[5];
251 BESDEBUG(MODULE, prolog << "Request resolved to leaf granule/dataset name, collection: " << collection << " year: " << year
252 << " month: " << month << " day: " << day << " granule: " << granule_id << endl);
253 Granule *granule = cmrApi.get_granule(collection,year,month,day,granule_id);
254 if(granule){
255 CatalogItem *granuleItem = new CatalogItem();
256 granuleItem->set_type(CatalogItem::leaf);
257 granuleItem->set_name(granule->getName());
258 granuleItem->set_is_data(true);
259 granuleItem->set_lmt(granule->getLastModifiedStr());
260 granuleItem->set_size(granule->getSize());
261 node->set_leaf(granuleItem);
262 }
263 else {
264 throw BESNotFoundError("No such resource: "+path,__FILE__,__LINE__);
265 }
266 }
267 break;
268
269 default:
270 {
271 throw BESSyntaxUserError("CmrCatalog: The path '"+path+"' does not describe a valid temporal facet search.",__FILE__,__LINE__);
272 }
273 break;
274 }
275
276 }
277 else {
278 throw BESNotFoundError("The CMR catalog only supports temporal faceting.",__FILE__,__LINE__);
279 }
280 }
281 else {
282 BESDEBUG(MODULE, prolog << "Building facet list for collection: " << collection << endl);
283 node = new CatalogNode(path);
284 node->set_lmt(epoch_time);
285 node->set_catalog_name(CMR_CATALOG_NAME);
286 for(size_t i=0; i<d_facets.size() ; i++){
287 CatalogItem *collection = new CatalogItem();
288 collection->set_name(d_facets[i]);
289 collection->set_type(CatalogItem::node);
290 collection->set_lmt(epoch_time);
291 BESDEBUG(MODULE, prolog << "Adding facet: " << d_facets[i] << endl);
292 node->add_node(collection);
293 }
294 }
295 }
296 return node;
297}
298
299
// The path must start with a '/'. This class interprets it as starting at
// the CatalogDirectory instance's root directory. It may or may not end
// in a '/'.
//
// If it is not a directory - that is an error. (return null or throw?)
//
// Item names are relative
322CmrCatalog::get_node_OLD(const string &ppath) const
323{
324 string path = BESUtil::normalize_path(ppath,true, false);
325 vector<string> path_elements = BESUtil::split(path);
326 BESDEBUG(MODULE, prolog << "path: '" << path << "' path_elements.size(): " << path_elements.size() << endl);
327
328 string epoch_time = BESUtil::get_time(0,false);
329
330 CmrApi cmrApi;
331 bes::CatalogNode *node;
332
333 if(path_elements.empty()){
334 node = new CatalogNode("/");
335 node->set_lmt(epoch_time);
336 node->set_catalog_name(CMR_CATALOG_NAME);
337 for(size_t i=0; i<d_collections.size() ; i++){
338 CatalogItem *collection = new CatalogItem();
339 collection->set_name(d_collections[i]);
340 collection->set_type(CatalogItem::node);
341 node->add_node(collection);
342 }
343 }
344 else {
345 string collection = path_elements[0];
346 BESDEBUG(MODULE, prolog << "Checking for collection: " << collection << " d_collections.size(): " << d_collections.size() << endl);
347 bool valid_collection = false;
348 for(size_t i=0; i<d_collections.size() && !valid_collection ; i++){
349 if(collection == d_collections[i])
350 valid_collection = true;
351 }
352 if(!valid_collection){
353 throw BESNotFoundError("The CMR catalog does not contain a collection named '"+collection+"'",__FILE__,__LINE__);
354 }
355 BESDEBUG(MODULE, prolog << "Collection " << collection << " is valid." << endl);
356 if(path_elements.size() >1){
357 string facet = path_elements[1];
358 bool valid_facet = false;
359 for(size_t i=0; i<d_facets.size() && !valid_facet ; i++){
360 if(facet == d_facets[i])
361 valid_facet = true;
362 }
363 if(!valid_facet){
364 throw BESNotFoundError("The CMR collection '"+collection+"' does not contain a facet named '"+facet+"'",__FILE__,__LINE__);
365 }
366
367 if(facet=="temporal"){
368 BESDEBUG(MODULE, prolog << "Found Temporal Facet"<< endl);
369 node = new CatalogNode(path);
370 node->set_lmt(epoch_time);
371 node->set_catalog_name(CMR_CATALOG_NAME);
372
373
374 switch( path_elements.size()){
375 case 2: // The path ends at temporal facet, so we need the years.
376 {
377 vector<string> years;
378
379 BESDEBUG(MODULE, prolog << "Getting year nodes for collection: " << collection<< endl);
380 cmrApi.get_years(collection, years);
381 for(size_t i=0; i<years.size() ; i++){
382 CatalogItem *collection = new CatalogItem();
383 collection->set_type(CatalogItem::node);
384 collection->set_name(years[i]);
385 collection->set_is_data(false);
386 collection->set_lmt(epoch_time);
387 collection->set_size(0);
388 node->add_node(collection);
389 }
390 }
391 break;
392 case 3:
393 {
394 string year = path_elements[2];
395 string month("");
396 string day("");
397 vector<string> months;
398
399 BESDEBUG(MODULE, prolog << "Getting month nodes for collection: " << collection << " year: " << year << endl);
400 cmrApi.get_months(collection, year, months);
401 for(size_t i=0; i<months.size() ; i++){
402 CatalogItem *collection = new CatalogItem();
403 collection->set_type(CatalogItem::node);
404 collection->set_name(months[i]);
405 collection->set_is_data(false);
406 collection->set_lmt(epoch_time);
407 collection->set_size(0);
408 node->add_node(collection);
409 }
410 }
411 break;
412 case 4:
413 {
414 string year = path_elements[2];
415 string month = path_elements[3];
416 string day("");
417 vector<string> days;
418
419 BESDEBUG(MODULE, prolog << "Getting day nodes for collection: " << collection << " year: " << year << " month: " << month << endl);
420 cmrApi.get_days(collection, year, month, days);
421 for(size_t i=0; i<days.size() ; i++){
422 CatalogItem *collection = new CatalogItem();
423 collection->set_type(CatalogItem::node);
424 collection->set_name(days[i]);
425 collection->set_is_data(false);
426 collection->set_lmt(epoch_time);
427 collection->set_size(0);
428 node->add_node(collection);
429 }
430 }
431 break;
432 case 5:
433 {
434 string year = path_elements[2];
435 string month = path_elements[3];
436 string day = path_elements[4];
437 BESDEBUG(MODULE, prolog << "Getting granule leaves for collection: " << collection << " year: " << year << " month: " << month << " day: " << day << endl);
438 vector<Granule *> granules;
439 cmrApi.get_granules(collection, year, month, day, granules);
440 for(size_t i=0; i<granules.size() ; i++){
441 node->add_leaf(granules[i]->getCatalogItem(get_catalog_utils()));
442 }
443 }
444 break;
445 default:
446 throw BESSyntaxUserError("CmrCatalog: The path '"+path+"' does not describe a valid temporal facet search.",__FILE__,__LINE__);
447 break;
448 }
449 }
450 else {
451 throw BESNotFoundError("The CMR catalog only supports temporal faceting.",__FILE__,__LINE__);
452 }
453 }
454 else {
455 BESDEBUG(MODULE, prolog << "Building facet list for collection: " << collection << endl);
456 node = new CatalogNode(path);
457 node->set_lmt(epoch_time);
458 node->set_catalog_name(CMR_CATALOG_NAME);
459 for(size_t i=0; i<d_facets.size() ; i++){
460 CatalogItem *collection = new CatalogItem();
461 collection->set_name(d_facets[i]);
462 collection->set_type(CatalogItem::node);
463 collection->set_lmt(epoch_time);
464 BESDEBUG(MODULE, prolog << "Adding facet: " << d_facets[i] << endl);
465 node->add_node(collection);
466 }
467 }
468 }
469 return node;
470}
471
// Disabled (#if 0) file-system based implementation of get_node().
//
// As written, it validates the path with BESUtil::check_path(), opens
// rootdir + path with opendir() and walks the entries with readdir().
// Sub-directories that are not excluded become child nodes and regular
// files that are included become leaves. Both lists are sorted with
// 'ordering' before the node is returned. If an exception is thrown after
// the directory is opened, the catch block closes it and rethrows.
//
// NOTE(review): the return-type line and the definition of 'ordering' are
// not visible in this listing.
// NOTE(review): 'node' is a raw pointer allocated inside the try block and
// is not deleted in the catch block.
472#if 0
474CmrCatalog::get_node(const string &path) const
475{
476
477 string rootdir = d_utils->get_root_dir();
478
479 // This will throw the appropriate exception (Forbidden or Not Found).
480 // Checks to make sure the different elements of the path are not
481 // symbolic links if follow_sym_links is set to false, and checks to
482 // make sure have permission to access node and the node exists.
483 BESUtil::check_path(path, rootdir, d_utils->follow_sym_links());
484
485 string fullpath = rootdir + path;
486
487 DIR *dip = opendir(fullpath.c_str());
488 if (!dip)
489 throw BESInternalError(
490 "A CMRCatalog can only return nodes for directory. The path '" + path
491 + "' is not a directory for BESCatalog '" + get_catalog_name() + "'.", __FILE__, __LINE__);
492
493 try {
494 // The node is a directory
495
496 // Based on other code (show_catalogs()), use BESCatalogUtils::exclude() on
497 // a directory, but BESCatalogUtils::include() on a file.
498 if (d_utils->exclude(path))
499 throw BESForbiddenError(
500 string("The path '") + path + "' is not included in the catalog '" + get_catalog_name() + "'.",
501 __FILE__, __LINE__);
502
503 CatalogNode *node = new CatalogNode(path);
504
505 node->set_catalog_name(get_catalog_name());
506 struct stat buf;
507 int statret = stat(fullpath.c_str(), &buf);
508 if (statret == 0 /* && S_ISDIR(buf.st_mode) */)
509 node->set_lmt(get_time(buf.st_mtime));
510
511 struct dirent *dit;
512 while ((dit = readdir(dip)) != NULL) {
513 string item = dit->d_name;
514 if (item == "." || item == "..") continue;
515
516 string item_path = fullpath + "/" + item;
517
518 // TODO add a test in configure for the readdir macro(s) DT_REG, DT_LNK
519 // and DT_DIR and use those, if present, to determine if the name is a
520 // link, directory or regular file. These are not present on all systems.
521 // Also, since we need mtime, this is not a huge time saver. But if we
522 // decide not to use the mtime, using these macros could save lots of system
523 // calls. jhrg 3/9/18
524
525 // Skip this dir entry if it is a sym link and follow links is false
526 if (d_utils->follow_sym_links() == false) {
527 struct stat lbuf;
// NOTE(review): the lstat() result is discarded, so lbuf.st_mode is read
// below even when lstat() failed.
528 (void) lstat(item_path.c_str(), &lbuf);
529 if (S_ISLNK(lbuf.st_mode)) continue;
530 }
531
532 // Is this a directory or a file? Should it be excluded or included?
533 statret = stat(item_path.c_str(), &buf);
534 if (statret == 0 && S_ISDIR(buf.st_mode) && !d_utils->exclude(item)) {
535#if 0
536 // Add a new node; set the size to zero.
537 node->add_item(new CatalogItem(item, 0, get_time(buf.st_mtime), CatalogItem::node));
538#endif
539 node->add_node(new CatalogItem(item, 0, get_time(buf.st_mtime), CatalogItem::node));
540 }
541 else if (statret == 0 && S_ISREG(buf.st_mode) && d_utils->include(item)) {
542#if 0
543 // Add a new leaf.
544 node->add_item(new CatalogItem(item, buf.st_size, get_time(buf.st_mtime),
545 d_utils->is_data(item), CatalogItem::leaf));
546#endif
547 node->add_leaf(new CatalogItem(item, buf.st_size, get_time(buf.st_mtime),
548 d_utils->is_data(item), CatalogItem::leaf));
549 }
550 else {
551 VERBOSE("Excluded the item '" << item_path << "' from the catalog '" << get_catalog_name() << "' node listing." << endl);
552 }
553 } // end of the while loop
554
555 closedir(dip);
556
558
559 sort(node->nodes_begin(), node->nodes_end(), ordering);
560 sort(node->leaves_begin(), node->leaves_end(), ordering);
561
562 return node;
563 }
564 catch (...) {
565 closedir(dip);
566 throw;
567 }
568}
569#endif
570
571
579void CmrCatalog::dump(ostream &strm) const
580{
581 strm << BESIndent::LMarg << prolog << "(" << (void *) this << ")" << endl;
582 BESIndent::Indent();
583
584 strm << BESIndent::LMarg << "catalog utilities: " << endl;
585 BESIndent::Indent();
586 get_catalog_utils()->dump(strm);
587 BESIndent::UnIndent();
588 BESIndent::UnIndent();
589}
590
591} // namespace cmr
virtual void dump(std::ostream &strm) const
dump the contents of this object to the specified ostream
Catalogs provide a hierarchical organization for data.
Definition: BESCatalog.h:51
virtual BESCatalogUtils * get_catalog_utils() const
Get a pointer to the utilities, customized for this catalog.
Definition: BESCatalog.h:112
virtual std::string get_catalog_name() const
Get the name for this catalog.
Definition: BESCatalog.h:102
error thrown if the BES is not allowed to access the resource requested
exception thrown if internal error encountered
error thrown if the resource requested cannot be found
error thrown if there is a user syntax error in the request or any other user error
static std::vector< std::string > split(const std::string &s, char delim='/', bool skip_empty=true)
Splits the string s into the return vector of tokens using the delimiter delim and skipping empty val...
Definition: BESUtil.cc:1065
static void check_path(const std::string &path, const std::string &root, bool follow_sym_links)
Is the combination of root + path a pathname the BES can/should access?
Definition: BESUtil.cc:382
static std::string normalize_path(const std::string &path, bool leading_separator, bool trailing_separator, std::string separator="/")
Removes duplicate separators and provides leading and trailing separators as directed.
Definition: BESUtil.cc:946
static std::string get_time(bool use_local_time=false)
Definition: BESUtil.cc:1014
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
void get_values(const std::string &s, std::vector< std::string > &vals, bool &found)
Retrieve the values of a given key, if set.
Definition: TheBESKeys.cc:371
void set_name(std::string n)
Set the name of the item.
Definition: CatalogItem.h:135
void set_size(size_t s)
Set the size of the item.
Definition: CatalogItem.h:140
void set_is_data(bool id)
Is this item data that the BES should interpret?
Definition: CatalogItem.h:150
void set_lmt(std::string lmt)
Set the LMT for this item.
Definition: CatalogItem.h:145
void set_type(item_type t)
Set the type for this item.
Definition: CatalogItem.h:155
void get_years(std::string collection_name, std::vector< std::string > &years_result)
Definition: CmrApi.cc:352
void get_days(std::string collection_name, std::string r_year, std::string r_month, std::vector< std::string > &days_result)
Definition: CmrApi.cc:445
void get_granules(std::string collection_name, std::string r_year, std::string r_month, std::string r_day, std::vector< cmr::Granule * > &granules)
Definition: CmrApi.cc:536
void get_months(std::string collection_name, std::string year, std::vector< std::string > &months_result)
Definition: CmrApi.cc:382
virtual bes::CatalogNode * get_node_OLD(const std::string &path) const
Get a CatalogNode for the given path in the current catalog.
Definition: CmrCatalog.cc:322
CmrCatalog(const std::string &name=CMR_CATALOG_NAME)
A catalog based on NASA's CMR system.
Definition: CmrCatalog.cc:85
virtual void dump(std::ostream &strm) const
dumps information about this object
Definition: CmrCatalog.cc:579