bes Updated for version 3.20.13
NgapApi.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of ngap_module, A C++ module that can be loaded in to
4// the OPeNDAP Back-End Server (BES) and is able to handle remote requests.
5
6// Copyright (c) 2020 OPeNDAP, Inc.
7// Author: Nathan Potter <ndp@opendap.org>
8//
9// This library is free software; you can redistribute it and/or
10// modify it under the terms of the GNU Lesser General Public
11// License as published by the Free Software Foundation; either
12// version 2.1 of the License, or (at your option) any later version.
13//
14// This library is distributed in the hope that it will be useful,
15// but WITHOUT ANY WARRANTY; without even the implied warranty of
16// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17// Lesser General Public License for more details.
18//
19// You should have received a copy of the GNU Lesser General Public
20// License along with this library; if not, write to the Free Software
21// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22//
23// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24
25#include "config.h"
26
27//#include <cstdio>
28//#include <cstring>
29#include <iostream>
30#include <sstream>
31#include <memory>
32#include <time.h>
33#include <curl/curl.h>
34
35//#include <libdap/util.h>
36//#include <libdap/debug.h>
37
38#include "rapidjson/document.h"
39#include "rapidjson/writer.h"
40//#include "rapidjson/prettywriter.h"
41#include "rapidjson/stringbuffer.h"
42#include "rapidjson/filereadstream.h"
43
44//#include "BESError.h"
45#include "BESNotFoundError.h"
46#include "BESSyntaxUserError.h"
47#include "BESInternalError.h"
48
49#include "BESDebug.h"
50#include "BESUtil.h"
51#include "BESStopWatch.h"
52#include "BESLog.h"
53#include "TheBESKeys.h"
54//#include "CurlUtils.h"
55#include "url_impl.h"
56#include "RemoteResource.h"
57
58#include "NgapApi.h"
59#include "NgapNames.h"
60// #include "NgapError.h"
61
62using namespace std;
63
64#define prolog string("NgapApi::").append(__func__).append("() - ")
65
66namespace ngap {
67
68const unsigned int REFRESH_THRESHOLD = 3600; // An hour
69
70
71NgapApi::NgapApi() : d_cmr_hostname(DEFAULT_CMR_ENDPOINT_URL), d_cmr_search_endpoint_path(DEFAULT_CMR_SEARCH_ENDPOINT_PATH) {
72 bool found;
73 string cmr_hostname;
74 TheBESKeys::TheKeys()->get_value(NGAP_CMR_HOSTNAME_KEY, cmr_hostname, found);
75 if (found) {
76 d_cmr_hostname = cmr_hostname;
77 }
78
79 string cmr_search_endpoint_path;
80 TheBESKeys::TheKeys()->get_value(NGAP_CMR_SEARCH_ENDPOINT_PATH_KEY, cmr_search_endpoint_path, found);
81 if (found) {
82 d_cmr_search_endpoint_path = cmr_search_endpoint_path;
83 }
84
85
86}
87
88std::string NgapApi::get_cmr_search_endpoint_url(){
89 return BESUtil::assemblePath(d_cmr_hostname , d_cmr_search_endpoint_path);
90}
91
92
93
101std::string NgapApi::build_cmr_query_url_old_rpath_format(const std::string &restified_path) {
102
103 // Make sure it starts with a '/' (see key strings above)
104 string r_path = ( restified_path[0] != '/' ? "/" : "") + restified_path;
105
106 size_t provider_index = r_path.find(NGAP_PROVIDERS_KEY);
107 if(provider_index == string::npos){
108 stringstream msg;
109 msg << prolog << "The specified path '" << r_path << "'";
110 msg << " does not contain the required path element '" << NGAP_PROVIDERS_KEY << "'";
111 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
112 }
113 if(provider_index != 0){
114 stringstream msg;
115 msg << prolog << "The specified path '" << r_path << "'";
116 msg << " has the path element '" << NGAP_PROVIDERS_KEY << "' located in the incorrect position (";
117 msg << provider_index << ") expected 0.";
118 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
119 }
120 provider_index += string(NGAP_PROVIDERS_KEY).length();
121
122 bool use_collection_concept_id = false;
123 size_t collection_index = r_path.find(NGAP_COLLECTIONS_KEY);
124 if(collection_index == string::npos) {
125 size_t concepts_index = r_path.find(NGAP_CONCEPTS_KEY);
126 if (concepts_index == string::npos) {
127 stringstream msg;
128 msg << prolog << "The specified path '" << r_path << "'";
129 msg << " contains neither the '" << NGAP_COLLECTIONS_KEY << "'";
130 msg << " nor the '" << NGAP_CONCEPTS_KEY << "'";
131 msg << " key, one must be provided.";
132 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
133 }
134 collection_index = concepts_index;
135 use_collection_concept_id = true;
136 }
137 if(collection_index <= provider_index+1){ // The value of provider has to be at least 1 character
138 stringstream msg;
139 msg << prolog << "The specified path '" << r_path << "'";
140 msg << " has the path element '" << (use_collection_concept_id?NGAP_CONCEPTS_KEY:NGAP_COLLECTIONS_KEY) << "' located in the incorrect position (";
141 msg << collection_index << ") expected at least " << provider_index+1;
142 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
143 }
144 string provider = r_path.substr(provider_index,collection_index - provider_index);
145 collection_index += use_collection_concept_id?string(NGAP_CONCEPTS_KEY).length():string(NGAP_COLLECTIONS_KEY).length();
146
147
148 size_t granule_index = r_path.find(NGAP_GRANULES_KEY);
149 if(granule_index == string::npos){
150 stringstream msg;
151 msg << prolog << "The specified path '" << r_path << "'";
152 msg << " does not contain the required path element '" << NGAP_GRANULES_KEY << "'";
153 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
154 }
155 if(granule_index <= collection_index+1){ // The value of collection must have at least one character.
156 stringstream msg;
157 msg << prolog << "The specified path '" << r_path << "'";
158 msg << " has the path element '" << NGAP_GRANULES_KEY << "' located in the incorrect position (";
159 msg << granule_index << ") expected at least " << collection_index+1;
160 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
161 }
162 string collection = r_path.substr(collection_index,granule_index - collection_index);
163 granule_index += string(NGAP_GRANULES_KEY).length();
164
165 // The granule value is the path terminus so it's every thing after the key
166 string granule = r_path.substr(granule_index);
167
168 // Build the CMR query URL for the dataset
169 string cmr_url = get_cmr_search_endpoint_url() + "?";
170 {
171 // This easy handle is only created so we can use the curl_easy_escape() on the token values
172 CURL *ceh = curl_easy_init();
173 char *esc_url_content;
174
175 // Add provider
176 esc_url_content = curl_easy_escape(ceh, provider.c_str(), provider.size());
177 cmr_url += string(CMR_PROVIDER).append("=").append(esc_url_content).append("&");
178 curl_free(esc_url_content);
179
180 esc_url_content = curl_easy_escape(ceh, collection.c_str(), collection.size());
181 if(use_collection_concept_id){
182 // Add collection_concept_id
183 cmr_url += string(CMR_COLLECTION_CONCEPT_ID).append("=").append(esc_url_content).append("&");
184 }
185 else {
186 // Add entry_title
187 cmr_url += string(CMR_ENTRY_TITLE).append("=").append(esc_url_content).append("&");
188
189 }
190 curl_free(esc_url_content);
191
192 esc_url_content = curl_easy_escape(ceh, granule.c_str(), granule.size());
193 cmr_url += string(CMR_GRANULE_UR).append("=").append(esc_url_content);
194 curl_free(esc_url_content);
195
196 curl_easy_cleanup(ceh);
197 }
198 return cmr_url;
199}
200
217std::string NgapApi::build_cmr_query_url(const std::string &restified_path) {
218
219 // Make sure it starts with a '/' (see key strings above)
220 string r_path = ( restified_path[0] != '/' ? "/" : "") + restified_path;
221
222 size_t provider_index = r_path.find(NGAP_PROVIDERS_KEY);
223 if(provider_index != string::npos){
224 return build_cmr_query_url_old_rpath_format(restified_path);
225 }
226
227 size_t collections_key_index = r_path.find(NGAP_COLLECTIONS_KEY);
228 if(collections_key_index == string::npos) {
229 stringstream msg;
230 msg << prolog << "The specified path '" << r_path << "'";
231 msg << " contains neither the '" << NGAP_COLLECTIONS_KEY << "'";
232 msg << " nor the '" << NGAP_CONCEPTS_KEY << "'";
233 msg << " one must be provided.";
234 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
235 }
236 if(collections_key_index != 0){ // The COLLECTIONS_KEY comes first
237 stringstream msg;
238 msg << prolog << "The specified path '" << r_path << "'";
239 msg << " has the path element '" << NGAP_COLLECTIONS_KEY << "' located in the incorrect position (";
240 msg << collections_key_index << ") expected at least " << provider_index + 1;
241 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
242 }
243 // This is now the beginning of the collection_concept_id value.
244 size_t collections_index = collections_key_index + string(NGAP_COLLECTIONS_KEY).length();
245
246 size_t granules_key_index = r_path.find(NGAP_GRANULES_KEY);
247 if(granules_key_index == string::npos){
248 stringstream msg;
249 msg << prolog << "The specified path '" << r_path << "'";
250 msg << " does not contain the required path element '" << NGAP_GRANULES_KEY << "'";
251 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
252 }
253
254 // The collection key must precede the granules key in the path,
255 // and the collection name must have at least one character.
256 if(granules_key_index <= collections_index + 1){
257 stringstream msg;
258 msg << prolog << "The specified path '" << r_path << "'";
259 msg << " has the path element '" << NGAP_GRANULES_KEY << "' located in the incorrect position (";
260 msg << granules_key_index << ") expected at least " << collections_index + 1;
261 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
262 }
263 size_t granules_index = granules_key_index + string(NGAP_GRANULES_KEY).length();
264 // The granule_name value is the path terminus so it's every thing after the key
265 string granule_name = r_path.substr(granules_index);
266
267 // Now we need to work on the collections value to eliminate the optional parts.
268 // This is the entire collections string including any optional components.
269 string collection_name = r_path.substr(collections_index, granules_key_index - collections_index);
270
271 // Since there may be optional parameters we need to strip them off to get the collection_concept_id
272 // And, since we know that collection_concept_id will never contain a '/', and we know that the optional
273 // part is separated from the collection_concept_id by a '/' we look for that and of we find it we truncate
274 // the value at that spot.
275 string optional_part;
276 size_t slash_pos = collection_name.find('/');
277 if(slash_pos != string::npos){
278 optional_part = collection_name.substr(slash_pos);
279 BESDEBUG(MODULE, prolog << "Found optional collections name component: " << optional_part << endl);
280 collection_name = collection_name.substr(0,slash_pos);
281 }
282 BESDEBUG(MODULE, prolog << "Found collection_name (aka collection_concept_id): " << collection_name << endl);
283
284 // Build the CMR query URL for the dataset
285 string cmr_url = get_cmr_search_endpoint_url() + "?";
286 {
287 // This easy handle is only created so we can use the curl_easy_escape() on the token values
288 CURL *ceh = curl_easy_init();
289 char *esc_url_content;
290
291 esc_url_content = curl_easy_escape(ceh, collection_name.c_str(), collection_name.size());
292 cmr_url += string(CMR_COLLECTION_CONCEPT_ID).append("=").append(esc_url_content).append("&");
293 curl_free(esc_url_content);
294
295 esc_url_content = curl_easy_escape(ceh, granule_name.c_str(), granule_name.size());
296 cmr_url += string(CMR_GRANULE_UR).append("=").append(esc_url_content);
297 curl_free(esc_url_content);
298
299 curl_easy_cleanup(ceh);
300 }
301 return cmr_url;
302}
303
314std::string NgapApi::find_get_data_url_in_granules_umm_json_v1_4(const std::string &restified_path, rapidjson::Document &cmr_granule_response)
315{
316
317 string data_access_url;
318
319 rapidjson::Value &val = cmr_granule_response["hits"];
320 int hits = val.GetInt();
321 if (hits < 1) {
322 throw BESNotFoundError(string("The specified path '").append(restified_path).append(
323 "' does not identify a granule in CMR."), __FILE__, __LINE__);
324 }
325
326 rapidjson::Value &items = cmr_granule_response["items"];
327 if (items.IsArray()) {
328 stringstream ss;
329 if(BESDebug::IsSet(MODULE)){
330 const string RJ_TYPE_NAMES[] = {string("kNullType"),string("kFalseType"),string("kTrueType"),
331 string("kObjectType"),string("kArrayType"),string("kStringType"),string("kNumberType")};
332 for (rapidjson::SizeType i = 0; i < items.Size(); i++) // Uses SizeType instead of size_t
333 ss << "items[" << i << "]: " << RJ_TYPE_NAMES[items[i].GetType()] << endl;
334 BESDEBUG(MODULE, prolog << "items size: " << items.Size() << endl << ss.str() << endl);
335 }
336
337 rapidjson::Value &items_obj = items[0];
338 // rapidjson::GenericMemberIterator<false, rapidjson::UTF8<char>, rapidjson::MemoryPoolAllocator<rapidjson::CrtAllocator>> mitr = items_obj.FindMember("umm");
339 auto mitr = items_obj.FindMember("umm");
340
341 rapidjson::Value &umm = mitr->value;
342 mitr = umm.FindMember("RelatedUrls");
343 if (mitr == umm.MemberEnd()) {
344 throw BESInternalError("Error! The umm/RelatedUrls object was not located!", __FILE__, __LINE__);
345 }
346 rapidjson::Value &related_urls = mitr->value;
347
348 if (!related_urls.IsArray()) {
349 throw BESNotFoundError("Error! The RelatedUrls object in the CMR response is not an array!", __FILE__,
350 __LINE__);
351 }
352
353 BESDEBUG(MODULE, prolog << " Found RelatedUrls array in CMR response." << endl);
354
355 bool noSubtype;
356 for (rapidjson::SizeType i = 0; i < related_urls.Size() && data_access_url.empty(); i++) {
357 rapidjson::Value &obj = related_urls[i];
358 mitr = obj.FindMember("URL");
359 if (mitr == obj.MemberEnd()) {
360 stringstream err;
361 err << "Error! The umm/RelatedUrls[" << i << "] does not contain the URL object";
362 throw BESInternalError(err.str(), __FILE__, __LINE__);
363 }
364 rapidjson::Value &r_url = mitr->value;
365
366 mitr = obj.FindMember("Type");
367 if (mitr == obj.MemberEnd()) {
368 stringstream err;
369 err << "Error! The umm/RelatedUrls[" << i << "] does not contain the Type object";
370 throw BESInternalError(err.str(), __FILE__, __LINE__);
371 }
372 rapidjson::Value &r_type = mitr->value;
373
374 noSubtype = obj.FindMember("Subtype") == obj.MemberEnd();
375
376 BESDEBUG(MODULE, prolog << "RelatedUrl Object:" <<
377 " URL: '" << r_url.GetString() << "'" <<
378 " Type: '" << r_type.GetString() << "'" <<
379 " SubType: '" << (noSubtype ? "Absent" : "Present") << "'" << endl);
380
381 if ((r_type.GetString() == string(CMR_URL_TYPE_GET_DATA)) && noSubtype) {
382
383 // Because a member of RelatedUrls may contain a URL of Type GET DATA with the s3:// protocol
384 // as well as a Type GET DATA URL which uses https:// or http://
385 string candidate_url = r_url.GetString();
386 if(candidate_url.substr(0,8) == "https://" || candidate_url.substr(0,7) == "http://"){
387 data_access_url = candidate_url;
388 }
389 }
390 }
391 }
392
393 if (data_access_url.empty()) {
394 throw BESInternalError(string("ERROR! Failed to locate a data access URL for the path: ") + restified_path,
395 __FILE__, __LINE__);
396 }
397
398 return data_access_url;
399}
400
401
402
425 string NgapApi::convert_ngap_resty_path_to_data_access_url(
426 const std::string &restified_path,
427 const std::string &uid
428 ) {
429 BESDEBUG(MODULE, prolog << "BEGIN" << endl);
430 string data_access_url;
431
432 string cmr_query_url = build_cmr_query_url(restified_path);
433
434 BESDEBUG(MODULE, prolog << "CMR Request URL: " << cmr_query_url << endl);
435
436 BESDEBUG(MODULE, prolog << "Building new RemoteResource." << endl);
437 std::shared_ptr<http::url> cmr_query_url_ptr(new http::url(cmr_query_url));
438 http::RemoteResource cmr_query(cmr_query_url_ptr, uid);
439 {
440 BESStopWatch besTimer;
441 if (BESISDEBUG(MODULE) || BESDebug::IsSet(TIMING_LOG_KEY) || BESLog::TheLog()->is_verbose()){
442 besTimer.start("CMR Query: " + cmr_query_url);
443 }
444 cmr_query.retrieveResource();
445 }
446 rapidjson::Document cmr_response = cmr_query.get_as_json();
447
448 data_access_url = find_get_data_url_in_granules_umm_json_v1_4(restified_path, cmr_response);
449
450 BESDEBUG(MODULE, prolog << "END (data_access_url: "<< data_access_url << ")" << endl);
451
452 return data_access_url;
453 }
454
455
456
457
458 bool NgapApi::signed_url_is_expired(const http::url &signed_url)
459 {
460 bool is_expired;
461 time_t now;
462 time(&now); /* get current time; same as: timer = time(NULL) */
463 BESDEBUG(MODULE, prolog << "now: " << now << endl);
464
465 time_t expires = now;
466 string cf_expires = signed_url.query_parameter_value(CLOUDFRONT_EXPIRES_HEADER_KEY);
467 string aws_expires = signed_url.query_parameter_value(AMS_EXPIRES_HEADER_KEY);
468 time_t ingest_time = signed_url.ingest_time();
469
470 if(!cf_expires.empty()){ // CloudFront expires header?
471 expires = stoll(cf_expires);
472 BESDEBUG(MODULE, prolog << "Using "<< CLOUDFRONT_EXPIRES_HEADER_KEY << ": " << expires << endl);
473 }
474 else if(!aws_expires.empty()){
475 // AWS Expires header?
476 //
477 // By default we'll use the time we made the URL object, ingest_time
478 time_t start_time = ingest_time;
479 // But if there's an AWS Date we'll parse that and compute the time
480 // @TODO move to NgapApi::decompose_url() and add the result to the map
481 string aws_date = signed_url.query_parameter_value(AWS_DATE_HEADER_KEY);
482 if(!aws_date.empty()){
483 string date = aws_date; // 20200624T175046Z
484 string year = date.substr(0,4);
485 string month = date.substr(4,2);
486 string day = date.substr(6,2);
487 string hour = date.substr(9,2);
488 string minute = date.substr(11,2);
489 string second = date.substr(13,2);
490
491 BESDEBUG(MODULE, prolog << "date: "<< date <<
492 " year: " << year << " month: " << month << " day: " << day <<
493 " hour: " << hour << " minute: " << minute << " second: " << second << endl);
494
495 struct tm *ti = gmtime(&now);
496 ti->tm_year = stoll(year) - 1900;
497 ti->tm_mon = stoll(month) - 1;
498 ti->tm_mday = stoll(day);
499 ti->tm_hour = stoll(hour);
500 ti->tm_min = stoll(minute);
501 ti->tm_sec = stoll(second);
502
503 BESDEBUG(MODULE, prolog << "ti->tm_year: "<< ti->tm_year <<
504 " ti->tm_mon: " << ti->tm_mon <<
505 " ti->tm_mday: " << ti->tm_mday <<
506 " ti->tm_hour: " << ti->tm_hour <<
507 " ti->tm_min: " << ti->tm_min <<
508 " ti->tm_sec: " << ti->tm_sec << endl);
509
510
511 start_time = mktime(ti);
512 BESDEBUG(MODULE, prolog << "AWS (computed) start_time: "<< start_time << endl);
513 }
514 expires = start_time + stoll(aws_expires);
515 BESDEBUG(MODULE, prolog << "Using "<< AMS_EXPIRES_HEADER_KEY << ": " << aws_expires <<
516 " (expires: " << expires << ")" << endl);
517 }
518 time_t remaining = expires - now;
519 BESDEBUG(MODULE, prolog << "expires_time: " << expires <<
520 " remaining_time: " << remaining <<
521 " refresh_threshold: " << REFRESH_THRESHOLD << endl);
522
523 is_expired = remaining < REFRESH_THRESHOLD;
524 BESDEBUG(MODULE, prolog << "is_expired: " << (is_expired?"true":"false") << endl);
525
526 return is_expired;
527 }
528
529} // namespace ngap
530
static bool IsSet(const std::string &flagName)
see if the debug context flagName is set to true
Definition: BESDebug.h:168
exception thrown if internal error encountered
error thrown if the resource requested cannot be found
virtual bool start(std::string name)
Definition: BESStopWatch.cc:67
error thrown if there is a user syntax error in the request or any other user error
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:801
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:340
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
rapidjson::Document get_as_json()
get_as_json() This function returns the cached resource parsed into a JSON document.
virtual std::string query_parameter_value(const std::string &key) const
Definition: url_impl.cc:251