bes Updated for version 3.20.13
RemoteResource.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of the BES http package, part of the Hyrax data server.
4
5// Copyright (c) 2020 OPeNDAP, Inc.
6// Author: Nathan Potter <ndp@opendap.org>
7//
8// This library is free software; you can redistribute it and/or
9// modify it under the terms of the GNU Lesser General Public
10// License as published by the Free Software Foundation; either
11// version 2.1 of the License, or (at your option) any later version.
12//
13// This library is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16// Lesser General Public License for more details.
17//
18// You should have received a copy of the GNU Lesser General Public
19// License along with this library; if not, write to the Free Software
20// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21//
22// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23
24// Authors:
25// ndp Nathan Potter <ndp@opendap.org>
26
27#include "config.h"
28
29#include <sys/stat.h>
30#include <sys/types.h>
31#include <unistd.h>
32
33#include <sstream>
34#include <fstream>
35#include <string>
36#include <iostream>
37#include <utility>
38
39#include "rapidjson/document.h"
40
41#include "BESInternalError.h"
42#include "BESForbiddenError.h"
43#include "BESSyntaxUserError.h"
44#include "BESNotFoundError.h"
45#include "BESTimeoutError.h"
46
47#include "BESDebug.h"
48#include "BESUtil.h"
49
50#include "HttpCache.h"
51#include "HttpUtils.h"
52#include "CurlUtils.h"
53#include "HttpNames.h"
54#include "RemoteResource.h"
55#include "TheBESKeys.h"
56#include "BESStopWatch.h"
57#include "BESLog.h"
58
59using namespace std;
60
61#define BES_CATALOG_ROOT_KEY "BES.Catalog.catalog.RootDirectory"
62
63#define prolog std::string("RemoteResource::").append(__func__).append("() - ")
64#define MODULE HTTP_MODULE
65
66namespace http {
67
68RemoteResource::RemoteResource(
69 std::shared_ptr<http::url> target_url,
70 const std::string &uid,
71 long long expiredInterval)
72 : d_remoteResourceUrl(std::move(target_url)){
73
74 d_fd = 0;
75 d_initialized = false;
76
77 d_uid = uid;
78
79 d_resourceCacheFileName.clear();
80 d_response_headers = new vector<string>();
81 d_http_response_headers = new map<string, string>();
82
83 d_expires_interval = expiredInterval;
84
85
86 if(d_remoteResourceUrl->protocol() == FILE_PROTOCOL){
87 BESDEBUG(MODULE,prolog << "Found FILE protocol." << endl);
88 d_resourceCacheFileName = d_remoteResourceUrl->path();
89 while(BESUtil::endsWith(d_resourceCacheFileName,"/")){
90 // Strip trailing slashes, because this about files, not directories
91 d_resourceCacheFileName = d_resourceCacheFileName.substr(0,d_resourceCacheFileName.length()-1);
92 }
93 // Now we check that the data is in the BES_CATALOG_ROOT
94 string catalog_root;
95 bool found;
96 TheBESKeys::TheKeys()->get_value(BES_CATALOG_ROOT_KEY,catalog_root,found );
97 if(!found){
98 throw BESInternalError( prolog + "ERROR - "+ BES_CATALOG_ROOT_KEY + "is not set",__FILE__,__LINE__);
99 }
100 if(d_resourceCacheFileName.find(catalog_root) !=0 ){
101 d_resourceCacheFileName = BESUtil::pathConcat(catalog_root,d_resourceCacheFileName);
102 }
103 BESDEBUG(MODULE,"d_resourceCacheFileName: " << d_resourceCacheFileName << endl);
104 d_initialized =true;
105 }
106 else if( d_remoteResourceUrl->protocol() == HTTPS_PROTOCOL || d_remoteResourceUrl->protocol() == HTTP_PROTOCOL ){
107 BESDEBUG(MODULE, prolog << "URL: " << d_remoteResourceUrl->str() << endl);
108#if 0
109
110 if (!d_uid.empty()){
111 string client_id_hdr = "User-Id: " + d_uid;
112 BESDEBUG(MODULE, prolog << client_id_hdr << endl);
113 d_request_headers.push_back(client_id_hdr);
114 }
115 if (!d_echo_token.empty()){
116 string echo_token_hdr = "Echo-Token: " + d_echo_token;
117 BESDEBUG(MODULE, prolog << echo_token_hdr << endl);
118 d_request_headers.push_back(echo_token_hdr);
119 }
120#endif
121
122 }
123 else {
124 string err = prolog + "Unsupported protocol: " + d_remoteResourceUrl->protocol();
125 throw BESInternalError(err, __FILE__, __LINE__);
126 }
127
128 // BESDEBUG(MODULE, prolog << "d_curl: " << d_curl << endl);
129
130}
131
132
// Retired constructor overload, excluded from compilation by the surrounding #if 0 / #endif.
// It took the URL as a plain string plus an echo token, and built User-Id / Echo-Token request
// headers for http(s) URLs. It uses d_echo_token and d_request_headers, which the active
// constructor does not set, so it cannot simply be re-enabled.
// NOTE(review): presumably kept for reference only - confirm before deleting.
133#if 0
139 RemoteResource::RemoteResource(const std::string &url, const std::string &uid, const std::string &echo_token) {
140
141 d_fd = 0;
142 d_initialized = false;
143
144 d_uid = uid;
145 d_echo_token = echo_token;
146
147 // d_curl = curl::init(url);
148
149 d_resourceCacheFileName.clear();
150 d_response_headers = new vector<string>();
151 d_request_headers = new vector<string>();
152 d_http_response_headers = new map<string, string>();
153
154 if (url.empty()) {
155 throw BESInternalError(prolog + "Remote resource URL is empty.", __FILE__, __LINE__);
156 }
157
158 if(url.find(FILE_PROTOCOL) == 0){
159 d_resourceCacheFileName = url.substr(strlen(FILE_PROTOCOL));
160 while(BESUtil::endsWith(d_resourceCacheFileName,"/")){
161 // Strip trailing slashes, because this about files, not directories
162 d_resourceCacheFileName = d_resourceCacheFileName.substr(0,d_resourceCacheFileName.length()-1);
163 }
164 // Now we check that the data is in the BES_CATALOG_ROOT
165 string catalog_root;
166 bool found;
167 TheBESKeys::TheKeys()->get_value(BES_CATALOG_ROOT_KEY,catalog_root,found );
168 if(!found){
169 throw BESInternalError( prolog + "ERROR - "+ BES_CATALOG_ROOT_KEY + "is not set",__FILE__,__LINE__);
170 }
171 if(d_resourceCacheFileName.find(catalog_root) !=0 ){
172 d_resourceCacheFileName = BESUtil::pathConcat(catalog_root,d_resourceCacheFileName);
173 }
174 d_initialized =true;
175 }
176 else if(url.find(HTTPS_PROTOCOL) == 0 || url.find(HTTP_PROTOCOL) == 0){
177 d_remoteResourceUrl = url;
178 BESDEBUG(MODULE, prolog << "URL: " << d_remoteResourceUrl << endl);
179
180 if (!d_uid.empty()){
181 string client_id_hdr = "User-Id: " + d_uid;
182 BESDEBUG(MODULE, prolog << client_id_hdr << endl);
183 d_request_headers->push_back(client_id_hdr);
184 }
185 if (!d_echo_token.empty()){
186 string echo_token_hdr = "Echo-Token: " + d_echo_token;
187 BESDEBUG(MODULE, prolog << echo_token_hdr << endl);
188 d_request_headers->push_back(echo_token_hdr);
189 }
190 }
191 else {
192 string err = prolog + "Unsupported protocol: " + url;
193 throw BESInternalError(err, __FILE__, __LINE__);
194 }
195
196
197
198 // BESDEBUG(MODULE, prolog << "d_curl: " << d_curl << endl);
199 }
200#endif
201
202
207RemoteResource::~RemoteResource() {
208 BESDEBUG(MODULE, prolog << "BEGIN resourceURL: " << d_remoteResourceUrl->str() << endl);
209
210 delete d_response_headers;
211 d_response_headers = 0;
212 BESDEBUG(MODULE, prolog << "Deleted d_response_headers." << endl);
213
214 if (!d_resourceCacheFileName.empty()) {
215 HttpCache *cache = HttpCache::get_instance();
216 if (cache) {
217 cache->unlock_and_close(d_resourceCacheFileName);
218 BESDEBUG(MODULE, prolog << "Closed and unlocked " << d_resourceCacheFileName << endl);
219 d_resourceCacheFileName.clear();
220 }
221 }
222 BESDEBUG(MODULE, prolog << "END" << endl);
223}
224
229std::string RemoteResource::getCacheFileName() {
230 if (!d_initialized) {
231 throw BESInternalError(prolog + "STATE ERROR: Remote Resource " + d_remoteResourceUrl->str() +
232 " has Not Been Retrieved.", __FILE__, __LINE__);
233 }
234 return d_resourceCacheFileName;
235}
236
244void RemoteResource::retrieveResource() {
245 std::map<std::string, std::string> content_filters;
246 retrieveResource(content_filters);
247}
248
260void RemoteResource::retrieveResource(const std::map<std::string, std::string> &content_filters) {
261 BESDEBUG(MODULE, prolog << "BEGIN resourceURL: " << d_remoteResourceUrl->str() << endl);
262 bool mangle = true;
263
264 // TODO come back and visit this condition and determine if it is still needed jhrg/sbl 4.14.21
265 if (d_initialized) {
266 BESDEBUG(MODULE, prolog << "END Already initialized." << endl);
267 return;
268 }
269 // Get a pointer to the singleton cache instance for this process.
270 HttpCache *cache = HttpCache::get_instance();
271 if (!cache) {
272 ostringstream oss;
273 oss << prolog << "FAILED to get local cache. ";
274 oss << "Unable to proceed with request for " << this->d_remoteResourceUrl->str();
275 oss << " The server MUST have a valid HTTP cache configuration to operate." << endl;
276 BESDEBUG(MODULE, oss.str());
277 throw BESInternalError(oss.str(), __FILE__, __LINE__);
278 }
279
280 // Get the name of the file in the cache (either the code finds this file or
281 // or it makes it).
282 d_resourceCacheFileName = cache->get_cache_file_name(d_uid, d_remoteResourceUrl->str(), mangle);
283 BESDEBUG(MODULE, prolog << "d_resourceCacheFileName: " << d_resourceCacheFileName << endl);
284
285 // @TODO MAKE THIS RETRIEVE THE CACHED DATA TYPE IF THE CACHED RESPONSE IF FOUND
286 // We need to know the type of the resource. HTTP headers are the preferred way to determine the type.
287 // Unfortunately, the current code losses both the HTTP headers sent from the request and the derived type
288 // to subsequent accesses of the cached object. Since we have to have a type, for now we just set the type
289 // from the url. If down below we DO an HTTP GET then the headers will be evaluated and the type set by setType()
290 // But really - we gotta fix this.
291 http::get_type_from_url(d_remoteResourceUrl->str(), d_type);
292 BESDEBUG(MODULE, prolog << "d_type: " << d_type << endl);
293
294 try {
295 if (cache->get_exclusive_lock(d_resourceCacheFileName, d_fd)) {
296 BESDEBUG(MODULE,
297 prolog << "Remote resource is already in cache. cache_file_name: " << d_resourceCacheFileName
298 << endl);
299
300 if (cached_resource_is_expired()) {
301 BESDEBUG(MODULE, prolog << "EXISTS - UPDATING " << endl);
302 update_file_and_headers(content_filters);
303 cache->exclusive_to_shared_lock(d_fd);
304 } else {
305 BESDEBUG(MODULE, prolog << "EXISTS - LOADING " << endl);
306 cache->exclusive_to_shared_lock(d_fd);
307 load_hdrs_from_file();
308 }
309 d_initialized = true;
310 return;
311 } else {
312 // Now we actually need to reach out across the interwebs and retrieve the remote resource and put it's
313 // content into a local cache file, given that it's not in the cache.
314 // First make an empty file and get an exclusive lock on it.
315 if (cache->create_and_lock(d_resourceCacheFileName, d_fd)) {
316 BESDEBUG(MODULE, prolog << "DOESN'T EXIST - CREATING " << endl);
317 update_file_and_headers(content_filters);
318 } else {
319 BESDEBUG(MODULE, prolog << " WAS CREATED - LOADING " << endl);
320 cache->get_read_lock(d_resourceCacheFileName, d_fd);
321 load_hdrs_from_file();
322 }
323 d_initialized = true;
324 return;
325 }
326
327 stringstream msg;
328 msg << prolog + "Failed to acquire cache read lock for remote resource: '";
329 msg << d_remoteResourceUrl->str() << endl;
330 throw BESInternalError(msg.str(), __FILE__, __LINE__);
331
332 }
333 catch (BESError &besError) {
334 BESDEBUG(MODULE, prolog << "Caught BESError. type: " << besError.get_bes_error_type() <<
335 " message: '" << besError.get_message() <<
336 "' file: " << besError.get_file() << " line: " << besError.get_line() <<
337 " Will unlock cache and re-throw." << endl);
338 cache->unlock_cache();
339 throw;
340 }
341 catch (...) {
342 BESDEBUG(MODULE, prolog << "Caught unknown exception. Will unlock cache and re-throw." << endl);
343 cache->unlock_cache();
344 throw;
345 }
346
347} //end RemoteResource::retrieveResource()
348
352void RemoteResource::update_file_and_headers(){
353 std::map<std::string, std::string> content_filters;
354 update_file_and_headers(content_filters);
355}
356
362void RemoteResource::update_file_and_headers(const std::map<std::string, std::string> &content_filters){
363
364 // Get a pointer to the singleton cache instance for this process.
365 HttpCache *cache = HttpCache::get_instance();
366 if (!cache) {
367 ostringstream oss;
368 oss << prolog << "FAILED to get local cache. ";
369 oss << "Unable to proceed with request for " << this->d_remoteResourceUrl->str();
370 oss << " The server MUST have a valid HTTP cache configuration to operate." << endl;
371 BESDEBUG(MODULE, oss.str());
372 throw BESInternalError(oss.str(), __FILE__, __LINE__);
373 }
374
375 // Write the remote resource to the cache file.
376 try {
377 writeResourceToFile(d_fd);
378 }
379 catch (...) {
380 // If things went south then we need to dump the file because we'll end up with an empty/bogus file clogging the cache
381 unlink(d_resourceCacheFileName.c_str());
382 throw;
383 }
384
385 // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
386 // Filter the response file - If content_filters map is empty then nothing is done.
387 filter_retrieved_resource(content_filters);
388
389 // Write the headers to the appropriate cache file.
390 string hdr_filename = d_resourceCacheFileName + ".hdrs";
391 std::ofstream hdr_out(hdr_filename.c_str());
392 try {
393 for (size_t i = 0; i < this->d_response_headers->size(); i++) {
394 hdr_out << (*d_response_headers)[i] << endl;
395 }
396 }
397 catch (...) {
398 // If this fails for any reason we:
399 hdr_out.close(); // Close the stream
400 unlink(hdr_filename.c_str()); // unlink the file
401 unlink(d_resourceCacheFileName.c_str()); // unlink the primary cache file.
402 throw;
403 }
404
405 // #########################################################################################################
406
407 // Change the exclusive lock on the new file to a shared lock. This keeps
408 // other processes from purging the new file and ensures that the reading
409 // process can use it.
410 cache->exclusive_to_shared_lock(d_fd);
411 BESDEBUG(MODULE, prolog << "Converted exclusive cache lock to shared lock." << endl);
412
413 // Now update the total cache size info and purge if needed. The new file's
414 // name is passed into the purge method because this process cannot detect its
415 // own lock on the file.
416 unsigned long long size = cache->update_cache_info(d_resourceCacheFileName);
417 BESDEBUG(MODULE, prolog << "Updated cache info" << endl);
418
419 if (cache->cache_too_big(size)) {
420 cache->update_and_purge(d_resourceCacheFileName);
421 BESDEBUG(MODULE, prolog << "Updated and purged cache." << endl);
422 }
423 BESDEBUG(MODULE, prolog << "END" << endl);
424
425 return;
426} //end RemoteResource::update_file_and_headers()
427
431void RemoteResource::load_hdrs_from_file(){
432 string hdr_filename = d_resourceCacheFileName + ".hdrs";
433 std::ifstream hdr_ifs(hdr_filename.c_str());
434
435 if(!hdr_ifs.is_open()){
436 stringstream msg;
437 msg << "ERROR. Internal state error. The headers file: " << hdr_filename << " could not be opened for reading.";
438 BESDEBUG(MODULE, prolog << msg.str() << endl);
439 throw BESInternalError(msg.str(), __FILE__, __LINE__);
440 }
441
442 BESDEBUG(MODULE, prolog << "Reading response headers from: " << hdr_filename << endl);
443 for (std::string line; std::getline(hdr_ifs, line);) {
444 (*d_response_headers).push_back(line);
445 BESDEBUG(MODULE, prolog << "header: " << line << endl);
446 }
447 ingest_http_headers_and_type();
448} //end RemoteResource::load_hdrs_from_file()
449
457bool RemoteResource::cached_resource_is_expired(){
458 BESDEBUG(MODULE, prolog << "BEGIN" << endl);
459
460 struct stat statbuf;
461 if (stat(d_resourceCacheFileName.c_str(), &statbuf) == -1){
462 throw BESNotFoundError(strerror(errno), __FILE__, __LINE__);
463 }//end if
464 BESDEBUG(MODULE, prolog << "File exists" << endl);
465
466 time_t cacheTime = statbuf.st_ctime;
467 BESDEBUG(MODULE, prolog << "Cache file creation time: " << cacheTime << endl);
468 time_t nowTime = time(0);
469 BESDEBUG(MODULE, prolog << "Time now: " << nowTime << endl);
470 double diffSeconds = difftime(nowTime,cacheTime);
471 BESDEBUG(MODULE, prolog << "Time difference between cacheTime and nowTime: " << diffSeconds << endl);
472
473 if (diffSeconds > d_expires_interval){
474 BESDEBUG(MODULE, prolog << " refresh = TRUE " << endl);
475 return true;
476 }
477 else{
478 BESDEBUG(MODULE, prolog << " refresh = FALSE " << endl);
479 return false;
480 }
481} //end RemoteResource::is_cache_resource_expired()
482
491void RemoteResource::writeResourceToFile(int fd) {
492
493 BESDEBUG(MODULE, prolog << "BEGIN" << endl);
494 try {
495
496 BESStopWatch besTimer;
497 if (BESDebug::IsSet("rr") || BESDebug::IsSet(MODULE) || BESDebug::IsSet(TIMING_LOG_KEY) || BESLog::TheLog()->is_verbose()){
498 besTimer.start(prolog + "source url: " + d_remoteResourceUrl->str());
499 }
500
501 int status = lseek(fd, 0, SEEK_SET);
502 if (-1 == status)
503 throw BESNotFoundError("Could not seek within the response file.", __FILE__, __LINE__);
504 BESDEBUG(MODULE, prolog << "Reset file descriptor to start of file." << endl);
505
506 status = ftruncate(fd, 0);
507 if (-1 == status)
508 throw BESInternalError("Could not truncate the file prior to updating from remote. ", __FILE__, __LINE__);
509 BESDEBUG(MODULE, prolog << "Truncated file, length is zero." << endl);
510
511 BESDEBUG(MODULE, prolog << "Saving resource " << d_remoteResourceUrl << " to cache file " << d_resourceCacheFileName << endl);
512 curl::http_get_and_write_resource(d_remoteResourceUrl, fd, d_response_headers); // Throws BESInternalError if there is a curl error.
513
514 BESDEBUG(MODULE, prolog << "Resource " << d_remoteResourceUrl->str() << " saved to cache file " << d_resourceCacheFileName << endl);
515
516 // rewind the file
517 // FIXME I think the idea here is that we have the file open and we should just keep
518 // reading from it. But the container mechanism works with file names, so we will
519 // likely have to open the file again. If that's true, lets remove this call. jhrg 3.2.18
520 status = lseek(fd, 0, SEEK_SET);
521 if (-1 == status)
522 throw BESNotFoundError("Could not seek within the response file.", __FILE__, __LINE__);
523 BESDEBUG(MODULE, prolog << "Reset file descriptor to start of file." << endl);
524
525 // @TODO CACHE THE DATA TYPE OR THE HTTP HEADERS SO WHEN WE ARE RETRIEVING THE CACHED OBJECT WE CAN GET THE CORRECT TYPE
526 ingest_http_headers_and_type();
527 }
528 catch (BESError &e) {
529 throw;
530 }
531 BESDEBUG(MODULE, prolog << "END" << endl);
532}
533
537void RemoteResource::ingest_http_headers_and_type() {
538 BESDEBUG(MODULE, prolog << "BEGIN" << endl);
539
540 const string colon_space = ": ";
541 for (size_t i = 0; i < this->d_response_headers->size(); i++) {
542 string header = (*d_response_headers)[i];
543 BESDEBUG(MODULE, prolog << "Processing header " << header << endl);
544 size_t colon_index = header.find(colon_space);
545 if(colon_index == string::npos){
546 BESDEBUG(MODULE, prolog << "Unable to locate the colon space \": \" delimiter in the header " <<
547 "string: '" << header << "' SKIPPING!" << endl);
548 }
549 else {
550 string key = BESUtil::lowercase(header.substr(0, colon_index));
551 string value = header.substr(colon_index + colon_space.length());
552 BESDEBUG(MODULE, prolog << "key: " << key << " value: " << value << endl);
553 (*d_http_response_headers)[key] = value;
554 }
555 }
556 BESDEBUG(MODULE, prolog << "Ingested " << d_http_response_headers->size() << " response headers." << endl);
557
558 std::map<string, string>::iterator it;
559 string type;
560
561 // Try and figure out the file type first from the
562 // Content-Disposition in the http header response.
563 BESDEBUG(MODULE, prolog << "Checking Content-Disposition headers for type information." << endl);
564 string content_disp_hdr;
565 content_disp_hdr = get_http_response_header("content-disposition");
566 if (!content_disp_hdr.empty()) {
567 // Content disposition exists, grab the filename
568 // attribute
569 http::get_type_from_disposition(content_disp_hdr, type);
570 BESDEBUG(MODULE,prolog << "Evaluated content-disposition '" << content_disp_hdr << "' matched type: \"" << type << "\"" << endl);
571 }
572
573 // still haven't figured out the type. Check the content-type
574 // next, translate to the BES MODULE name. It's also possible
575 // that even though Content-disposition was available, we could
576 // not determine the type of the file.
577 BESDEBUG(MODULE, prolog << "Checking Content-Type headers for type information." << endl);
578 string content_type = get_http_response_header("content-type");
579 if (type.empty() && !content_type.empty()) {
580 http::get_type_from_content_type(content_type, type);
581 BESDEBUG(MODULE,prolog << "Evaluated content-type '" << content_type << "' matched type \"" << type << "\"" << endl);
582 }
583
584 // still haven't figured out the type. Now check the actual URL
585 // and see if we can't match the URL to a MODULE name
586 BESDEBUG(MODULE, prolog << "Checking URL path for type information." << endl);
587 if (type.empty()) {
588 http::get_type_from_url(d_remoteResourceUrl->str(), type);
589 BESDEBUG(MODULE, prolog << "Evaluated url '" << d_remoteResourceUrl->str() << "' matched type: \"" << type << "\"" << endl);
590 }
591 // still couldn't figure it out, punt
592 if (type.empty()) {
593 string err = prolog + "Unable to determine the type of data"
594 + " returned from '" + d_remoteResourceUrl->str() + "' Setting type to 'unknown'";
595 BESDEBUG(MODULE, err << endl);
596 type = "unknown";
597 //throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
598 }
599 d_type = type;
600 BESDEBUG(MODULE, prolog << "END (dataset type: " << d_type << ")" << endl);
601}
602
608std::string
609RemoteResource::get_http_response_header(const std::string header_name) {
610 string value("");
611 std::map<string, string>::iterator it;
612 it = d_http_response_headers->find(BESUtil::lowercase(header_name));
613 if (it != d_http_response_headers->end())
614 value = it->second;
615 return value;
616}
617
629void RemoteResource::filter_retrieved_resource(const std::map<std::string, std::string> &content_filters){
630
631 // No filters?
632 if(content_filters.empty()){
633 // No problem...
634 return;
635 }
636 string resource_content;
637 {
638 std::stringstream buffer;
639 // - - - - - - - - - - - - - - - - - - - - - - - -
640 // Read the cached file into a string object
641 std::ifstream cr_istrm(d_resourceCacheFileName);
642 if (!cr_istrm.is_open()) {
643 string msg = "Could not open '" + d_resourceCacheFileName + "' to read cached response.";
644 BESDEBUG(MODULE, prolog << msg << endl);
645 throw BESInternalError(msg, __FILE__, __LINE__);
646 }
647 buffer << cr_istrm.rdbuf();
648
649 // FIXME Do we need to make a copy here? Could we pass buffer.str() to replace_all??
650 resource_content = buffer.str();
651 } // cr_istrm is closed here.
652
653 for (const auto& apair : content_filters) {
654 unsigned int replace_count = BESUtil::replace_all(resource_content,apair.first, apair.second);
655 BESDEBUG(MODULE, prolog << "Replaced " << replace_count << " instance(s) of template(" <<
656 apair.first << ") with " << apair.second << " in cached RemoteResource" << endl);
657 }
658
659
660 // - - - - - - - - - - - - - - - - - - - - - - - -
661 // Replace the contents of the cached file with the modified string.
662 std::ofstream cr_ostrm(d_resourceCacheFileName);
663 if (!cr_ostrm.is_open()) {
664 string msg = "Could not open '" + d_resourceCacheFileName + "' to write modified cached response.";
665 BESDEBUG(MODULE, prolog << msg << endl);
666 throw BESInternalError(msg, __FILE__, __LINE__);
667 }
668 cr_ostrm << resource_content;
669
670}
671
675std::string RemoteResource::get_response_as_string() {
676
677 if(!d_initialized){
678 stringstream msg;
679 msg << "ERROR. Internal state error. " << __PRETTY_FUNCTION__ << " was called prior to retrieving resource.";
680 BESDEBUG(MODULE, prolog << msg.str() << endl);
681 throw BESInternalError(msg.str(), __FILE__, __LINE__);
682 }
683 string cache_file = getCacheFileName();
684 // - - - - - - - - - - - - - - - - - - - - - - - - - - -
685 // Set up cache file input stream.
686 std::ifstream file_istream(cache_file, std::ofstream::in);
687
688 // If the cache filename is not valid, the stream will not open. Empty is not valid.
689 if(file_istream.is_open()){
690 // If it's open we've got a valid input stream.
691 BESDEBUG(MODULE, prolog << "Using cached file: " << cache_file << endl);
692 std::stringstream buffer;
693 buffer << file_istream.rdbuf();
694 return buffer.str();
695 }
696 else {
697 stringstream msg;
698 msg << "ERROR. Failed to open cache file " << cache_file << " for reading.";
699 BESDEBUG(MODULE, prolog << msg.str() << endl);
700 throw BESInternalError(msg.str(), __FILE__, __LINE__);
701 }
702
703}
704
712rapidjson::Document RemoteResource::get_as_json() {
713 string response = get_response_as_string();
714 rapidjson::Document d;
715 d.Parse(response.c_str());
716 return d;
717}
718
722vector<string> *RemoteResource::getResponseHeaders() {
723 if (!d_initialized){
724 throw BESInternalError(prolog +"STATE ERROR: Remote Resource Has Not Been Retrieved.",__FILE__,__LINE__);
725 }
726 return d_response_headers;
727}
728
729
// Retired RemoteResource::setType(), excluded from compilation by the surrounding #if 0 / #endif.
// It scanned the raw header lines for content-disposition and content-type, then derived the
// data type from disposition, content type and URL in that order, falling back to "unknown".
// The active ingest_http_headers_and_type() in this file performs the same sequence.
// NOTE(review): presumably kept for reference only - confirm before deleting.
730#if 0
731void RemoteResource::setType(const vector<string> *resp_hdrs) {
732
733 BESDEBUG(MODULE, prolog << "BEGIN" << endl);
734
735 string type = "";
736
737 // Try and figure out the file type first from the
738 // Content-Disposition in the http header response.
739 string disp;
740 string ctype;
741
742 if (resp_hdrs) {
743 vector<string>::const_iterator i = resp_hdrs->begin();
744 vector<string>::const_iterator e = resp_hdrs->end();
745 for (; i != e; i++) {
746 string hdr_line = (*i);
747
748 BESDEBUG(MODULE, prolog << "Evaluating header: " << hdr_line << endl);
749
750 hdr_line = BESUtil::lowercase(hdr_line);
751
752 string colon_space = ": ";
753 int index = hdr_line.find(colon_space);
754 string hdr_name = hdr_line.substr(0, index);
755 string hdr_value = hdr_line.substr(index + colon_space.length());
756
757 BESDEBUG(MODULE, prolog << "hdr_name: '" << hdr_name << "' hdr_value: '" << hdr_value << "' " << endl);
758
759 if (hdr_name.find("content-disposition") != string::npos) {
760 // Content disposition exists
761 BESDEBUG(MODULE, prolog << "Located content-disposition header." << endl);
762 disp = hdr_value;
763 }
764 if (hdr_name.find("content-type") != string::npos) {
765 BESDEBUG(MODULE, prolog << "Located content-type header." << endl);
766 ctype = hdr_value;
767 }
768 }
769 }
770
771 if (!disp.empty()) {
772 // Content disposition exists, grab the filename
773 // attribute
774 HttpUtils::Get_type_from_disposition(disp, type);
775 BESDEBUG(MODULE,prolog << "Evaluated content-disposition '" << disp << "' matched type: \"" << type << "\"" << endl);
776 }
777
778 // still haven't figured out the type. Check the content-type
779 // next, translate to the BES MODULE name. It's also possible
780 // that even though Content-disposition was available, we could
781 // not determine the type of the file.
782 if (type.empty() && !ctype.empty()) {
783 HttpUtils::Get_type_from_content_type(ctype, type);
784 BESDEBUG(MODULE,prolog << "Evaluated content-type '" << ctype << "' matched type \"" << type << "\"" << endl);
785 }
786
787 // still haven't figured out the type. Now check the actual URL
788 // and see if we can't match the URL to a MODULE name
789 if (type.empty()) {
790 HttpUtils::Get_type_from_url(d_remoteResourceUrl, type);
791 BESDEBUG(MODULE,prolog << "Evaluated url '" << d_remoteResourceUrl << "' matched type: \"" << type << "\"" << endl);
792 }
793
794 // still couldn't figure it out, punt
795 if (type.empty()) {
796 string err = prolog + "Unable to determine the type of data"
797 + " returned from '" + d_remoteResourceUrl + "' Setting type to 'unknown'";
798 BESDEBUG(MODULE, err << endl);
799 type = "unknown";
800 //throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
801 }
802
803 // @TODO CACHE THE DATA TYPE OR THE HTTP HEADERS SO WHEN WE ARE RETRIEVING THE CACHED OBJECT WE CAN GET THE CORRECT TYPE
804
805 d_type = type;
806 }
807#endif
808
809
810} // namespace http
811
static bool IsSet(const std::string &flagName)
see if the debug context flagName is set to true
Definition: BESDebug.h:168
Base exception class for the BES with basic string message.
Definition: BESError.h:59
unsigned int get_line() const
get the line number where the exception was thrown
Definition: BESError.h:129
unsigned int get_bes_error_type() const
Return the return code for this error class.
Definition: BESError.h:157
std::string get_file() const
get the file name where the exception was thrown
Definition: BESError.h:120
std::string get_message() const
get the error message for this exception
Definition: BESError.h:111
virtual void unlock_and_close(const std::string &target)
virtual bool create_and_lock(const std::string &target, int &fd)
Create a file in the cache and lock it for write access.
virtual void exclusive_to_shared_lock(int fd)
Transfer from an exclusive lock to a shared lock.
virtual bool get_read_lock(const std::string &target, int &fd)
Get a read-only lock on the file if it exists.
virtual bool get_exclusive_lock(const std::string &target, int &fd)
exception thrown if internal error encountered
error thrown if the resource requested cannot be found
virtual bool start(std::string name)
Definition: BESStopWatch.cc:67
static bool endsWith(std::string const &fullString, std::string const &ending)
Definition: BESUtil.cc:834
static unsigned int replace_all(std::string &s, std::string find_this, std::string replace_with_this)
Operates on the string 's' to replace every occurrence of the value of the string 'find_this' with the value of the string 'replace_with_this'.
Definition: BESUtil.cc:921
static std::string lowercase(const std::string &s)
Definition: BESUtil.cc:254
static std::string pathConcat(const std::string &firstPart, const std::string &secondPart, char separator='/')
Concatenate path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:751
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:340
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
A cache for content accessed via HTTP.
Definition: HttpCache.h:54
virtual std::string get_cache_file_name(const std::string &uid, const std::string &src, bool mangle=true)
Definition: HttpCache.cc:282
utility class for the HTTP catalog module
Definition: AllowedHosts.cc:55
void get_type_from_disposition(const string &disp, string &type)
Definition: HttpUtils.cc:110