bes Updated for version 3.20.13
CurlUtils.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2// This file is part of the BES http package, part of the Hyrax data server.
3//
4// Copyright (c) 2020 OPeNDAP, Inc.
5// Author: Nathan Potter <ndp@opendap.org>
6//
7// This library is free software; you can redistribute it and/or
8// modify it under the terms of the GNU Lesser General Public
9// License as published by the Free Software Foundation; either
10// version 2.1 of the License, or (at your option) any later version.
11//
12// This library is distributed in the hope that it will be useful,
13// but WITHOUT ANY WARRANTY; without even the implied warranty of
14// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15// Lesser General Public License for more details.
16//
17// You should have received a copy of the GNU Lesser General Public
18// License along with this library; if not, write to the Free Software
19// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20//
21// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
22// Authors:
23// ndp Nathan Potter <ndp@opendap.org>
24
25#include "config.h"
26
27#include <unistd.h>
28#include <sys/types.h>
29#include <fcntl.h>
30#include <time.h>
31
32#include <curl/curl.h>
33
34#include <cstdio>
35#include <sstream>
36#include <iostream>
37#include <map>
38#include <vector>
39#include <algorithm> // std::for_each
40#include <utility>
41
42#include "rapidjson/document.h"
43
44#include <BESContextManager.h>
45#include "BESSyntaxUserError.h"
46#include "BESForbiddenError.h"
47#include "BESNotFoundError.h"
48#include "BESTimeoutError.h"
49#include "BESInternalError.h"
50#include "BESDebug.h"
51#include "BESRegex.h"
52#include "TheBESKeys.h"
53#include "BESUtil.h"
54#include "BESLog.h"
55#include "BESStopWatch.h"
56
57#include "BESSyntaxUserError.h"
58#include "HttpNames.h"
59#include "HttpUtils.h"
60#include "ProxyConfig.h"
61#include "AllowedHosts.h"
62#include "CurlUtils.h"
63#include "EffectiveUrlCache.h"
64
65#include "url_impl.h"
66
67#define MODULE "curl"
68
69using std::endl;
70using std::string;
71using std::map;
72using std::vector;
73using std::stringstream;
74using std::ostringstream;
75using namespace http;
76
77#define prolog std::string("CurlUtils::").append(__func__).append("() - ")
78
79namespace curl {
80
// Retry cap; the value 10 follows Amazon's suggestion.
// NOTE(review): presumably consumed by a request retry loop elsewhere in this file - confirm.
static const unsigned int retry_limit = 10; // Amazon's suggestion
static const useconds_t uone_second = 1000 * 1000; // one second in microseconds (1000 ms * 1000 us per ms)

// Forward declaration; takes a (possibly null) header list and returns a header list.
// NOTE(review): the definition is expected further down in this translation unit - confirm.
curl_slist *add_edl_auth_headers(struct curl_slist *request_headers);

// Set this to 1 to turn on libcurl's verbose mode (for debugging).
// When non-zero, init() enables CURLOPT_VERBOSE and installs curl_debug() as the debug callback.
int curl_trace = 0;
89
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417
// Messages for the HTTP client errors, indexed by (status - CLIENT_ERR_MIN).
const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] = {
        "Bad Request:",
        "Unauthorized: Contact the server administrator.",
        "Payment Required.",
        "Forbidden: Contact the server administrator.",
        "Not Found: The underlying data source or server could not be found.",
        "Method Not Allowed.",
        "Not Acceptable.",
        "Proxy Authentication Required.",
        "Request Time-out.",
        "Conflict.",
        "Gone.",
        "Length Required.",
        "Precondition Failed.",
        "Request Entity Too Large.",
        "Request URI Too Large.",
        "Unsupported Media Type.",
        "Requested Range Not Satisfiable.",
        "Expectation Failed."
};

#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505
// Messages for the HTTP server errors, indexed by (status - SERVER_ERR_MIN).
const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] = {
        "Internal Server Error.",
        "Not Implemented.",
        "Bad Gateway.",
        "Service Unavailable.",
        "Gateway Time-out.",
        "HTTP Version Not Supported."
};

/**
 * @brief Translate an HTTP status code into a human readable message.
 *
 * @param status The HTTP status code.
 * @return The matching entry of http_client_errors (400-417) or http_server_errors (500-505);
 * for every other value the text "Unknown HTTP Error: " followed by the code.
 */
string http_status_to_string(int status) {
    const bool is_client_error = CLIENT_ERR_MIN <= status && status <= CLIENT_ERR_MAX;
    if (is_client_error) {
        return string(http_client_errors[status - CLIENT_ERR_MIN]);
    }

    const bool is_server_error = SERVER_ERR_MIN <= status && status <= SERVER_ERR_MAX;
    if (is_server_error) {
        return string(http_server_errors[status - SERVER_ERR_MIN]);
    }

    // Not covered by either table.
    ostringstream unknown;
    unknown << "Unknown HTTP Error: " << status;
    return unknown.str();
}
144
150static string getCurlAuthTypeName(const int auth_type) {
151
152 string authTypeString;
153 int match;
154
155 match = auth_type & CURLAUTH_BASIC;
156 if (match) {
157 authTypeString += "CURLAUTH_BASIC";
158 }
159
160 match = auth_type & CURLAUTH_DIGEST;
161 if (match) {
162 if (!authTypeString.empty())
163 authTypeString += " ";
164 authTypeString += "CURLAUTH_DIGEST";
165 }
166
167 match = auth_type & CURLAUTH_DIGEST_IE;
168 if (match) {
169 if (!authTypeString.empty())
170 authTypeString += " ";
171 authTypeString += "CURLAUTH_DIGEST_IE";
172 }
173
174 match = auth_type & CURLAUTH_GSSNEGOTIATE;
175 if (match) {
176 if (!authTypeString.empty())
177 authTypeString += " ";
178 authTypeString += "CURLAUTH_GSSNEGOTIATE";
179 }
180
181 match = auth_type & CURLAUTH_NTLM;
182 if (match) {
183 if (!authTypeString.empty())
184 authTypeString += " ";
185 authTypeString += "CURLAUTH_NTLM";
186 }
187
188#if 0
189 match = auth_type & CURLAUTH_ANY;
190 if(match){
191 if(!authTypeString.empty())
192 authTypeString += " ";
193 authTypeString += "CURLAUTH_ANY";
194 }
195
196
197 match = auth_type & CURLAUTH_ANY;
198 if(match){
199 if(!authTypeString.empty())
200 authTypeString += " ";
201 authTypeString += "CURLAUTH_ANYSAFE";
202 }
203
204
205 match = auth_type & CURLAUTH_ANY;
206 if(match){
207 if(!authTypeString.empty())
208 authTypeString += " ";
209 authTypeString += "CURLAUTH_ONLY";
210 }
211#endif
212
213 return authTypeString;
214}
215
/**
 * @brief libcurl write callback that discards the data it is handed.
 *
 * A write callback reports how many bytes it consumed; anything other than size * nmemb is
 * treated by libcurl as a write error. The product is returned (rather than nmemb alone) so
 * the callback stays correct for any value of size.
 *
 * @param size Size of one data item.
 * @param nmemb Number of data items.
 * @return size * nmemb, i.e. everything is reported as consumed.
 */
static size_t writeNothing(char * /* data */, size_t size, size_t nmemb, void * /* userdata */) {
    return size * nmemb;
}
222
227 static size_t writeToOpenFileDescriptor(char *data, size_t /* size */, size_t nmemb, void *userdata) {
228
229 int *fd = (int *) userdata;
230
231 BESDEBUG(MODULE, prolog << "Bytes received " << nmemb << endl);
232 int wrote = write(*fd, data, nmemb);
233 BESDEBUG(MODULE, prolog << "Bytes written " << wrote << endl);
234
235 return wrote;
236 }
237
238
261static size_t save_http_response_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs) {
262 BESDEBUG(MODULE, prolog << "Inside the header parser." << endl);
263 vector<string> *hdrs = static_cast<vector<string> * >(resp_hdrs);
264
265 // Grab the header, minus the trailing newline. Or \r\n pair.
266 string complete_line;
267 if (nmemb > 1 && *(static_cast<char *>(ptr) + size * (nmemb - 2)) == '\r')
268 complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 2));
269 else
270 complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 1));
271
272 // Store all non-empty headers that are not HTTP status codes
273 if (complete_line != "" && complete_line.find("HTTP") == string::npos) {
274 BESDEBUG(MODULE, prolog << "Header line: " << complete_line << endl);
275 hdrs->push_back(complete_line);
276 }
277
278 return size * nmemb;
279}
280
281
289static int curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *) {
290 string message(msg, size);
291
292 switch (info) {
293 case CURLINFO_TEXT:
294 BESDEBUG(MODULE, prolog << "Text: " << message << endl);
295 break;
296 case CURLINFO_HEADER_IN:
297 BESDEBUG(MODULE, prolog << "Header in: " << message << endl);
298 break;
299 case CURLINFO_HEADER_OUT:
300 BESDEBUG(MODULE, prolog << "Header out: " << endl << message << endl);
301 break;
302 case CURLINFO_DATA_IN:
303 BESDEBUG(MODULE, prolog << "Data in: " << message << endl);
304 break;
305 case CURLINFO_DATA_OUT:
306 BESDEBUG(MODULE, prolog << "Data out: " << message << endl);
307 break;
308 case CURLINFO_END:
309 BESDEBUG(MODULE, prolog << "End: " << message << endl);
310 break;
311#ifdef CURLINFO_SSL_DATA_IN
312 case CURLINFO_SSL_DATA_IN:
313 BESDEBUG(MODULE, prolog << "SSL Data in: " << message << endl ); break;
314#endif
315#ifdef CURLINFO_SSL_DATA_OUT
316 case CURLINFO_SSL_DATA_OUT:
317 BESDEBUG(MODULE, prolog << "SSL Data out: " << message << endl ); break;
318#endif
319 default:
320 BESDEBUG(MODULE, prolog << "Curl info: " << message << endl);
321 break;
322 }
323 return 0;
324}
325
326
332class BuildHeaders : public std::unary_function<const string &, void> {
333 struct curl_slist *d_cl;
334
335public:
336 BuildHeaders() : d_cl(0) {}
337
338 void operator()(const string &header) {
339 BESDEBUG(MODULE, prolog << "Adding '" << header.c_str() << "' to the header list." << endl);
340 d_cl = curl_slist_append(d_cl, header.c_str());
341 }
342
343 struct curl_slist *get_headers() {
344 return d_cl;
345 }
346};
347
368 bool configure_curl_handle_for_proxy(CURL *ceh, const string &target_url) {
369 BESDEBUG(MODULE, prolog << "BEGIN." << endl);
370
371 bool using_proxy = http::ProxyConfig::theOne()->is_configured();
372 if (using_proxy) {
373
374 BESDEBUG(MODULE, prolog << "Proxy has been configured..." << endl);
375
376 http::ProxyConfig *proxy = http::ProxyConfig::theOne();
377
378 // TODO remove these local variables (if possible) and pass the values into curl_easy_setopt() directly from HttpUtils
379 string proxyHost = proxy->host();
380 int proxyPort = proxy->port();
381 string proxyPassword = proxy->proxy_password();
382 string proxyUser = proxy->user();
383 string proxyUserPW = proxy->password();
384 int proxyAuthType = proxy->auth_type();
385 string no_proxy_regex = proxy->no_proxy_regex();
386
387
388 // Don't set up the proxy server for URLs that match the 'NoProxy'
389 // regex set in the gateway.conf file.
390
391 // Don't create the regex if the string is empty
392 if (!no_proxy_regex.empty()) {
393 BESDEBUG(MODULE, prolog << "Found NoProxyRegex." << endl);
394 BESRegex r(no_proxy_regex.c_str());
395 if (r.match(target_url.c_str(), target_url.length()) != -1) {
396 BESDEBUG(MODULE,
397 prolog << "Found NoProxy match. BESRegex: " << no_proxy_regex << "; Url: " << target_url
398 << endl);
399 using_proxy = false;
400 }
401 }
402
403 if (using_proxy) {
404 CURLcode res;
405 char error_buffer[CURL_ERROR_SIZE];
406
407 BESDEBUG(MODULE, prolog << "Setting up a proxy server." << endl);
408 BESDEBUG(MODULE, prolog << "Proxy host: " << proxyHost << endl);
409 BESDEBUG(MODULE, prolog << "Proxy port: " << proxyPort << endl);
410
411 set_error_buffer(ceh, error_buffer);
412
413 res = curl_easy_setopt(ceh, CURLOPT_PROXY, proxyHost.data());
414 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_PROXY", error_buffer, __FILE__, __LINE__);
415
416 res = curl_easy_setopt(ceh, CURLOPT_PROXYPORT, proxyPort);
417 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_PROXYPORT", error_buffer, __FILE__, __LINE__);
418
419 // oddly "#ifdef CURLOPT_PROXYAUTH" doesn't work - even though CURLOPT_PROXYAUTH is defined and valued at 111 it
420 // fails the test. Eclipse hover over the CURLOPT_PROXYAUTH symbol shows: "CINIT(PROXYAUTH, LONG, 111)",
421 // for what that's worth
422
423 // According to http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTPROXYAUTH
424 // As of 4/21/08 only NTLM, Digest and Basic work.
425
426 res = curl_easy_setopt(ceh, CURLOPT_PROXYAUTH, proxyAuthType);
427 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_PROXYAUTH", error_buffer, __FILE__, __LINE__);
428 BESDEBUG(MODULE, prolog << "Using CURLOPT_PROXYAUTH = " << getCurlAuthTypeName(proxyAuthType) << endl);
429
430 if (!proxyUser.empty()) {
431 res = curl_easy_setopt(ceh, CURLOPT_PROXYUSERNAME, proxyUser.data());
432 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_PROXYUSERNAME", error_buffer, __FILE__,
433 __LINE__);
434 BESDEBUG(MODULE, prolog << "CURLOPT_PROXYUSERNAME : " << proxyUser << endl);
435
436 if (!proxyPassword.empty()) {
437 res = curl_easy_setopt(ceh, CURLOPT_PROXYPASSWORD, proxyPassword.data());
438 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_PROXYPASSWORD", error_buffer, __FILE__,
439 __LINE__);
440 BESDEBUG(MODULE, prolog << "CURLOPT_PROXYPASSWORD: " << proxyPassword << endl);
441 }
442 } else if (!proxyUserPW.empty()) {
443 res = curl_easy_setopt(ceh, CURLOPT_PROXYUSERPWD, proxyUserPW.data());
444 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_PROXYUSERPWD", error_buffer, __FILE__, __LINE__);
445 BESDEBUG(MODULE, prolog << "CURLOPT_PROXYUSERPWD : " << proxyUserPW << endl);
446 }
447 unset_error_buffer(ceh);
448 }
449 }
450 BESDEBUG(MODULE, prolog << "END. using_proxy: " << (using_proxy ? "true" : "false") << endl);
451 return using_proxy;
452 }
453
454#if 0
455 bool configure_curl_handle_for_proxy(CURL *ceh, const string &target_url) {
456 BESDEBUG(MODULE, prolog << "BEGIN." << endl);
457
458 bool using_proxy = false;
459
460 http::ProxyConfig *proxy = http::ProxyConfig::TheConfig();
461
462 // TODO remove these local variables (if possible) and pass the values into curl_easy_setopt() directly from HttpUtils
463 string proxyHost = proxy->host();
464 int proxyPort = proxy->port();
465 string proxyPassword = proxy->proxy_password();
466 string proxyUser = proxy->user();
467 string proxyUserPW = proxy->password();
468 int proxyAuthType = proxy->auth_type();
469 string no_proxy_regex = proxy->no_proxy_regex();
470
471 if (!proxyHost.empty()) {
472 using_proxy = true;
473 if (proxyPort == 0)
474 proxyPort = 8080;
475
476 // Apparently we don't need this...
477 //if(proxyProtocol.empty())
478 // proxyProtocol = "http";
479
480 }
481 if (using_proxy) {
482 BESDEBUG(MODULE, prolog << "Found proxy configuration." << endl);
483
484 // Don't set up the proxy server for URLs that match the 'NoProxy'
485 // regex set in the gateway.conf file.
486
487 // Don't create the regex if the string is empty
488 if (!no_proxy_regex.empty()) {
489 BESDEBUG(MODULE, prolog << "Found NoProxyRegex." << endl);
490 BESRegex r(no_proxy_regex.c_str());
491 if (r.match(target_url.c_str(), target_url.length()) != -1) {
492 BESDEBUG(MODULE,
493 prolog << "Found NoProxy match. BESRegex: " << no_proxy_regex << "; Url: " << target_url
494 << endl);
495 using_proxy = false;
496 }
497 }
498
499 if (using_proxy) {
500 CURLcode res;
501 char error_buffer[CURL_ERROR_SIZE];
502
503 BESDEBUG(MODULE, prolog << "Setting up a proxy server." << endl);
504 BESDEBUG(MODULE, prolog << "Proxy host: " << proxyHost << endl);
505 BESDEBUG(MODULE, prolog << "Proxy port: " << proxyPort << endl);
506
507 set_error_buffer(ceh, error_buffer);
508
509 res = curl_easy_setopt(ceh, CURLOPT_PROXY, proxyHost.data());
510 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_PROXY", error_buffer, __FILE__, __LINE__);
511
512 res = curl_easy_setopt(ceh, CURLOPT_PROXYPORT, proxyPort);
513 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_PROXYPORT", error_buffer, __FILE__, __LINE__);
514
515 // oddly "#ifdef CURLOPT_PROXYAUTH" doesn't work - even though CURLOPT_PROXYAUTH is defined and valued at 111 it
516 // fails the test. Eclipse hover over the CURLOPT_PROXYAUTH symbol shows: "CINIT(PROXYAUTH, LONG, 111)",
517 // for what that's worth
518
519 // According to http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTPROXYAUTH
520 // As of 4/21/08 only NTLM, Digest and Basic work.
521
522 res = curl_easy_setopt(ceh, CURLOPT_PROXYAUTH, proxyAuthType);
523 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_PROXYAUTH", error_buffer, __FILE__, __LINE__);
524 BESDEBUG(MODULE, prolog << "Using CURLOPT_PROXYAUTH = " << getCurlAuthTypeName(proxyAuthType) << endl);
525
526 if (!proxyUser.empty()) {
527 res = curl_easy_setopt(ceh, CURLOPT_PROXYUSERNAME, proxyUser.data());
528 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_PROXYUSERNAME", error_buffer, __FILE__, __LINE__);
529 BESDEBUG(MODULE, prolog << "CURLOPT_PROXYUSERNAME : " << proxyUser << endl);
530
531 if (!proxyPassword.empty()) {
532 res = curl_easy_setopt(ceh, CURLOPT_PROXYPASSWORD, proxyPassword.data());
533 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_PROXYPASSWORD", error_buffer, __FILE__,
534 __LINE__);
535 BESDEBUG(MODULE, prolog << "CURLOPT_PROXYPASSWORD: " << proxyPassword << endl);
536 }
537 }
538 else if (!proxyUserPW.empty()) {
539 res = curl_easy_setopt(ceh, CURLOPT_PROXYUSERPWD, proxyUserPW.data());
540 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_PROXYUSERPWD", error_buffer, __FILE__, __LINE__);
541 BESDEBUG(MODULE, prolog << "CURLOPT_PROXYUSERPWD : " << proxyUserPW << endl);
542 }
543 unset_error_buffer(ceh);
544 }
545 }
546 BESDEBUG(MODULE, prolog << "END." << endl);
547
548 return using_proxy;
549 }
550#endif
551
552CURL *init(const string &target_url,
553 const struct curl_slist *http_request_headers,
554 vector<string> *http_response_hdrs) {
555 CURL *swanky_new_curl_easy_handle = curl_easy_init();
556 return init(swanky_new_curl_easy_handle, target_url, http_request_headers, http_response_hdrs);
557}
558
573CURL *init(CURL *ceh,
574 const string &target_url,
575 const struct curl_slist *http_request_headers,
576 vector<string> *http_response_hdrs
577) {
578 char error_buffer[CURL_ERROR_SIZE];
579 error_buffer[0] = 0; // Null terminate this string for safety.
580 CURLcode res;
581
582 if (!ceh)
583 throw BESInternalError("Could not initialize cURL easy handle.", __FILE__, __LINE__);
584
585 // SET Error Buffer (for use during this setup) ----------------------------------------------------------------
586 set_error_buffer(ceh, error_buffer);
587
588 // Target URL --------------------------------------------------------------------------------------------------
589 res = curl_easy_setopt(ceh, CURLOPT_URL, target_url.c_str());
590 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_URL", error_buffer, __FILE__, __LINE__);
591
592 // Load in the default headers to send with a request. The empty Pragma
593 // headers overrides libcurl's default Pragma: no-cache header (which
594 // will disable caching by Squid, etc.).
595 // the empty Pragma never appears in the outgoing headers when this isn't present
596 // d_request_headers->push_back(string("Pragma: no-cache"));
597 // d_request_headers->push_back(string("Cache-Control: no-cache"));
598
599 //TODO Do we need this test? what if the pointer is null? Probably it's fine...
600 if (http_request_headers) {
601 // Add the http_request_headers to the cURL handle.
602 res = curl_easy_setopt(ceh, CURLOPT_HTTPHEADER, http_request_headers);
603 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_HTTPHEADER", error_buffer, __FILE__, __LINE__);
604 }
605
606
607 if (http_response_hdrs) {
608 res = curl_easy_setopt(ceh, CURLOPT_HEADERFUNCTION, save_http_response_headers);
609 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_HEADERFUNCTION", error_buffer, __FILE__, __LINE__);
610
611 // Pass save_http_response_headers() a pointer to the vector<string> where the
612 // response headers may be stored. Callers can use the resp_hdrs
613 // value/result parameter to get the raw response header information .
614 res = curl_easy_setopt(ceh, CURLOPT_WRITEHEADER, http_response_hdrs);
615 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_WRITEHEADER", error_buffer, __FILE__, __LINE__);
616 }
617
618 // Allow compressed responses. Sending an empty string enables all supported compression types.
619#ifndef CURLOPT_ACCEPT_ENCODING
620 res = curl_easy_setopt(ceh, CURLOPT_ENCODING, "");
621 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_ENCODING", error_buffer, __FILE__, __LINE__);
622#else
623 res = curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "");
624 check_setopt_result(res, prolog, "CURLOPT_ACCEPT_ENCODING", error_buffer, __FILE__,__LINE__);
625#endif
626 // Disable Progress Meter
627 res = curl_easy_setopt(ceh, CURLOPT_NOPROGRESS, 1L);
628 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_NOPROGRESS", error_buffer, __FILE__, __LINE__);
629
630 // Disable cURL signal handling
631 res = curl_easy_setopt(ceh, CURLOPT_NOSIGNAL, 1L);
632 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_NOSIGNAL", error_buffer, __FILE__, __LINE__);
633
634
635 // - - - - - - - - - - - - - - - - - - - - - - - - - - - -
636 // Authentication config.
637 //
638
639 // We have to set FailOnError to false for any of the non-Basic
640 // authentication schemes to work. 07/28/03 jhrg
641 res = curl_easy_setopt(ceh, CURLOPT_FAILONERROR, 0L);
642 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_FAILONERROR", error_buffer, __FILE__, __LINE__);
643
644
645 // CURLAUTH_ANY means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
646 // choosing the the 'safest' one supported by the server.
647 // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
648 res = curl_easy_setopt(ceh, CURLOPT_HTTPAUTH, (long) CURLAUTH_ANY);
649 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_HTTPAUTH", error_buffer, __FILE__, __LINE__);
650
651
652 // CURLOPT_NETRC means to use the netrc file for credentials.
653 // CURL_NETRC_OPTIONAL Means that if the supplied URL contains a username
654 // and password to prefer that to using the content of the netrc file.
655 res = curl_easy_setopt(ceh, CURLOPT_NETRC, CURL_NETRC_OPTIONAL);
656 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_NETRC", error_buffer, __FILE__, __LINE__);
657
658 // If the configuration specifies a particular .netrc credentials file, use it.
659 string netrc_file = get_netrc_filename();
660 if (!netrc_file.empty()) {
661 res = curl_easy_setopt(ceh, CURLOPT_NETRC_FILE, netrc_file.c_str());
662 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_NETRC_FILE", error_buffer, __FILE__, __LINE__);
663
664 }
665 VERBOSE(prolog << " is using the netrc file '"
666 << ((!netrc_file.empty()) ? netrc_file : "~/.netrc") << "'" << endl);
667
668
669 // - - - - - - - - - - - - - - - - - - - - - - - - - - - -
670 // Cookies
671 //
672 res = curl_easy_setopt(ceh, CURLOPT_COOKIEFILE, curl::get_cookie_filename().c_str());
673 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_COOKIEFILE", error_buffer, __FILE__, __LINE__);
674
675 res = curl_easy_setopt(ceh, CURLOPT_COOKIEJAR, curl::get_cookie_filename().c_str());
676 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_COOKIEJAR", error_buffer, __FILE__, __LINE__);
677
678 // save_http_response_headers
679
680 // Follow 302 (redirect) responses
681 res = curl_easy_setopt(ceh, CURLOPT_FOLLOWLOCATION, 1L);
682 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_FOLLOWLOCATION", error_buffer, __FILE__, __LINE__);
683
684 res = curl_easy_setopt(ceh, CURLOPT_MAXREDIRS, max_redirects());
685 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_MAXREDIRS", error_buffer, __FILE__, __LINE__);
686
687 // Set the user agent to Hyrax's user agent value
688 res = curl_easy_setopt(ceh, CURLOPT_USERAGENT, hyrax_user_agent().c_str());
689 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_USERAGENT", error_buffer, __FILE__, __LINE__);
690
691#if 0
692 // If the user turns off SSL validation...
693if (!d_rcr->get_validate_ssl() == 0) {
694 res = curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
695 check_setopt_result(res, prolog, "CURLOPT_SSL_VERIFYPEER", error_buffer, __FILE__, __LINE__);
696 res = curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
697 check_setopt_result(res, prolog, "CURLOPT_SSL_VERIFYHOST", error_buffer, __FILE__, __LINE__);
698}
699#endif
700
701 if (curl_trace) {
702 BESDEBUG(MODULE, prolog << "Curl version: " << curl_version() << endl);
703 res = curl_easy_setopt(ceh, CURLOPT_VERBOSE, 1L);
704 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_VERBOSE", error_buffer, __FILE__, __LINE__);
705 BESDEBUG(MODULE, prolog << "Curl in verbose mode." << endl);
706
707 res = curl_easy_setopt(ceh, CURLOPT_DEBUGFUNCTION, curl_debug);
708 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_DEBUGFUNCTION", error_buffer, __FILE__, __LINE__);
709 BESDEBUG(MODULE, prolog << "Curl debugging function installed." << endl);
710 }
711
712 // We unset the error buffer here because we know that curl::configure_curl_handle_for_proxy() will use it's own.
713 unset_error_buffer(ceh);
714 // Configure the a proxy for this url (if appropriate).
715 curl::configure_curl_handle_for_proxy(ceh, target_url);
716
717 BESDEBUG(MODULE, prolog << "curl: " << (void *) ceh << endl);
718 return ceh;
719}
720
721string get_range_arg_string(const unsigned long long &offset, const unsigned long long &size) {
722 ostringstream range; // range-get needs a string arg for the range
723 range << offset << "-" << offset + size - 1;
724 BESDEBUG(MODULE, prolog << " range: " << range.str() << endl);
725 return range.str();
726}
727
743CURL *init_effective_url_retriever_handle(const string &target_url, struct curl_slist *req_headers,
744 vector<string> &resp_hdrs) {
745 char error_buffer[CURL_ERROR_SIZE];
746 CURLcode res;
747 CURL *ceh = 0;
748
749 error_buffer[0] = 0; // null terminate empty string
750
751 ceh = curl::init(target_url, req_headers, &resp_hdrs);
752
753 set_error_buffer(ceh, error_buffer);
754
755 // get the offset to offset + size bytes
756 res = curl_easy_setopt(ceh, CURLOPT_RANGE, get_range_arg_string(0, 4).c_str());
757 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_RANGE", error_buffer, __FILE__, __LINE__);
758
759 res = curl_easy_setopt(ceh, CURLOPT_WRITEFUNCTION, writeNothing);
760 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_WRITEFUNCTION", error_buffer, __FILE__, __LINE__);
761
762 // Pass save_raw_http_headers() a pointer to the vector<string> where the
763 // response headers may be stored. Callers can use the resp_hdrs
764 // value/result parameter to get the raw response header information .
765 res = curl_easy_setopt(ceh, CURLOPT_WRITEHEADER, &resp_hdrs);
766 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_WRITEHEADER", error_buffer, __FILE__, __LINE__);
767
768 unset_error_buffer(ceh);
769
770 return ceh;
771}
772
790void http_get_and_write_resource(const std::shared_ptr<http::url>& target_url,
791 const int fd,
792 vector<string> *http_response_headers) {
793
794 char error_buffer[CURL_ERROR_SIZE];
795 CURLcode res;
796 CURL *ceh = nullptr;
797 curl_slist *req_headers = nullptr;
798 BuildHeaders header_builder;
799
800 BESDEBUG(MODULE, prolog << "BEGIN" << endl);
801 // Before we do anything, make sure that the URL is OK to pursue.
802 if (!http::AllowedHosts::theHosts()->is_allowed(target_url)) {
803 string err = (string) "The specified URL " + target_url->str()
804 + " does not match any of the accessible services in"
805 + " the allowed hosts list.";
806 BESDEBUG(MODULE, prolog << err << endl);
807 throw BESSyntaxUserError(err, __FILE__, __LINE__);
808 }
809
810 // Add the authorization headers
811 req_headers = add_edl_auth_headers(req_headers);
812
813 try {
814 // OK! Make the cURL handle
815 ceh = init(target_url->str(), req_headers, http_response_headers);
816
817 set_error_buffer(ceh, error_buffer);
818
819 res = curl_easy_setopt(ceh, CURLOPT_WRITEFUNCTION, writeToOpenFileDescriptor);
820 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_WRITEFUNCTION", error_buffer, __FILE__, __LINE__);
821
822#ifdef CURLOPT_WRITEDATA
823 res = curl_easy_setopt(ceh, CURLOPT_WRITEDATA, &fd);
824 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_WRITEDATA", error_buffer, __FILE__, __LINE__);
825#else
826 res = curl_easy_setopt(ceh, CURLOPT_FILE, &fd);
827 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_FILE", error_buffer, __FILE__, __LINE__);
828#endif
829 unset_error_buffer(ceh);
830
831 super_easy_perform(ceh, fd);
832
833 // Free the header list
834 if (req_headers)
835 curl_slist_free_all(req_headers);
836 if (ceh)
837 curl_easy_cleanup(ceh);
838 BESDEBUG(MODULE, prolog << "Called curl_easy_cleanup()." << endl);
839 }
840 catch (...) {
841 if (req_headers)
842 curl_slist_free_all(req_headers);
843 if (ceh)
844 curl_easy_cleanup(ceh);
845 throw;
846 }
847 BESDEBUG(MODULE, prolog << "END" << endl);
848}
849
857string error_message(const CURLcode response_code, char *error_buffer) {
858 std::ostringstream oss;
859 size_t len = strlen(error_buffer);
860 if (len) {
861 oss << "cURL_error_buffer: '" << error_buffer;
862 }
863 oss << "' cURL_message: '" << curl_easy_strerror(response_code);
864 oss << "' (code: " << (int) response_code << ")";
865 return oss.str();
866}
867
/**
 * @brief libcurl write callback that copies the received bytes to a caller supplied buffer.
 *
 * The bytes are always copied to the start of the destination, and nothing is appended
 * after them (in particular no terminating NUL).
 * NOTE(review): when libcurl delivers a response in more than one call, each call writes
 * over what the previous one stored - confirm that callers only use this for small responses.
 *
 * @param buffer Data from libcurl
 * @param size Size of one data item
 * @param nmemb Number of data items; the total size of the data in this call is size * nmemb
 * @param data Destination; must have room for at least size * nmemb bytes
 * @return The number of bytes copied (size * nmemb)
 */
size_t c_write_data(void *buffer, size_t size, size_t nmemb, void *data) {
    const size_t byte_count = size * nmemb;

    const char *source = static_cast<const char *>(buffer);
    char *destination = static_cast<char *>(data);
    std::copy(source, source + byte_count, destination);

    return byte_count;
}
887
894std::string http_get_as_string(const std::string &target_url) {
895
896 // @TODO @FIXME Make the size of this buffer one of:
897 // a) A configuration setting.
898 // b) A new parameter to the function. (unsigned long)
899 // c) Do a HEAD on the URL, check for the Content-Length header and plan accordingly.
900 //
901 char response_buf[1024 * 1024];
902
903 http_get(target_url, response_buf);
904 string response(response_buf);
905 return response;
906}
907
915rapidjson::Document http_get_as_json(const std::string &target_url) {
916
917 // @TODO @FIXME Make the size of this buffer one of:
918 // a) A configuration setting.
919 // b) A new parameter to the function. (unsigned long)
920 // c) Do a HEAD on the URL, check for the Content-Length header and plan accordingly.
921 //
922
923 char response_buf[1024 * 1024];
924
925 curl::http_get(target_url, response_buf);
926 rapidjson::Document d;
927 d.Parse(response_buf);
928 return d;
929}
930
936void http_get(const std::string &target_url, char *response_buf) {
937
938 char errbuf[CURL_ERROR_SIZE];
939 CURL *ceh = nullptr;
940 CURLcode res;
941
942 curl_slist *request_headers = nullptr;
943 // Add the authorization headers
944 request_headers = add_edl_auth_headers(request_headers);
945
946 try {
947
948 ceh = curl::init(target_url, request_headers, nullptr);
949 if (!ceh)
950 throw BESInternalError(string("ERROR! Failed to acquire cURL Easy Handle! "), __FILE__, __LINE__);
951
952 // Error Buffer (for use during this setup) ----------------------------------------------------------------
953 set_error_buffer(ceh, errbuf);
954
955 // Pass all data to the 'write_data' function --------------------------------------------------------------
956 res = curl_easy_setopt(ceh, CURLOPT_WRITEFUNCTION, c_write_data);
957 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_WRITEFUNCTION", errbuf, __FILE__, __LINE__);
958
959 // Pass this to write_data as the fourth argument ----------------------------------------------------------
960 res = curl_easy_setopt(ceh, CURLOPT_WRITEDATA, reinterpret_cast<void *>(response_buf));
961 eval_curl_easy_setopt_result(res, prolog, "CURLOPT_WRITEDATA", errbuf, __FILE__, __LINE__);
962
963 unset_error_buffer(ceh);
964
965 super_easy_perform(ceh);
966
967 if (request_headers)
968 curl_slist_free_all(request_headers);
969 if (ceh)
970 curl_easy_cleanup(ceh);
971 }
972 catch (...) {
973 if (request_headers)
974 curl_slist_free_all(request_headers);
975 if (ceh)
976 curl_easy_cleanup(ceh);
977 }
978}
979
980#if 0
988CURL *set_up_easy_handle(const string &target_url, struct curl_slist *request_headers, char *response_buff) {
989 char errbuf[CURL_ERROR_SIZE];
990 CURL *d_handle;
991 CURLcode res;
992
993 d_handle = curl::init(target_url,request_headers,NULL);
994 if (!d_handle)
995 throw BESInternalError(string("ERROR! Failed to acquire cURL Easy Handle! "), __FILE__, __LINE__);
996
997 // Error Buffer (for use during this setup) --------------------------------------------------------------------
998 set_error_buffer(d_handle,errbuf);
999
1000 // Pass all data to the 'write_data' function ------------------------------------------------------------------
1001 res = curl_easy_setopt(d_handle, CURLOPT_WRITEFUNCTION, c_write_data);
1002 check_setopt_result(res, prolog, "CURLOPT_WRITEFUNCTION", errbuf, __FILE__, __LINE__);
1003
1004 // Pass this to write_data as the fourth argument --------------------------------------------------------------
1005 res = curl_easy_setopt(d_handle, CURLOPT_WRITEDATA, reinterpret_cast<void *>(response_buff));
1006 check_setopt_result(res, prolog, "CURLOPT_WRITEDATA", errbuf, __FILE__, __LINE__);
1007
1008#if 0
1009 // handled by curl::init() - SBL 9.10.20
1010 // Follow redirects --------------------------------------------------------------------------------------------
1011 res = curl_easy_setopt(d_handle, CURLOPT_FOLLOWLOCATION, 1L);
1012 check_setopt_result(res, prolog, "CURLOPT_FOLLOWLOCATION", errbuf, __FILE__, __LINE__);
1013
1014 // Use cookies -------------------------------------------------------------------------------------------------
1015 res = curl_easy_setopt(d_handle, CURLOPT_COOKIEFILE, cookies_file.c_str());
1016 check_setopt_result(res, prolog, "CURLOPT_COOKIEFILE", errbuf, __FILE__, __LINE__);
1017
1018 res = curl_easy_setopt(d_handle, CURLOPT_COOKIEJAR, cookies_file.c_str());
1019 check_setopt_result(res, prolog, "CURLOPT_COOKIEJAR", errbuf, __FILE__, __LINE__);
1020
1021 // Authenticate using best available ---------------------------------------------------------------------------
1022 res = curl_easy_setopt(d_handle, CURLOPT_HTTPAUTH, (long) CURLAUTH_ANY);
1023 check_setopt_result(res, prolog, "CURLOPT_HTTPAUTH", errbuf, __FILE__, __LINE__);
1024
1025 // Use .netrc for credentials ----------------------------------------------------------------------------------
1026 res = curl_easy_setopt(d_handle, CURLOPT_NETRC, CURL_NETRC_OPTIONAL);
1027 check_setopt_result(res, prolog, "CURLOPT_NETRC", errbuf, __FILE__, __LINE__);
1028
1029 // If the configuration specifies a particular .netrc credentials file, use it. --------------------------------
1030 string netrc_file = get_netrc_filename();
1031 if (!netrc_file.empty()) {
1032 res = curl_easy_setopt(d_handle, CURLOPT_NETRC_FILE, netrc_file.c_str());
1033 check_setopt_result(res, prolog, "CURLOPT_NETRC_FILE", errbuf, __FILE__, __LINE__);
1034 }
1035
1036 VERBOSE(__FILE__ << "::get_easy_handle() is using the netrc file '"
1037 << ((!netrc_file.empty()) ? netrc_file : "~/.netrc") << "'" << endl);
1038#endif
1039
1040 unset_error_buffer(d_handle);
1041
1042 return d_handle;
1043}
1044#endif
1045
1065void super_easy_perform(CURL *c_handle){
1066 int fd = -1;
1067 super_easy_perform(c_handle, fd);
1068}
1069
1070void super_easy_perform(CURL *c_handle, const int fd)
1071{
1072 unsigned int attempts = 0;
1073 useconds_t retry_time = uone_second / 4;
1074 bool success;
1075 CURLcode curl_code;
1076 char curlErrorBuf[CURL_ERROR_SIZE];
1077 string target_url;
1078
1079 string empty_str;
1080 target_url = get_effective_url(c_handle, empty_str);
1081 // We check the value of target_url to see if the URL was correctly set in the cURL handle.
1082 if (target_url.empty())
1083 throw BESInternalError("URL acquisition failed.", __FILE__, __LINE__);
1084
1085 // SET Error Buffer --------------------------------------------------------------------------------------------
1086 set_error_buffer(c_handle, curlErrorBuf);
1087 do {
1088 curlErrorBuf[0] = 0; // Initialize to empty string
1089 ++attempts;
1090 BESDEBUG(MODULE, prolog << "Requesting URL: " << target_url << " attempt: " << attempts << endl);
1091
1092 curl_code = curl_easy_perform(c_handle);
1093 success = eval_curl_easy_perform_code(c_handle, target_url, curl_code, curlErrorBuf, attempts);
1094 if (success) {
1095 // Nothing obvious went wrong with the curl_easy_perform() so now we check the HTTP stuff
1096 success = eval_http_get_response(c_handle, curlErrorBuf, target_url);
1097 }
1098 // If the curl_easy_perform failed, or if the http request failed then
1099 // we keep trying until we have exceeded the retry_limit.
1100 if (!success) {
1101 if (attempts == retry_limit) {
1102 stringstream msg;
1103 msg << prolog << "ERROR - Made " << retry_limit << " failed attempts to retrieve the URL " << target_url;
1104 msg << " The retry limit has been exceeded. Giving up!";
1105 ERROR_LOG(msg.str() << endl);
1106 throw BESInternalError(msg.str(), __FILE__, __LINE__);
1107 }
1108 else {
1109 ERROR_LOG(prolog << "ERROR - Problem with data transfer. Will retry (url: " << target_url <<
1110 " attempt: " << attempts << ")." << endl);
1111 usleep(retry_time);
1112 retry_time *= 2;
1113
1114 if( fd >= 0 ){
1115 // Thanks to Stevens APitUE
1116
1117 // Check the output file descriptor
1118 int val = fcntl(fd, F_GETFL, 0);
1119 if(val < 0){
1120 stringstream ss;
1121 ss << prolog << "Encountered fcntl error " << val << " for fd: " << fd << endl;
1122 BESDEBUG(MODULE, ss.str());
1123 ERROR_LOG(ss.str());
1124 }
1125 else {
1126 int accmode = val & O_ACCMODE;
1127#if 1
1128 // -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
1129 if (accmode == O_RDONLY) {
1130 BESDEBUG(MODULE, prolog << " FILE " << fd << " is open and read only" << endl);
1131 }
1132 else if (accmode == O_WRONLY) {
1133 BESDEBUG(MODULE, prolog << " FILE " << fd << " is open and write only" << endl);
1134 }
1135 else if (accmode == O_RDWR) {
1136 BESDEBUG(MODULE, prolog << " FILE " << fd << " is open for read and write" << endl);
1137 }
1138 else {
1139 stringstream ss;
1140 ss << prolog << "ERROR Unknown access mode mode for FILE '" << fd << "'" << endl;
1141 BESDEBUG(MODULE, ss.str());
1142 ERROR_LOG(ss.str());
1143 }
1144 // -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
1145#endif
1146 // Reset output file pointer here to clear any document returned with the error response
1147 if (accmode == O_WRONLY || accmode == O_RDWR){
1148 int status = ftruncate(fd, 0);
1149 if (-1 == status)
1150 throw BESInternalError("Could not truncate the file prior to retrying from remote. ", __FILE__, __LINE__);
1151 BESDEBUG(MODULE, prolog << "Truncated file, length is zero." << endl);
1152 }
1153
1154 // FIXME Now what about the memory buffer case? How do we solve the same issue there?
1155 }
1156
1157 }
1158
1159 }
1160 }
1161 } while (!success);
1162 // Unset the buffer as it goes out of scope
1163 unset_error_buffer(c_handle);
1164}
1165
1166#if 0
1167
1172void read_data(CURL *c_handle) {
1173
1174 unsigned int attempts = 0;
1175 useconds_t retry_time = uone_second / 4;
1176 bool success;
1177 CURLcode curl_code;
1178 char curlErrorBuf[CURL_ERROR_SIZE];
1179 char *urlp = NULL;
1180
1181 curl_easy_getinfo(c_handle, CURLINFO_EFFECTIVE_URL, &urlp);
1182 // Checking the curl_easy_getinfo return value in this case is pointless. If it's CURLE_OK then we
1183 // still have to check the value of urlp to see if the URL was correctly set in the
1184 // cURL handle. If it fails then it fails, and urlp is not set. If we just focus on the value of urlp then
1185 // we can just check the one thing.
1186 if (!urlp)
1187 throw BESInternalError("URL acquisition failed.", __FILE__, __LINE__);
1188
1189 // SET Error Buffer --------------------------------------------------------------------------------------------
1190 set_error_buffer(c_handle, curlErrorBuf);
1191 do {
1192 // bool do_retry;
1193 curlErrorBuf[0]=0; // Initialize to empty string
1194 ++attempts;
1195 BESDEBUG(MODULE, prolog << "Requesting URL: " << urlp << " attempt: " << attempts << endl);
1196
1197 curl_code = curl_easy_perform(c_handle);
1198 success = eval_curl_easy_perform_code(c_handle, urlp, curl_code, curlErrorBuf, attempts);
1199 if(success){
1200 // Nothing obvious went wrong with the curl_easy_perfom() so now we check the HTTP stuff
1201 success = eval_http_get_response(c_handle, urlp);
1202 }
1203 // If the curl_easy_perform failed, or if the http request failed then
1204 // we keep trying until we have exceeded the retry_limit.
1205 if (!success) {
1206 if (attempts == retry_limit) {
1207 string msg = prolog + "ERROR - Problem with data transfer. Number of re-tries exceeded. Giving up.";
1208 LOG(msg << endl);
1209 throw BESInternalError(msg, __FILE__, __LINE__);
1210 }
1211 else {
1212 LOG(prolog << "ERROR - Problem with data transfer. Will retry (url: " << urlp <<
1213 " attempt: " << attempts << ")." << endl);
1214 usleep(retry_time);
1215 retry_time *= 2;
1216 }
1217 }
1218 } while (!success);
1219
1220#if 0
1221 // Try until retry_limit or success...
1222 do {
1223 curlErrorBuf[0] = 0; // clear the error buffer with a null termination at index 0.
1224 curl_code = curl_easy_perform(c_handle); // Do the thing...
1225 ++tries;
1226
1227 if (CURLE_OK != curl_code) { // Failure here is not an HTTP error, but a cURL error.
1228 throw BESInternalError(
1229 string("read_data() - ERROR! Message: ").append(error_message(curl_code, curlErrorBuf)),
1230 __FILE__, __LINE__);
1231 }
1232
1233 success = eval_get_response(c_handle, urlp);
1234 // if(debug) cout << ngap_curl::probe_easy_handle(c_handle) << endl;
1235 if (!success) {
1236 if (tries == retry_limit) {
1237 string msg = prolog + "Data transfer error: Number of re-tries exceeded: "+ error_message(curl_code, curlErrorBuf);
1238 LOG(msg << endl);
1239 throw BESInternalError(msg, __FILE__, __LINE__);
1240 }
1241 else {
1242 if (BESDebug::IsSet(MODULE)) {
1243 stringstream ss;
1244 ss << "HTTP transfer 500 error, will retry (trial " << tries << " for: " << urlp << ").";
1245 BESDEBUG(MODULE, ss.str());
1246 }
1247 usleep(retry_time);
1248 retry_time *= 2;
1249 }
1250 }
1251
1252 } while (!success);
1253#endif
1254 unset_error_buffer(c_handle);
1255}
1256#endif
1257
1258string get_cookie_file_base() {
1259 bool found = false;
1260 string cookie_filename;
1261 TheBESKeys::TheKeys()->get_value(HTTP_COOKIES_FILE_KEY, cookie_filename, found);
1262 if (!found) {
1263 cookie_filename = HTTP_DEFAULT_COOKIES_FILE;
1264 }
1265 return cookie_filename;
1266}
1267
1268string get_cookie_filename() {
1269 string cookie_file_base = get_cookie_file_base();
1270 stringstream cf_with_pid;
1271 cf_with_pid << cookie_file_base << "-" << getpid();
1272 return cf_with_pid.str();
1273}
1274
1275void clear_cookies() {
1276 string cf = get_cookie_filename();
1277 int ret = unlink(cf.c_str());
1278 if (ret) {
1279 string msg = prolog + "Failed to unlink the cookie file: " + cf;
1280 ERROR_LOG(msg << endl);
1281 BESDEBUG(MODULE, prolog << msg << endl);
1282 }
1283}
1284
1285
1295bool is_retryable(std::string target_url) {
1296 BESDEBUG(MODULE, prolog << "BEGIN" << endl);
1297 bool retryable = true;
1298
1299 vector<string> nr_regexs;
1300 bool found;
1301 TheBESKeys::TheKeys()->get_values(HTTP_NO_RETRY_URL_REGEX_KEY, nr_regexs, found);
1302 if (found) {
1303 vector<string>::iterator it;
1304 for (it = nr_regexs.begin(); it != nr_regexs.end() && retryable; it++) {
1305 BESRegex no_retry_regex((*it).c_str(), (*it).size());
1306 size_t match_length;
1307 match_length = no_retry_regex.match(target_url.c_str(), target_url.size(), 0);
1308 if (match_length == target_url.size()) {
1309 BESDEBUG(MODULE, prolog << "The url: '" << target_url << "' fully matched the "
1310 << HTTP_NO_RETRY_URL_REGEX_KEY << ": '" << *it << "'" << endl);
1311 retryable = false;
1312 }
1313 }
1314 }
1315 BESDEBUG(MODULE, prolog << "END retryable: " << (retryable ? "true" : "false") << endl);
1316 return retryable;
1317}
1318
1353bool eval_http_get_response(CURL *ceh, char *error_buffer, const string &requested_url) {
1354 BESDEBUG(MODULE, prolog << "Requested URL: " << requested_url << endl);
1355 CURLcode curl_code;
1356 string last_accessed_url = get_effective_url(ceh, requested_url);
1357 BESDEBUG(MODULE, prolog << "Last Accessed URL(CURLINFO_EFFECTIVE_URL): " << last_accessed_url << endl);
1358
1359 long http_code = 0;
1360
1361 curl_code = curl_easy_getinfo(ceh, CURLINFO_RESPONSE_CODE, &http_code);
1362 if (curl_code == CURLE_GOT_NOTHING) {
1363 // First we check to see if the response was empty. This is a cURL error, not an HTTP error
1364 // so we have to handle it like this. And we do that because this is one of the failure modes
1365 // we see in the AWS cloud and by trapping this and returning false we are able to be resilient and retry.
1366 stringstream msg;
1367 msg << prolog << "ERROR - cURL returned CURLE_GOT_NOTHING. Message: '";
1368 msg << error_message(curl_code, error_buffer) << "' ";
1369 msg << "CURLINFO_EFFECTIVE_URL: " << last_accessed_url << " ";
1370 msg << "A retry may be possible for: " << requested_url << ")." << endl;
1371 BESDEBUG(MODULE, msg.str());
1372 ERROR_LOG(msg.str());
1373 return false;
1374 }
1375 else if (curl_code != CURLE_OK) {
1376 // Not an error we are trapping so it's fail time.
1377 throw BESInternalError(
1378 string("Error acquiring HTTP response code: ").append(curl::error_message(curl_code, error_buffer)),
1379 __FILE__, __LINE__);
1380 }
1381
1382 if (BESDebug::IsSet(MODULE)) {
1383 long redirects;
1384 curl_easy_getinfo(ceh, CURLINFO_REDIRECT_COUNT, &redirects);
1385 BESDEBUG(MODULE, prolog << "CURLINFO_REDIRECT_COUNT: " << redirects << endl);
1386
1387 char *redirect_url = NULL;
1388 curl_easy_getinfo(ceh, CURLINFO_REDIRECT_URL, &redirect_url);
1389 if (redirect_url)
1390 BESDEBUG(MODULE, prolog << "CURLINFO_REDIRECT_URL: " << redirect_url << endl);
1391 }
1392
1393 stringstream msg;
1394 if (http_code >= 400) {
1395 msg << "ERROR - The HTTP GET request for the source URL: " << requested_url << " FAILED. ";
1396 msg << "CURLINFO_EFFECTIVE_URL: " << last_accessed_url << " ";
1397 BESDEBUG(MODULE, prolog << msg.str() << endl);
1398 }
1399 msg << "The response had an HTTP status of " << http_code;
1400 msg << " which means '" << http_status_to_string(http_code) << "'";
1401
1402 // Newer Apache servers return 206 for range requests. jhrg 8/8/18
1403 switch (http_code) {
1404 case 0:
1405 {
1406 if(requested_url.find(FILE_PROTOCOL)!=0){
1407 ERROR_LOG(msg.str() << endl);
1408 throw BESInternalError(msg.str(), __FILE__, __LINE__);
1409 }
1410 return true;
1411 }
1412 case 200: // OK
1413 case 206: // Partial content - this is to be expected since we use range gets
1414 // cases 201-205 are things we should probably reject, unless we add more
1415 // comprehensive HTTP/S processing here. jhrg 8/8/18
1416 return true;
1417
1418 //case 301: // Moved Permanently - but that's ok for now?
1419 // return true;
1420
1421 case 400: // Bad Request
1422 ERROR_LOG(msg.str() << endl);
1423 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
1424
1425 case 401: // Unauthorized
1426 case 402: // Payment Required
1427 case 403: // Forbidden
1428 ERROR_LOG(msg.str() << endl);
1429 throw BESForbiddenError(msg.str(), __FILE__, __LINE__);
1430
1431 case 404: // Not Found
1432 ERROR_LOG(msg.str() << endl);
1433 throw BESNotFoundError(msg.str(), __FILE__, __LINE__);
1434
1435 case 408: // Request Timeout
1436 ERROR_LOG(msg.str() << endl);
1437 throw BESTimeoutError(msg.str(), __FILE__, __LINE__);
1438
1439 case 422: // Unprocessable Entity
1440 case 500: // Internal server error
1441 case 502: // Bad Gateway
1442 case 503: // Service Unavailable
1443 case 504: // Gateway Timeout
1444 {
1445 if (!is_retryable(last_accessed_url)) {
1446 msg << " The semantics of this particular last accessed URL indicate that it should not be retried.";
1447 ERROR_LOG(msg.str() << endl);
1448 throw BESInternalError(msg.str(), __FILE__, __LINE__);
1449 }
1450 return false;
1451 }
1452
1453 default: {
1454 ERROR_LOG(msg.str() << endl);
1455 throw BESInternalError(msg.str(), __FILE__, __LINE__);
1456 }
1457 }
1458}
1459
1460
1481bool eval_curl_easy_perform_code(
1482 CURL *ceh,
1483 const string requested_url,
1484 CURLcode curl_code,
1485 char *error_buffer,
1486 const unsigned int attempt
1487) {
1488 bool success = true;
1489 string last_accessed_url = get_effective_url(ceh, requested_url);
1490 if (curl_code == CURLE_SSL_CONNECT_ERROR) {
1491 stringstream msg;
1492 msg << prolog << "ERROR - cURL experienced a CURLE_SSL_CONNECT_ERROR error. Message: '";
1493 msg << error_message(curl_code, error_buffer) << "' ";
1494 msg << "CURLINFO_EFFECTIVE_URL: " << last_accessed_url << " ";
1495 msg << "A retry may be possible for: " << requested_url << " (attempt: " << attempt << ")." << endl;
1496 BESDEBUG(MODULE, msg.str());
1497 ERROR_LOG(msg.str());
1498 success = false;
1499 }
1500 else if (curl_code == CURLE_SSL_CACERT_BADFILE) {
1501 stringstream msg;
1502 msg << prolog << "ERROR - cURL experienced a CURLE_SSL_CACERT_BADFILE error. Message: '";
1503 msg << error_message(curl_code, error_buffer) << "' ";
1504 msg << "CURLINFO_EFFECTIVE_URL: " << last_accessed_url << " ";
1505 msg << "A retry may be possible for: " << requested_url << " (attempt: " << attempt << ")." << endl;
1506 BESDEBUG(MODULE, msg.str());
1507 ERROR_LOG(msg.str());
1508 success = false;
1509 }
1510 else if (curl_code == CURLE_GOT_NOTHING) {
1511 // First we check to see if the response was empty. This is a cURL error, not an HTTP error
1512 // so we have to handle it like this. And we do that because this is one of the failure modes
1513 // we see in the AWS cloud and by trapping this and returning false we are able to be resilient and retry.
1514 stringstream msg;
1515 msg << prolog << "ERROR - cURL returned CURLE_GOT_NOTHING. Message: ";
1516 msg << error_message(curl_code, error_buffer) << "' ";
1517 msg << "CURLINFO_EFFECTIVE_URL: " << last_accessed_url << " ";
1518 msg << "A retry may be possible for: " << requested_url << " (attempt: " << attempt << ")." << endl;
1519 BESDEBUG(MODULE, msg.str());
1520 ERROR_LOG(msg.str());
1521 return false;
1522 }
1523 else if (CURLE_OK != curl_code) {
1524 stringstream msg;
1525 msg << "ERROR - Problem with data transfer. Message: " << error_message(curl_code, error_buffer);
1526 string effective_url = get_effective_url(ceh, requested_url);
1527 msg << " CURLINFO_EFFECTIVE_URL: " << effective_url;
1528 BESDEBUG(MODULE, prolog << msg.str() << endl);
1529 ERROR_LOG(msg.str() << endl);
1530 throw BESInternalError(msg.str(), __FILE__, __LINE__);
1531 }
1532 return success;
1533}
1534
1535#if 0
1543 void retrieve_effective_url(const string &target_url, string &last_accessed_url) {
1544 vector<string> resp_hdrs;
1545 CURL *ceh = NULL;
1546 // CURLcode curl_code;
1547 curl_slist *request_headers = NULL;
1548
1549 BESDEBUG(MODULE, prolog << "BEGIN" << endl);
1550
1551 // Add the authorization headers
1552 request_headers = add_auth_headers(request_headers);
1553
1554 try {
1555 BESDEBUG(MODULE,
1556 prolog << "BESDebug::IsSet(" << MODULE << "): " << (BESDebug::IsSet(MODULE) ? "true" : "false")
1557 << endl);
1558 BESDEBUG(MODULE, prolog << "BESDebug::IsSet(" << TIMING_LOG_KEY << "): "
1559 << (BESDebug::IsSet(TIMING_LOG_KEY) ? "true" : "false") << endl);
1560 BESDEBUG(MODULE,
1561 prolog << "BESLog::TheLog()->is_verbose(): " << (BESLog::TheLog()->is_verbose() ? "true" : "false")
1562 << endl);
1563
1564 ceh = init_effective_url_retriever_handle(target_url, request_headers, resp_hdrs);
1565
1566 {
1567 BESStopWatch sw;
1568 if (BESDebug::IsSet("euc") || BESDebug::IsSet(MODULE) || BESDebug::IsSet(TIMING_LOG_KEY) ||
1569 BESLog::TheLog()->is_verbose()) {
1570 sw.start(prolog + " Following Redirects Starting With: " + target_url);
1571 }
1572 super_easy_perform(ceh);
1573 }
1574
1575 // After doing the thing with super_easy_perform() we retrieve the effective URL form the cURL handle.
1576 last_accessed_url = get_effective_url(ceh, target_url);
1577 BESDEBUG(MODULE, prolog << "Last Accessed URL(CURLINFO_EFFECTIVE_URL): " << last_accessed_url << endl);
1578 INFO_LOG(
1579 prolog << "Source URL: '" << target_url << "' CURLINFO_EFFECTIVE_URL: '" << last_accessed_url << "'"
1580 << endl);
1581
1582 if (request_headers)
1583 curl_slist_free_all(request_headers);
1584 if (ceh)
1585 curl_easy_cleanup(ceh);
1586 }
1587 catch (...) {
1588 if (request_headers)
1589 curl_slist_free_all(request_headers);
1590 if (ceh)
1591 curl_easy_cleanup(ceh);
1592 throw;
1593 }
1594 }
1595#endif
1604 std::shared_ptr<http::EffectiveUrl> retrieve_effective_url(const std::shared_ptr<http::url> &starting_point_url) {
1605
1606 vector<string> resp_hdrs;
1607 CURL *ceh = nullptr;
1608 // CURLcode curl_code;
1609 curl_slist *request_headers = nullptr;
1610
1611 BESDEBUG(MODULE, prolog << "BEGIN" << endl);
1612
1613 // Add the authorization headers
1614 request_headers = add_edl_auth_headers(request_headers);
1615
1616 try {
1617 BESDEBUG(MODULE,
1618 prolog << "BESDebug::IsSet(" << MODULE << "): " << (BESDebug::IsSet(MODULE) ? "true" : "false")
1619 << endl);
1620 BESDEBUG(MODULE, prolog << "BESDebug::IsSet(" << TIMING_LOG_KEY << "): "
1621 << (BESDebug::IsSet(TIMING_LOG_KEY) ? "true" : "false") << endl);
1622 BESDEBUG(MODULE, prolog << "BESLog::TheLog()->is_verbose(): "
1623 << (BESLog::TheLog()->is_verbose() ? "true" : "false") << endl);
1624
1625 ceh = init_effective_url_retriever_handle(starting_point_url->str(), request_headers, resp_hdrs);
1626
1627 {
1628 BESStopWatch sw;
1629 if (BESDebug::IsSet("euc") || BESDebug::IsSet(MODULE) || BESDebug::IsSet(TIMING_LOG_KEY) ||
1630 BESLog::TheLog()->is_verbose()) {
1631 sw.start(prolog + " Following Redirects Starting With: " + starting_point_url->str());
1632 }
1633 super_easy_perform(ceh);
1634 }
1635
1636 // After doing the thing with super_easy_perform() we retrieve the effective URL form the cURL handle.
1637 string e_url_str = get_effective_url(ceh, starting_point_url->str());
1638 std::shared_ptr<http::EffectiveUrl> eurl(new EffectiveUrl(e_url_str, resp_hdrs, starting_point_url->is_trusted()));
1639
1640 BESDEBUG(MODULE, prolog << "Last Accessed URL(CURLINFO_EFFECTIVE_URL): " << eurl->str() <<
1641 "(" << (eurl->is_trusted()?"":"NOT ") << "trusted)" << endl);
1642
1643 INFO_LOG(prolog << "Source URL: '" << starting_point_url->str() << "(" << (starting_point_url->is_trusted() ? "" : "NOT ") << "trusted)" <<
1644 "' CURLINFO_EFFECTIVE_URL: '" << eurl->str() << "'" << "(" << (eurl->is_trusted()?"":"NOT ") << "trusted)" << endl);
1645
1646
1647 if (request_headers)
1648 curl_slist_free_all(request_headers);
1649 if (ceh)
1650 curl_easy_cleanup(ceh);
1651
1652 return eurl;
1653 }
1654 catch (...) {
1655 if (request_headers)
1656 curl_slist_free_all(request_headers);
1657 if (ceh)
1658 curl_easy_cleanup(ceh);
1659 throw;
1660 }
1661
1662#if 0
1663 {
1664 unsigned int attempts = 0;
1665 bool success = true;
1666 useconds_t retry_time = uone_second / 4;
1667
1668 char error_buffer[CURL_ERROR_SIZE];
1669 vector<string> resp_hdrs;
1670 CURL *ceh = NULL;
1671 CURLcode curl_code;
1672
1673 struct curl_slist *request_headers = NULL;
1674 // Add the authorization headers
1675 request_headers = get_auth_headers(request_headers);
1676
1677 try {
1678 ceh = init_effective_url_retriever_handle(url, request_headers, resp_hdrs);
1679 set_error_buffer(ceh, error_buffer);
1680 do {
1681 // bool do_retry;
1682 error_buffer[0] = 0; // Initialize to empty string
1683 ++attempts;
1684 BESDEBUG(MODULE, prolog << "Requesting URL: " << starting_point_url << " attempt: " << attempts << endl);
1685
1686 curl_code = curl_easy_perform(ceh);
1687 success = eval_curl_easy_perform_code(ceh, starting_point_url, curl_code, error_buffer, attempts);
1688 if (success) {
1689 // Nothing obvious went wrong with the curl_easy_perfom() so now we check the HTTP stuff
1690 success = eval_http_get_response(ceh, starting_point_url);
1691 if (!success) {
1692 if (attempts == retry_limit) {
1693 string msg = prolog +
1694 "ERROR - Problem with data transfer. Number of re-tries exceeded. Giving up.";
1695 LOG(msg << endl);
1696 throw BESInternalError(msg, __FILE__, __LINE__);
1697 } else {
1698 LOG(prolog << "ERROR - Problem with data transfer. Will retry (url: " << starting_point_url <<
1699 " attempt: " << attempts << ")." << endl);
1700 }
1701 }
1702 }
1703 // If it did not work we keep trying until we have exceeded the retry_limit.
1704 if (!success) {
1705 usleep(retry_time);
1706 retry_time *= 2;
1707 }
1708 } while (!success);
1709
1710 char *effective_url = 0;
1711 curl_easy_getinfo(ceh, CURLINFO_EFFECTIVE_URL, &effective_url);
1712 BESDEBUG(MODULE, prolog << " CURLINFO_EFFECTIVE_URL: " << effective_url << endl);
1713 last_accessed_url = effective_url;
1714
1715 LOG(prolog << "Source URL: '" << starting_point_url << "' Last Accessed URL: '" << last_accessed_url << "'" << endl);
1716
1717 unset_error_buffer(ceh);
1718
1719 if (ceh) {
1720 curl_slist_free_all(request_headers);
1721 curl_easy_cleanup(ceh);
1722 ceh = 0;
1723 }
1724 }
1725 catch (...) {
1726 if (request_headers)
1727 curl_slist_free_all(request_headers);
1728 if (ceh) {
1729 curl_easy_cleanup(ceh);
1730 ceh = 0;
1731 }
1732 throw;
1733 }
1734 }
1735#endif
1736 }
1737
1747string get_netrc_filename() {
1748 string netrc_filename;
1749 bool found = false;
1750 TheBESKeys::TheKeys()->get_value(HTTP_NETRC_FILE_KEY, netrc_filename, found);
1751 if (found) {
1752 BESDEBUG(MODULE, prolog << "Using netrc file: " << netrc_filename << endl);
1753 }
1754 else {
1755 BESDEBUG(MODULE, prolog << "Using default netrc file. (~/.netrc)" << endl);
1756 }
1757 return netrc_filename;
1758}
1759
1765void set_error_buffer(CURL *ceh, char *error_buffer) {
1766 CURLcode res;
1767 res = curl_easy_setopt(ceh, CURLOPT_ERRORBUFFER, error_buffer);
1768 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_ERRORBUFFER", error_buffer, __FILE__, __LINE__);
1769}
1770
1776void unset_error_buffer(CURL *ceh) {
1777 set_error_buffer(ceh, NULL);
1778}
1779
1780
1785string hyrax_user_agent() {
1786 string user_agent;
1787 bool found;
1788 TheBESKeys::TheKeys()->get_value(HTTP_USER_AGENT_KEY,user_agent, found);
1789 if(!found || user_agent.empty()){
1790 user_agent = HTTP_DEFAULT_USER_AGENT;
1791 }
1792 BESDEBUG(MODULE, prolog << "User-Agent: "<< user_agent << endl);
1793 return user_agent;
1794}
1795
1811void eval_curl_easy_setopt_result(
1812 CURLcode curl_code,
1813 string msg_base,
1814 string opt_name,
1815 char *ebuf,
1816 string file,
1817 unsigned int line) {
1818 if (curl_code != CURLE_OK) {
1819 stringstream msg;
1820 msg << msg_base << "ERROR - cURL failed to set " << opt_name << " Message: " << curl::error_message(curl_code, ebuf);
1821 throw BESInternalError(msg.str(), file, line);
1822 }
1823}
1824
1825unsigned long max_redirects() {
1827}
1828
1840curl_slist *append_http_header(curl_slist *slist, const string &header_name, const string &value)
1841{
1842
1843 string full_header = header_name;
1844 full_header.append(": ").append(value);
1845
1846 BESDEBUG(MODULE, prolog << full_header << endl);
1847 // std::cerr << prolog << full_header << endl;
1848
1849 struct curl_slist *temp = curl_slist_append(slist, full_header.c_str());
1850 if (!temp){
1851 stringstream msg;
1852 msg << prolog << "Encountered cURL Error setting the " << header_name << " header. full_header: " << full_header;
1853 throw BESInternalError(msg.str(), __FILE__, __LINE__);
1854 }
1855 return temp;
1856}
1857
1858
1889curl_slist *add_edl_auth_headers(curl_slist *request_headers) {
1890 bool found;
1891 string s;
1892
1893 s = BESContextManager::TheManager()->get_context(EDL_UID_KEY, found);
1894 if (found && !s.empty()) {
1895 request_headers = append_http_header(request_headers,"User-Id",s);
1896 }
1897
1898 s = BESContextManager::TheManager()->get_context(EDL_AUTH_TOKEN_KEY, found);
1899 if (found && !s.empty()) {
1900 request_headers = append_http_header(request_headers,"Authorization",s);
1901 }
1902
1903 s = BESContextManager::TheManager()->get_context(EDL_ECHO_TOKEN_KEY, found);
1904 if (found && !s.empty()) {
1905 request_headers = append_http_header(request_headers,"Echo-Token",s);
1906 }
1907
1908 return request_headers;
1909}
1910
1918string get_effective_url(CURL *ceh, string requested_url) {
1919 char *effectve_url = nullptr;
1920 CURLcode curl_code = curl_easy_getinfo(ceh, CURLINFO_EFFECTIVE_URL, &effectve_url);
1921 if (curl_code != CURLE_OK) {
1922 stringstream msg;
1923 msg << prolog << "Unable to determine CURLINFO_EFFECTIVE_URL! Requested URL: " << requested_url;
1924 BESDEBUG(MODULE, msg.str() << endl);
1925 throw BESInternalError(msg.str(), __FILE__, __LINE__);
1926 }
1927 return effectve_url;
1928}
1929
1930
1931} /* namespace curl */
virtual std::string get_context(const std::string &name, bool &found)
retrieve the value of the specified context from the BES
static bool IsSet(const std::string &flagName)
see if the debug context flagName is set to true
Definition: BESDebug.h:168
error thrown if the BES is not allowed to access the resource requested
exception thrown if internal error encountered
error thrown if the resource requested cannot be found
Regular expression matching.
Definition: BESRegex.h:53
virtual bool start(std::string name)
Definition: BESStopWatch.cc:67
error thrown if there is a user syntax error in the request or any other user error
error thrown if there is a user syntax error in the request or any other user error
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:340
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
void get_values(const std::string &s, std::vector< std::string > &vals, bool &found)
Retrieve the values of a given key, if set.
Definition: TheBESKeys.cc:371
static AllowedHosts * theHosts()
Static accessor for the singleton.
Definition: AllowedHosts.cc:69
utility class for the HTTP catalog module
Definition: AllowedHosts.cc:55
size_t load_max_redirects_from_keys()
Definition: HttpUtils.cc:178