bes Updated for version 3.20.13
CurlHandlePool.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of the BES
4
5// Copyright (c) 2018 OPeNDAP, Inc.
6// Author: James Gallagher<jgallagher@opendap.org>
7//
8// This library is free software; you can redistribute it and/or
9// modify it under the terms of the GNU Lesser General Public
10// License as published by the Free Software Foundation; either
11// version 2.1 of the License, or (at your option) any later version.
12//
13// This library is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16// Lesser General Public License for more details.
17//
18// You should have received a copy of the GNU Lesser General Public
19// License along with this library; if not, write to the Free Software
20// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21//
22// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23
24#include "config.h"
25
26#include <string>
27#include <locale>
28#include <sstream>
29
30#include <cstring>
31#include <unistd.h>
32
33#include <curl/curl.h>
34
35#include "CurlUtils.h"
36#include "HttpNames.h"
37
38#include <time.h>
39
40#include <libdap/util.h> // long_to_string()
41
42#include "BESLog.h"
43#include "BESDebug.h"
44#include "BESInternalError.h"
45#include "BESForbiddenError.h"
46#include "AllowedHosts.h"
47
48#include "DmrppCommon.h"
49#include "DmrppNames.h"
50#include "awsv4.h"
51#include "CurlHandlePool.h"
52#include "Chunk.h"
53#include "CredentialsManager.h"
54#include "AccessCredentials.h"
55
56#define KEEP_ALIVE 1 // Reuse libcurl easy handles (1) or not (0).
57#define CURL_VERBOSE 0 // Logs curl info to the bes.log
58
59#define prolog std::string("CurlHandlePool::").append(__func__).append("() - ")
60
61using namespace dmrpp;
62using namespace std;
63
/**
 * @brief Translate a pthread mutex error code into a human-readable message.
 *
 * @param err An errno-style code; EINVAL, EBUSY, EAGAIN, EDEADLK and EPERM
 * are recognized.
 * @return The explanatory text for \p err, or "Unknown pthread error type."
 * when the code is not one of the recognized values.
 */
std::string pthread_error(unsigned int err){
    switch (err) {
    case EINVAL:
        // Two distinct causes share this code; keep them as separate sentences.
        return "The mutex was either created with the "
               "protocol attribute having the value "
               "PTHREAD_PRIO_PROTECT and the calling "
               "thread's priority is higher than the "
               "mutex's current priority ceiling. "
               "OR The value specified by mutex does not "
               "refer to an initialized mutex object.";

    case EBUSY:
        return "The mutex could not be acquired "
               "because it was already locked.";

    case EAGAIN:
        return "The mutex could not be acquired because "
               "the maximum number of recursive locks "
               "for mutex has been exceeded.";

    case EDEADLK:
        return "The current thread already owns the mutex";

    case EPERM:
        return "The current thread does not own the mutex.";

    default:
        return "Unknown pthread error type.";
    }
}
103
109#if 0
/**
 * @brief Build a string with hex info about stuff libcurl gets.
 *
 * The result starts with a header line "<text>, <size> bytes (0x<size in hex>)"
 * followed by one line per 16 bytes of input: a four-digit hex offset, the
 * bytes as two-digit hex values (short rows are padded with spaces so the
 * columns line up), and then the same bytes as characters, with anything
 * outside 0x20..0x7f shown as '.'.
 *
 * @param text Label written at the start of the header line; a null pointer
 * is treated as an empty label.
 * @param ptr The bytes to render; only read when \p size is greater than zero.
 * @param size Number of bytes available at \p ptr.
 * @return The formatted dump.
 */
static
std::string dump(const char *text, unsigned char *ptr, size_t size)
{
    static const char hex_digits[] = "0123456789abcdef";
    const size_t width = 0x10;

    // Lowercase hex rendering of 'value', zero-padded to at least 'min_digits'.
    // Done by hand so that no stream formatting state (base, fill, width) leaks
    // from one field into the next.
    auto to_hex = [](size_t value, size_t min_digits) {
        std::string digits;
        do {
            digits.insert(digits.begin(), hex_digits[value & 0xf]);
            value >>= 4;
        } while (value != 0);
        if (digits.size() < min_digits) digits.insert(0, min_digits - digits.size(), '0');
        return digits;
    };

    std::ostringstream oss;
    oss << (text ? text : "") << ", " << size << " bytes (0x" << to_hex(size, 8) << ")\n";

    for (size_t row = 0; row < size; row += width) {
        oss << to_hex(row, 4) << ": ";

        /* show hex to the left */
        for (size_t col = 0; col < width; ++col) {
            if (row + col < size)
                oss << to_hex(ptr[row + col], 2) << ' ';
            else
                oss << "   ";   // keep the character column aligned on a short last row
        }

        /* show data on the right */
        for (size_t col = 0; col < width && row + col < size; ++col) {
            const unsigned char byte = ptr[row + col];
            oss << static_cast<char>((byte >= 0x20 && byte < 0x80) ? byte : '.');
        }

        oss << '\n';
    }

    return oss.str();
}
149#endif
150
151#if CURL_VERBOSE
157static
158int curl_trace(CURL */*handle*/, curl_infotype type, char *data, size_t /*size*/, void */*userp*/)
159{
160 string text = "";
161 switch (type) {
162 // print info
163 case CURLINFO_TEXT:
164 case CURLINFO_HEADER_OUT:
165 case CURLINFO_HEADER_IN: {
166 text = data;
167 size_t pos;
168 while ((pos = text.find('\n')) != string::npos)
169 text = text.substr(0, pos);
170 break;
171 }
172
173 // Do not build up 'text' for the data transfers
174 case CURLINFO_DATA_OUT:
175 case CURLINFO_SSL_DATA_OUT:
176 case CURLINFO_DATA_IN:
177 case CURLINFO_SSL_DATA_IN:
178 default: /* in case a new one is introduced to shock us */
179 break;
180 }
181
182 switch (type) {
183 // print info
184 case CURLINFO_TEXT:
185 LOG("libcurl == Info: " << text << endl);
186 break;
187
188 case CURLINFO_HEADER_OUT:
189 LOG("libcurl == Send header: " << text << endl);
190 break;
191 case CURLINFO_HEADER_IN:
192 LOG("libcurl == Recv header: " << text << endl);
193 break;
194
195 // Only print these if we're desperate and the above code has been hacked to match
196 case CURLINFO_DATA_OUT:
197 case CURLINFO_SSL_DATA_OUT:
198 case CURLINFO_DATA_IN:
199 case CURLINFO_SSL_DATA_IN:
200 default:
201 break;
202 }
203
204 return 0;
205}
206#endif
207
208 // FIXME - This code does not make a cURL handle that follows links and I think that's a bug!
209dmrpp_easy_handle::dmrpp_easy_handle() : d_url(nullptr), d_request_headers(nullptr) {
210
211 CURLcode res;
212
213 d_handle = curl_easy_init();
214 if (!d_handle) throw BESInternalError("Could not allocate CURL handle", __FILE__, __LINE__);
215
216 curl::set_error_buffer(d_handle, d_errbuf);
217
218 res = curl_easy_setopt(d_handle, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_2);
219 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_SSLVERSION", d_errbuf, __FILE__, __LINE__);
220
221
222#if CURL_VERBOSE
223 res = curl_easy_setopt(d_handle, CURLOPT_DEBUGFUNCTION, curl_trace);
224 curl::check_setopt_result(res, prolog, "CURLOPT_DEBUGFUNCTION", d_errbuf, __FILE__, __LINE__);
225 // Many tests fail with this option, but it's still useful to see how connections
226 // are treated. jhrg 10/2/18
227 res = curl_easy_setopt(d_handle, CURLOPT_VERBOSE, 1L);
228 curl::check_setopt_result(res, prolog, "CURLOPT_VERBOSE", d_errbuf, __FILE__, __LINE__);
229#endif
230
231 res = curl_easy_setopt(d_handle, CURLOPT_HEADERFUNCTION, chunk_header_callback);
232 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_HEADERFUNCTION", d_errbuf, __FILE__, __LINE__);
233
234 // Pass all data to the 'write_data' function
235 res = curl_easy_setopt(d_handle, CURLOPT_WRITEFUNCTION, chunk_write_data);
236 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_WRITEFUNCTION", d_errbuf, __FILE__, __LINE__);
237
238#ifdef CURLOPT_TCP_KEEPALIVE
239 /* enable TCP keep-alive for this transfer */
240 res = curl_easy_setopt(d_handle, CURLOPT_TCP_KEEPALIVE, 1L);
241 curl::check_setopt_result(res, prolog, "CURLOPT_TCP_KEEPALIVE", d_errbuf, __FILE__, __LINE__);
242#endif
243
244#ifdef CURLOPT_TCP_KEEPIDLE
245 /* keep-alive idle time to 120 seconds */
246 res = curl_easy_setopt(d_handle, CURLOPT_TCP_KEEPIDLE, 120L);
247 curl::check_setopt_result(res, prolog, "CURLOPT_TCP_KEEPIDLE", d_errbuf, __FILE__, __LINE__);
248#endif
249
250#ifdef CURLOPT_TCP_KEEPINTVL
251 /* interval time between keep-alive probes: 120 seconds */
252 res = curl_easy_setopt(d_handle, CURLOPT_TCP_KEEPINTVL, 120L)
253 curl::check_setopt_result(res, prolog, "CURLOPT_TCP_KEEPINTVL", d_errbuf, __FILE__, __LINE__);
254#endif
255
256 d_in_use = false;
257 d_chunk = 0;
258}
259
260dmrpp_easy_handle::~dmrpp_easy_handle() {
261 if (d_handle) curl_easy_cleanup(d_handle);
262 if (d_request_headers) curl_slist_free_all(d_request_headers);
263}
264
278 // Treat HTTP/S requests specially; retry some kinds of failures.
279 if (d_url->protocol() == HTTPS_PROTOCOL || d_url->protocol() == HTTP_PROTOCOL) {
280 curl::super_easy_perform(d_handle);
281 }
282 else {
283 CURLcode curl_code = curl_easy_perform(d_handle);
284 if (CURLE_OK != curl_code) {
285 string msg = prolog + "ERROR - Data transfer error: ";
286 throw BESInternalError(msg.append(curl::error_message(curl_code, d_errbuf)), __FILE__, __LINE__);
287 }
288 }
289
290 d_chunk->set_is_read(true);
291}
292
293CurlHandlePool::CurlHandlePool(unsigned int max_handles) : d_max_easy_handles(max_handles) {
294 for (unsigned int i = 0; i < d_max_easy_handles; ++i) {
295 d_easy_handles.push_back(new dmrpp_easy_handle());
296 }
297}
298
315 // Here we check to make sure that we are only going to
316 // access an approved location with this easy_handle
317 // TODO I don't think this belongs here. jhrg 5/13/22
318 string reason = "The requested resource does not match any of the AllowedHost rules.";
319 if (!http::AllowedHosts::theHosts()->is_allowed(chunk->get_data_url(),reason)) {
320 stringstream ss;
321 ss << "ERROR! The chunk url "<< chunk->get_data_url()->str() << " was rejected because: " << reason;
322 throw BESForbiddenError(ss.str(), __FILE__, __LINE__);
323 }
324
325 std::lock_guard<std::recursive_mutex> lock_me(d_get_easy_handle_mutex);
326
327 dmrpp_easy_handle *handle = 0;
328 for (auto i = d_easy_handles.begin(), e = d_easy_handles.end(); i != e; ++i) {
329 if (!(*i)->d_in_use) {
330 handle = *i;
331 break;
332 }
333 }
334
335 if (handle) {
336 // Once here, d_easy_handle holds a CURL* we can use.
337 handle->d_in_use = true;
338 handle->d_url = chunk->get_data_url();
339
340 handle->d_chunk = chunk;
341
342 CURLcode res = curl_easy_setopt(handle->d_handle, CURLOPT_URL, chunk->get_data_url()->str().c_str());
343 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_URL", handle->d_errbuf, __FILE__, __LINE__);
344
345 // get the offset to offset + size bytes
346 res = curl_easy_setopt(handle->d_handle, CURLOPT_RANGE, chunk->get_curl_range_arg_string().c_str());
347 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_RANGE", handle->d_errbuf, __FILE__, __LINE__);
348
349 // Pass this to chunk_header_callback as the fourth argument
350 res = curl_easy_setopt(handle->d_handle, CURLOPT_HEADERDATA, reinterpret_cast<void *>(chunk));
351 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_HEADERDATA", handle->d_errbuf, __FILE__, __LINE__);
352
353 // Pass this to chunk_write_data as the fourth argument
354 res = curl_easy_setopt(handle->d_handle, CURLOPT_WRITEDATA, reinterpret_cast<void *>(chunk));
355 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_WRITEDATA", handle->d_errbuf, __FILE__, __LINE__);
356
357 // store the easy_handle so that we can call release_handle in multi_handle::read_data()
358 res = curl_easy_setopt(handle->d_handle, CURLOPT_PRIVATE, reinterpret_cast<void *>(handle));
359 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_PRIVATE", handle->d_errbuf, __FILE__, __LINE__);
360
361 // Enabled cookies
362 res = curl_easy_setopt(handle->d_handle, CURLOPT_COOKIEFILE, curl::get_cookie_filename().c_str());
363 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_COOKIEFILE", handle->d_errbuf, __FILE__, __LINE__);
364
365 res = curl_easy_setopt(handle->d_handle, CURLOPT_COOKIEJAR, curl::get_cookie_filename().c_str());
366 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_COOKIEJAR", handle->d_errbuf, __FILE__, __LINE__);
367
368 // Follow 302 (redirect) responses
369 res = curl_easy_setopt(handle->d_handle, CURLOPT_FOLLOWLOCATION, 1);
370 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_FOLLOWLOCATION", handle->d_errbuf, __FILE__, __LINE__);
371
372 res = curl_easy_setopt(handle->d_handle, CURLOPT_MAXREDIRS, curl::max_redirects());
373 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_MAXREDIRS", handle->d_errbuf, __FILE__, __LINE__);
374
375 // Set the user agent something otherwise TEA will never redirect to URS.
376 res = curl_easy_setopt(handle->d_handle, CURLOPT_USERAGENT, curl::hyrax_user_agent().c_str());
377 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_USERAGENT", handle->d_errbuf, __FILE__, __LINE__);
378
379 // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
380 // choosing the the 'safest' one supported by the server.
381 // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
382 res = curl_easy_setopt(handle->d_handle, CURLOPT_HTTPAUTH, (long) CURLAUTH_ANY);
383 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_HTTPAUTH", handle->d_errbuf, __FILE__, __LINE__);
384
385 // Enable using the .netrc credentials file.
386 res = curl_easy_setopt(handle->d_handle, CURLOPT_NETRC, CURL_NETRC_OPTIONAL);
387 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_NETRC", handle->d_errbuf, __FILE__, __LINE__);
388
389 // If the configuration specifies a particular .netrc credentials file, use it.
390 // TODO move this operation into constructor and stash the value.
391 string netrc_file = curl::get_netrc_filename();
392 if (!netrc_file.empty()) {
393 res = curl_easy_setopt(handle->d_handle, CURLOPT_NETRC_FILE, netrc_file.c_str());
394 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_NETRC_FILE", handle->d_errbuf, __FILE__, __LINE__);
395 }
396
397 AccessCredentials *credentials = CredentialsManager::theCM()->get(handle->d_url);
398 if (credentials && credentials->is_s3_cred()) {
399 BESDEBUG(DMRPP_CURL,
400 prolog << "Got AccessCredentials instance: " << endl << credentials->to_json() << endl);
401 // If there are available credentials, and they are S3 credentials then we need to sign
402 // the request
403 const std::time_t request_time = std::time(0);
404
405 const std::string auth_header =
406 AWSV4::compute_awsv4_signature(
407 handle->d_url,
408 request_time,
409 credentials->get(AccessCredentials::ID_KEY),
410 credentials->get(AccessCredentials::KEY_KEY),
411 credentials->get(AccessCredentials::REGION_KEY),
412 "s3");
413
414
415 handle->d_request_headers = curl::append_http_header((curl_slist *)0, "Authorization", auth_header);
416 handle->d_request_headers = curl::append_http_header(handle->d_request_headers, "x-amz-content-sha256",
417 "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
418 handle->d_request_headers = curl::append_http_header(handle->d_request_headers, "x-amz-date", AWSV4::ISO8601_date(request_time));
419
420 res = curl_easy_setopt(handle->d_handle, CURLOPT_HTTPHEADER, handle->d_request_headers);
421 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_HTTPHEADER", handle->d_errbuf, __FILE__, __LINE__);
422 }
423 }
424
425 return handle;
426}
427
435 // In get_easy_handle, it's possible that d_in_use could be false and d_chunk
436 // could not be set to 0 (because a separate thread could be running these
437 // methods). In that case, the thread running get_easy_handle could set d_chunk,
438 // and then this thread could clear it (... unlikely, but an optimizing compiler is
439 // free to reorder statements so long as they don't alter the function's behavior).
440 // Timing tests indicate this lock does not cost anything that can be measured.
441 // jhrg 8/21/18
442 std::lock_guard<std::recursive_mutex> lock_me(d_get_easy_handle_mutex);
443
444 // TODO Add a call to curl reset() here. jhrg 9/23/20
445
446#if KEEP_ALIVE
447 handle->d_url = nullptr;
448 handle->d_chunk = 0;
449 handle->d_in_use = false;
450#else
451 // This is to test the effect of libcurl Keep Alive support
452 // Find the handle; erase from the vector; delete; allocate a new handle and push it back on
453 for (std::vector<dmrpp_easy_handle *>::iterator i = d_easy_handles.begin(), e = d_easy_handles.end(); i != e; ++i) {
454 if (*i == handle) {
455 BESDEBUG("dmrpp:5", "Found a handle match for the " << i - d_easy_handles.begin() << "th easy handle." << endl);
456 delete handle;
457 *i = new dmrpp_easy_handle();
458 break;
459 }
460 }
461#endif
462}
463
470 for (std::vector<dmrpp_easy_handle *>::iterator i = d_easy_handles.begin(), e = d_easy_handles.end(); i != e; ++i) {
471 if ((*i)->d_chunk == chunk) {
472 release_handle(*i);
473 break;
474 }
475 }
476}
477
486 for (std::vector<dmrpp_easy_handle *>::iterator i = d_easy_handles.begin(), e = d_easy_handles.end(); i != e; ++i) {
487 release_handle(*i);
488 }
489}
virtual std::string get(const std::string &key)
virtual bool is_s3_cred()
Do the URL, ID, Key and Region items make up an S3 Credential?
error thrown if the BES is not allowed to access the resource requested
exception thrown if internal error encountered
AccessCredentials * get(std::shared_ptr< http::url > &url)
static CredentialsManager * theCM()
Returns the singleton instance of the CredentialsManager.
virtual std::string get_curl_range_arg_string()
Returns a curl range argument. libcurl requires a string argument for range-get requests,...
Definition: Chunk.cc:465
virtual std::shared_ptr< http::url > get_data_url() const
Definition: Chunk.cc:868
void release_handle(dmrpp_easy_handle *h)
dmrpp_easy_handle * get_easy_handle(Chunk *chunk)
Bundle a libcurl easy handle with other information.
void read_data()
This is the read_data() method for all transfers.
dmrpp_easy_handle()
Build a string with hex info about stuff libcurl gets.
static AllowedHosts * theHosts()
Static accessor for the singleton.
Definition: AllowedHosts.cc:69