bes Updated for version 3.20.13
Chunk.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of the BES
4
5// Copyright (c) 2016 OPeNDAP, Inc.
6// Author: Nathan Potter <ndp@opendap.org>
7//
8// This library is free software; you can redistribute it and/or
9// modify it under the terms of the GNU Lesser General Public
10// License as published by the Free Software Foundation; either
11// version 2.1 of the License, or (at your option) any later version.
12//
13// This library is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16// Lesser General Public License for more details.
17//
18// You should have received a copy of the GNU Lesser General Public
19// License along with this library; if not, write to the Free Software
20// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21//
22// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23
24#include "config.h"
25
26#include <sstream>
27#include <cstring>
28#include <cassert>
29
30#include <zlib.h>
31
32#include <BESDebug.h>
33#include <BESLog.h>
34#include <BESInternalError.h>
35#include <BESSyntaxUserError.h>
36#include <BESForbiddenError.h>
37#include <BESContextManager.h>
38#include <BESUtil.h>
39
40#define PUGIXML_NO_XPATH
41#define PUGIXML_HEADER_ONLY
42#include <pugixml.hpp>
43
44#include "Chunk.h"
45#include "CurlUtils.h"
46#include "CurlHandlePool.h"
47#include "EffectiveUrlCache.h"
48#include "DmrppRequestHandler.h"
49#include "DmrppNames.h"
50
51using namespace std;
53
54#define prolog std::string("Chunk::").append(__func__).append("() - ")
55
56#define FLETCHER32_CHECKSUM 4 // Bytes in the fletcher32 checksum
57#define ACTUALLY_USE_FLETCHER32_CHECKSUM 1 // Computing checksums takes time...
58
59namespace dmrpp {
60
73size_t chunk_header_callback(char *buffer, size_t /*size*/, size_t nitems, void *data) {
74 // received header is nitems * size long in 'buffer' NOT ZERO TERMINATED
75 // 'userdata' is set with CURLOPT_HEADERDATA
76 // 'size' is always 1
77
78 // -2 strips of the CRLF at the end of the header
79 string header(buffer, buffer + nitems - 2);
80
81 // Look for the content type header and store its value in the Chunk
82 if (header.find("Content-Type") != string::npos) {
83 // Header format 'Content-Type: <value>'
84 auto c_ptr = reinterpret_cast<Chunk *>(data);
85 c_ptr->set_response_content_type(header.substr(header.find_last_of(' ') + 1));
86 }
87
88 return nitems;
89}
90
96void process_s3_error_response(const shared_ptr<http::url> &data_url, const string &xml_message)
97{
98 // See https://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html
99 // for the low-down on this XML document.
100 pugi::xml_document error;
101 pugi::xml_parse_result result = error.load_string(xml_message.c_str());
102 if (!result)
103 throw BESInternalError("The underlying data store returned an unintelligible error message.", __FILE__, __LINE__);
104
105 pugi::xml_node err_elmnt = error.document_element();
106 if (!err_elmnt || (strcmp(err_elmnt.name(), "Error") != 0))
107 throw BESInternalError("The underlying data store returned a bogus error message.", __FILE__, __LINE__);
108
109 string code = err_elmnt.child_value("Code");
110 string message = err_elmnt.child_value("Message");
111
112 // We might want to get the "Code" from the "Error" if these text messages
113 // are not good enough. But the "Code" is not really suitable for normal humans...
114 // jhrg 12/31/19
115
116 if (code == "AccessDenied") {
117 stringstream msg;
118 msg << prolog << "ACCESS DENIED - The underlying object store has refused access to: ";
119 msg << data_url->str() << " Object Store Message: " << message;
120 BESDEBUG(MODULE, msg.str() << endl);
121 VERBOSE(msg.str() << endl);
122 throw BESForbiddenError(msg.str(), __FILE__, __LINE__);
123 }
124 else {
125 stringstream msg;
126 msg << prolog << "ERROR - The underlying object store returned an error. ";
127 msg << "(Tried: " << data_url->str() << ") Object Store Message: " << message;
128 BESDEBUG(MODULE, msg.str() << endl);
129 VERBOSE(msg.str() << endl);
130 throw BESInternalError(msg.str(), __FILE__, __LINE__);
131 }
132}
133
147size_t chunk_write_data(void *buffer, size_t size, size_t nmemb, void *data) {
148 BESDEBUG(MODULE, prolog << "BEGIN " << endl);
149 size_t nbytes = size * nmemb;
150 auto chunk = reinterpret_cast<Chunk *>(data);
151
152
153 auto data_url = chunk->get_data_url();
154 BESDEBUG(MODULE, prolog << "chunk->get_data_url():" << data_url << endl);
155
156 // When Content-Type is 'application/xml,' that's an error. jhrg 6/9/20
157 BESDEBUG(MODULE, prolog << "chunk->get_response_content_type():" << chunk->get_response_content_type() << endl);
158 if (chunk->get_response_content_type().find("application/xml") != string::npos) {
159 // At this point we no longer care about great performance - error msg readability
160 // is more important. jhrg 12/30/19
161 string xml_message = reinterpret_cast<const char *>(buffer);
162 xml_message.erase(xml_message.find_last_not_of("\t\n\v\f\r 0") + 1);
163 // Decode the AWS XML error message. In some cases this will fail because pub keys,
164 // which maybe in this error text, may have < or > chars in them. the XML parser
165 // will be sad if that happens. jhrg 12/30/19
166 try {
167 process_s3_error_response(data_url, xml_message); // throws a BESError
168 }
169 catch (BESError) {
170 // re-throw any BESError - added for the future if we make BESError a child
171 // of std::exception as it should be. jhrg 12/30/19
172 throw;
173 }
174 catch (std::exception &e) {
175 stringstream msg;
176 msg << prolog << "Caught std::exception when accessing object store data.";
177 msg << " (Tried: " << data_url->str() << ")" << " Message: " << e.what();
178 BESDEBUG(MODULE, msg.str() << endl);
179 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
180 }
181 }
182
183 // rbuf: |******++++++++++----------------------|
184 // ^ ^ bytes_read + nbytes
185 // | bytes_read
186
187 unsigned long long bytes_read = chunk->get_bytes_read();
188
189 // If this fails, the code will write beyond the buffer.
190 if (bytes_read + nbytes > chunk->get_rbuf_size()) {
191 stringstream msg;
192 msg << prolog << "ERROR! The number of bytes_read: " << bytes_read << " plus the number of bytes to read: "
193 << nbytes << " is larger than the target buffer size: " << chunk->get_rbuf_size();
194 BESDEBUG(MODULE, msg.str() << endl);
195 DmrppRequestHandler::curl_handle_pool->release_all_handles();
196 throw BESInternalError(msg.str(), __FILE__, __LINE__);
197 }
198
199 memcpy(chunk->get_rbuf() + bytes_read, buffer, nbytes);
200 chunk->set_bytes_read(bytes_read + nbytes);
201
202 BESDEBUG(MODULE, prolog << "END" << endl);
203
204 return nbytes;
205}
206
217void inflate(char *dest, unsigned long long dest_len, char *src, unsigned long long src_len) {
218 /* Sanity check */
219 assert(src_len > 0);
220 assert(src);
221 assert(dest_len > 0);
222 assert(dest);
223
224 /* Input; uncompress */
225 z_stream z_strm; /* zlib parameters */
226
227 /* Set the decompression parameters */
228 memset(&z_strm, 0, sizeof(z_strm));
229 z_strm.next_in = (Bytef *) src;
230 z_strm.avail_in = src_len;
231 z_strm.next_out = (Bytef *) dest;
232 z_strm.avail_out = dest_len;
233
234 /* Initialize the decompression routines */
235 if (Z_OK != inflateInit(&z_strm))
236 throw BESError("Failed to initialize inflate software.", BES_INTERNAL_ERROR, __FILE__, __LINE__);
237
238 /* Loop to uncompress the buffer */
239 int status = Z_OK;
240 do {
241 /* Uncompress some data */
242 status = inflate(&z_strm, Z_SYNC_FLUSH);
243
244 /* Check if we are done decompressing data */
245 if (Z_STREAM_END == status) break; /*done*/
246
247 /* Check for error */
248 if (Z_OK != status) {
249 stringstream err_msg;
250 err_msg << "Failed to inflate data chunk.";
251 char const *err_msg_cstr = z_strm.msg;
252 if(err_msg_cstr)
253 err_msg << " zlib message: " << err_msg_cstr;
254 (void) inflateEnd(&z_strm);
255 throw BESError(err_msg.str(), BES_INTERNAL_ERROR, __FILE__, __LINE__);
256 }
257 else {
258 /* If we're not done and just ran out of buffer space, it's an error.
259 * The HDF5 library code would extend the buffer as-needed, but for
260 * this handler, we should always know the size of the decompressed chunk.
261 */
262 if (0 == z_strm.avail_out) {
263 throw BESError("Data buffer is not big enough for uncompressed data.", BES_INTERNAL_ERROR, __FILE__, __LINE__);
264#if 0
265 /* Here's how to extend the buffer if needed. This might be useful some day... */
266 void *new_outbuf; /* Pointer to new output buffer */
267
268 /* Allocate a buffer twice as big */
269 nalloc *= 2;
270 if (NULL == (new_outbuf = H5MM_realloc(outbuf, nalloc))) {
271 (void) inflateEnd(&z_strm);
272 HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, 0, "memory allocation failed for inflate decompression")
273 } /* end if */
274 outbuf = new_outbuf;
275
276 /* Update pointers to buffer for next set of uncompressed data */
277 z_strm.next_out = (unsigned char*) outbuf + z_strm.total_out;
278 z_strm.avail_out = (uInt) (nalloc - z_strm.total_out);
279#endif
280 } /* end if */
281 } /* end else */
282 } while (true /* status == Z_OK */); // Exit via the break statement after the call to inflate(). jhrg 11/8/21
283
284 /* Finish decompressing the stream */
285 (void) inflateEnd(&z_strm);
286}
287
288// #define this to enable the duff's device loop unrolling code.
289// jhrg 1/19/17
290#define DUFFS_DEVICE
291
/**
 * @brief Reverse a byte 'shuffle' of fixed-width elements.
 *
 * The source holds, for each byte position within an element, that byte of
 * every element in turn (all first bytes, then all second bytes, ...). This
 * writes the bytes back so each element's bytes are contiguous again. Bytes
 * left over after the last whole element are copied unchanged to the end.
 *
 * @param dest Destination buffer; must hold at least @p src_size bytes.
 * @param src Shuffled source bytes.
 * @param src_size Number of bytes in @p src.
 * @param width Size of one element in bytes.
 */
void unshuffle(char *dest, const char *src, unsigned long long src_size, unsigned long long width) {
    const unsigned long long count = src_size / width; // whole elements only

    // One-byte elements, or fewer than two whole elements: nothing to reorder.
    if (width <= 1 || count <= 1) {
        memcpy(dest, src, src_size);
        return;
    }

    // Walk the source linearly; for each byte position scatter one byte into
    // every element of the destination.
    const char *in = src;
    for (unsigned long long byte_pos = 0; byte_pos < width; ++byte_pos) {
        char *out = dest + byte_pos;
        for (unsigned long long elem = 0; elem < count; ++elem, out += width) {
            *out = *in++;
        }
    }

    // Any trailing partial element is stored unshuffled after the whole elements.
    const unsigned long long tail = src_size % width;
    if (tail > 0) {
        memcpy(dest + count * width, in, tail);
    }
}
381
/**
 * @brief Parse a comma-separated list of unsigned integers.
 *
 * Each field is converted with stoull() and appended to @p res; values
 * already in @p res are kept.
 *
 * @param s The text to split, e.g. "1,22,333".
 * @param res Receives the parsed values, in order.
 * @throws std::invalid_argument / std::out_of_range as raised by stoull().
 */
static void split_by_comma(const std::string &s, std::vector<unsigned long long> &res)
{
    std::size_t field_begin = 0;

    for (std::size_t comma = s.find(',', field_begin);
         comma != std::string::npos;
         comma = s.find(',', field_begin)) {
        const std::string field = s.substr(field_begin, comma - field_begin);
        res.push_back(std::stoull(field));
        field_begin = comma + 1;
    }

    // Whatever follows the last comma (or the whole string when there is none).
    res.push_back(std::stoull(s.substr(field_begin)));
}
401
402void Chunk::parse_chunk_position_in_array_string(const string &pia, vector<unsigned long long> &cpia_vect)
403{
404 if (pia.empty()) return;
405
406 if (!cpia_vect.empty()) cpia_vect.clear();
407
408 // Assume input is [x,y,...,z] where x, ..., are integers; modest syntax checking
409 // [1] is a minimal 'position in array' string.
410 if (pia.find('[') == string::npos || pia.find(']') == string::npos || pia.length() < 3)
411 throw BESInternalError("while parsing a DMR++, chunk position string malformed", __FILE__, __LINE__);
412
413 if (pia.find_first_not_of("[]1234567890,") != string::npos)
414 throw BESInternalError("while parsing a DMR++, chunk position string illegal character(s)", __FILE__, __LINE__);
415
416 try {
417 split_by_comma(pia.substr(1, pia.length() - 2), cpia_vect);
418 }
419 catch(const std::invalid_argument &e) {
420 throw BESInternalError(string("while parsing a DMR++, chunk position string illegal character(s): ").append(e.what()), __FILE__, __LINE__);
421 }
422}
423
424
438void Chunk::set_position_in_array(const string &pia) {
439 parse_chunk_position_in_array_string(pia,d_chunk_position_in_array);
440}
441
450void Chunk::set_position_in_array(const std::vector<unsigned long long> &pia) {
451 if (pia.empty()) return;
452
453 if (!d_chunk_position_in_array.empty()) d_chunk_position_in_array.clear();
454
455 d_chunk_position_in_array = pia;
456}
457
466 return curl::get_range_arg_string(d_offset, d_size);
467}
468
487
488 // If there is no data url then there is nothing to add the parameter too.
489 if(d_data_url == nullptr)
490 return;
491
492 bool found = false;
493 string cloudydap_context_value = BESContextManager::TheManager()->get_context(S3_TRACKING_CONTEXT, found);
494 if (!found)
495 return;
496
511 bool add_tracking = false;
512
513 // All S3 buckets, virtual host style URL
514 // Simpler regex that's likely equivalent:
515 // ^https?:\/\/[a-z0-9]([-.a-z0-9]){1,61}[a-z0-9]\.s3[-.]us-(east|west)-[12])?\.amazonaws\.com\/.*$
516 string s3_vh_regex_str = R"(^https?:\/\/([a-z]|[0-9])(([a-z]|[0-9]|\.|-){1,61})([a-z]|[0-9])\.s3((\.|-)us-(east|west)-(1|2))?\.amazonaws\.com\/.*$)";
517
518 BESRegex s3_vh_regex(s3_vh_regex_str.c_str());
519 int match_result = s3_vh_regex.match(d_data_url->str().c_str(), d_data_url->str().length());
520 if(match_result>=0) {
521 auto match_length = (unsigned int) match_result;
522 if (match_length == d_data_url->str().length()) {
523 BESDEBUG(MODULE,
524 prolog << "FULL MATCH. pattern: " << s3_vh_regex_str << " url: " << d_data_url->str() << endl);
525 add_tracking = true;;
526 }
527 }
528
529 if(!add_tracking){
530 // All S3 buckets, path style URL
531 string s3_path_regex_str = R"(^https?:\/\/s3((\.|-)us-(east|west)-(1|2))?\.amazonaws\.com\/([a-z]|[0-9])(([a-z]|[0-9]|\.|-){1,61})([a-z]|[0-9])\/.*$)";
532 BESRegex s3_path_regex(s3_path_regex_str.c_str());
533 match_result = s3_path_regex.match(d_data_url->str().c_str(), d_data_url->str().length());
534 if(match_result>=0) {
535 auto match_length = (unsigned int) match_result;
536 if (match_length == d_data_url->str().length()) {
537 BESDEBUG(MODULE,
538 prolog << "FULL MATCH. pattern: " << s3_vh_regex_str << " url: " << d_data_url->str() << endl);
539 add_tracking = true;;
540 }
541 }
542 }
543
544 if (add_tracking) {
545 // Yup, headed to S3.
546 d_query_marker.append(S3_TRACKING_CONTEXT).append("=").append(cloudydap_context_value);
547 }
548}
549
/**
 * @brief Compute the Fletcher32 checksum of a block of bytes.
 *
 * Bytes are consumed as big-endian 16-bit words; a trailing odd byte is
 * treated as the high byte of a final word.
 *
 * @param _data Pointer to the data to be summed.
 * @param _len Number of bytes in @p _data.
 * @return The 32-bit checksum: second sum in the high 16 bits, first sum in the low 16 bits.
 */
uint32_t
checksum_fletcher32(const void *_data, size_t _len)
{
    // Sanity check
    assert(_data);
    assert(_len > 0);

    const auto *cursor = static_cast<const uint8_t *>(_data);
    uint32_t lo = 0;
    uint32_t hi = 0;

    // Folds the carries above bit 15 back into the low 16 bits.
    const auto fold = [](uint32_t v) -> uint32_t { return (v & 0xffff) + (v >> 16); };

    // Sum pairs of bytes in batches; 360 is the largest number of sums that
    // can be performed without numeric overflow before folding.
    size_t words_left = _len / 2;
    while (words_left > 0) {
        const size_t batch = (words_left < 360) ? words_left : 360;
        for (size_t k = 0; k < batch; ++k, cursor += 2) {
            lo += (static_cast<uint32_t>(cursor[0]) << 8) | static_cast<uint32_t>(cursor[1]);
            hi += lo;
        }
        words_left -= batch;
        lo = fold(lo);
        hi = fold(hi);
    }

    // Odd number of bytes: the last byte is the high half of a final word.
    if ((_len % 2) != 0) {
        lo += static_cast<uint32_t>(*cursor) << 8;
        hi += lo;
        lo = fold(lo);
        hi = fold(hi);
    }

    // Second reduction step to reduce sums to 16 bits
    lo = fold(lo);
    hi = fold(hi);

    return (hi << 16) | lo;
}
595
606void Chunk::filter_chunk(const string &filters, unsigned long long chunk_size, unsigned long long elem_width) {
607
608 if (d_is_inflated)
609 return;
610
611 chunk_size *= elem_width;
612
613 vector<string> filter_array = BESUtil::split(filters, ' ' );
614
615 for (auto i = filter_array.rbegin(), e = filter_array.rend(); i != e; ++i) {
616 string filter = *i;
617
618 if (filter == "deflate") {
619 char *dest = new char[chunk_size];
620 try {
621 inflate(dest, chunk_size, get_rbuf(), get_rbuf_size());
622 // This replaces (and deletes) the original read_buffer with dest.
623#if DMRPP_USE_SUPER_CHUNKS
624 set_read_buffer(dest, chunk_size, chunk_size, true);
625#else
626 set_rbuf(dest, chunk_size);
627#endif
628 }
629 catch (...) {
630 delete[] dest;
631 throw;
632 }
633 }// end filter is deflate
634 else if (filter == "shuffle"){
635 // The internal buffer is chunk's full size at this point.
636 char *dest = new char[get_rbuf_size()];
637 try {
638 unshuffle(dest, get_rbuf(), get_rbuf_size(), elem_width);
639#if DMRPP_USE_SUPER_CHUNKS
641#else
642 set_rbuf(dest, get_rbuf_size());
643#endif
644 }
645 catch (...) {
646 delete[] dest;
647 throw;
648 }
649 } //end filter is shuffle
650 else if (filter == "fletcher32"){
651 // Compute the fletcher32 checksum and compare to the value of the last four bytes of the chunk.
652#if ACTUALLY_USE_FLETCHER32_CHECKSUM
653 // Get the last four bytes of chunk's data (which is a byte array) and treat that as the four-byte
654 // integer fletcher32 checksum. jhrg 10/15/21
655 assert(get_rbuf_size() > FLETCHER32_CHECKSUM);
656 //assert((get_rbuf_size() - FLETCHER32_CHECKSUM) % 4 == 0); //probably wrong
657 auto f_checksum = *(uint32_t *)(get_rbuf() + get_rbuf_size() - FLETCHER32_CHECKSUM);
658
659 // If the code should actually use the checksum (they can be expensive to compute), does it match
660 // with once computed on the data actually read? Maybe make this a bes.conf parameter?
661 // jhrg 10/15/21
662 uint32_t calc_checksum = checksum_fletcher32((const void *)get_rbuf(), get_rbuf_size() - FLETCHER32_CHECKSUM);
663 if (f_checksum != calc_checksum) {
664 throw BESInternalError("Data read from the DMR++ handler did not match the Fletcher32 checksum.",
665 __FILE__, __LINE__);
666 }
667#endif
668 if (d_read_buffer_size > FLETCHER32_CHECKSUM)
669 d_read_buffer_size -= FLETCHER32_CHECKSUM;
670 else {
671 throw BESInternalError("Data filtered with fletcher32 don't include the four-byte checksum.",
672 __FILE__, __LINE__);
673 }
674 } // end filter is fletcher32
675 } // end for loop
676 d_is_inflated = true;
677}
678
679static unsigned int get_value_size(libdap::Type type)
680{
681 switch(type) {
682 case libdap::dods_int8_c:
683 return sizeof(int8_t);
684
685 case libdap::dods_int16_c:
686 return sizeof(int16_t);
687
688 case libdap::dods_int32_c:
689 return sizeof(int32_t);
690
691 case libdap::dods_int64_c:
692 return sizeof(int64_t);
693
694 case libdap::dods_uint8_c:
695 case libdap::dods_byte_c:
696 return sizeof(uint8_t);
697
698 case libdap::dods_uint16_c:
699 return sizeof(uint16_t);
700
701 case libdap::dods_uint32_c:
702 return sizeof(uint32_t);
703
704 case libdap::dods_uint64_c:
705 return sizeof(uint64_t);
706
707 case libdap::dods_float32_c:
708 return sizeof(float);
709
710 case libdap::dods_float64_c:
711 return sizeof(double);
712
713 default:
714 throw BESInternalError("Unknown fill value type.", __FILE__, __LINE__);
715 }
716}
717
718const char *get_value_ptr(fill_value &fv, libdap::Type type, const string &v)
719{
720 switch(type) {
721 case libdap::dods_int8_c:
722 fv.int8 = (int8_t)stoi(v);
723 return (const char *)&fv.int8;
724
725 case libdap::dods_int16_c:
726 fv.int16 = (int16_t)stoi(v);
727 return (const char *)&fv.int16;
728
729 case libdap::dods_int32_c:
730 fv.int32 = (int32_t)stoi(v);
731 return (const char *)&fv.int32;
732
733 case libdap::dods_int64_c:
734 fv.int64 = (int64_t)stoll(v);
735 return (const char *)&fv.int64;
736
737 case libdap::dods_uint8_c:
738 case libdap::dods_byte_c:
739 fv.uint8 = (uint8_t)stoi(v);
740 return (const char *)&fv.uint8;
741
742 case libdap::dods_uint16_c:
743 fv.uint16 = (uint16_t)stoi(v);
744 return (const char *)&fv.uint16;
745
746 case libdap::dods_uint32_c:
747 fv.uint32 = (uint32_t)stoul(v);
748 return (const char *)&fv.uint32;
749
750 case libdap::dods_uint64_c:
751 fv.uint64 = (uint64_t)stoull(v);
752 return (const char *)&fv.uint64;
753
754 case libdap::dods_float32_c:
755 fv.f = stof(v);
756 return (const char *)&fv.f;
757
758 case libdap::dods_float64_c:
759 fv.d = stod(v);
760 return (const char *)&fv.d;
761
762 default:
763 throw BESInternalError("Unknown fill value type.", __FILE__, __LINE__);
764 }
765}
766
771 fill_value fv;
772 const char *value = get_value_ptr(fv, d_fill_value_type, d_fill_value);
773 unsigned int value_size = get_value_size(d_fill_value_type);
774
775 unsigned long long num_values = get_rbuf_size() / value_size;
776 char *buffer = get_rbuf();
777
778 for (int i = 0; i < num_values; ++i, buffer += value_size) {
779 memcpy(buffer, value, value_size);
780 }
781
783}
784
795 if (d_is_read)
796 return;
797
798 // By default, d_read_buffer_is_mine is true. But if this is part of a SuperChunk
799 // then the SuperChunk will have allocated memory and d_read_buffer_is_mine is false.
800 if (d_read_buffer_is_mine)
802
803 if (d_uses_fill_value) {
805 }
806 else {
807 dmrpp_easy_handle *handle = DmrppRequestHandler::curl_handle_pool->get_easy_handle(this);
808 if (!handle)
809 throw BESInternalError(prolog + "No more libcurl handles.", __FILE__, __LINE__);
810
811 try {
812 handle->read_data(); // retries until success when appropriate, else throws
813 DmrppRequestHandler::curl_handle_pool->release_handle(handle);
814 }
815 catch (...) {
816 // TODO See https://bugs.earthdata.nasa.gov/browse/HYRAX-378
817 // It may be that this is the code that catches throws from
818 // chunk_write_data and based on read_data()'s behavior, the
819 // code should probably stop _all_ transfers, reclaim all
820 // handles and send a failure message up the call stack.
821 // jhrg 4/7/21
822 DmrppRequestHandler::curl_handle_pool->release_handle(handle);
823 throw;
824 }
825 }
826
827 // If the expected byte count was not read, it's an error.
828 if (get_size() != get_bytes_read()) {
829 ostringstream oss;
830 oss << "Wrong number of bytes read for chunk; read: " << get_bytes_read() << ", expected: " << get_size();
831 throw BESInternalError(oss.str(), __FILE__, __LINE__);
832 }
833
834 d_is_read = true;
835}
836
846void Chunk::dump(ostream &oss) const {
847 oss << "Chunk";
848 oss << "[ptr='" << (void *) this << "']";
849 oss << "[data_url='" << d_data_url->str() << "']";
850 oss << "[offset=" << d_offset << "]";
851 oss << "[size=" << d_size << "]";
852 oss << "[chunk_position_in_array=(";
853 for (unsigned long i = 0; i < d_chunk_position_in_array.size(); i++) {
854 if (i) oss << ",";
855 oss << d_chunk_position_in_array[i];
856 }
857 oss << ")]";
858 oss << "[is_read=" << d_is_read << "]";
859 oss << "[is_inflated=" << d_is_inflated << "]";
860}
861
862string Chunk::to_string() const {
863 std::ostringstream oss;
864 dump(oss);
865 return oss.str();
866}
867
868std::shared_ptr<http::url> Chunk::get_data_url() const {
869
870 std::shared_ptr<http::EffectiveUrl> effective_url = EffectiveUrlCache::TheCache()->get_effective_url(d_data_url);
871 BESDEBUG(MODULE, prolog << "Using data_url: " << effective_url->str() << endl);
872
873#if ENABLE_TRACKING_QUERY_PARAMETER
874 //A conditional call to void Chunk::add_tracking_query_param()
875 // here for the NASA cost model work THG's doing. jhrg 8/7/18
876 if (!d_query_marker.empty()) {
877 string url_str = effective_url->str();
878 if(url_str.find('?') != string::npos){
879 url_str.append("&");
880 }
881 else {
882 url_str.append("?");
883 }
884 url_str += d_query_marker;
885 shared_ptr<http::url> query_marker_url( new http::url(url_str));
886 return query_marker_url;
887 }
888#endif
889
890 return effective_url;
891}
892
893} // namespace dmrpp
894
virtual std::string get_context(const std::string &name, bool &found)
retrieve the value of the specified context from the BES
Base exception class for the BES with basic string message.
Definition: BESError.h:59
error thrown if the BES is not allowed to access the resource requested
exception thrown if internal error encountered
Regular expression matching.
Definition: BESRegex.h:53
int match(const char *s, int len, int pos=0) const
Does the pattern match.
Definition: BESRegex.cc:127
error thrown if there is a user syntax error in the request or any other user error
static std::vector< std::string > split(const std::string &s, char delim='/', bool skip_empty=true)
Splits the string s into the return vector of tokens using the delimiter delim and skipping empty val...
Definition: BESUtil.cc:1065
virtual void set_bytes_read(unsigned long long bytes_read)
Set the size of this Chunk's data block.
Definition: Chunk.h:303
virtual void dump(std::ostream &strm) const
Definition: Chunk.cc:846
virtual char * get_rbuf()
Definition: Chunk.h:335
virtual void read_chunk()
Definition: Chunk.cc:794
void add_tracking_query_param()
Modify this chunk's data URL so that it includes tracking info.
Definition: Chunk.cc:486
virtual std::string get_curl_range_arg_string()
Returns a curl range argument. libcurl requires a string argument for range-GET requests,...
Definition: Chunk.cc:465
virtual std::shared_ptr< http::url > get_data_url() const
Definition: Chunk.cc:868
virtual void set_rbuf_to_size()
Allocates the internal read buffer to be d_size bytes.
Definition: Chunk.h:322
virtual unsigned long long get_bytes_read() const
Definition: Chunk.h:294
void set_position_in_array(const std::string &pia)
parse the chunk position string
Definition: Chunk.cc:438
virtual unsigned long long get_rbuf_size() const
Definition: Chunk.h:364
virtual unsigned long long get_size() const
Definition: Chunk.h:267
void set_read_buffer(char *buf, unsigned long long buf_size, unsigned long long bytes_read=0, bool assume_ownership=true)
Set the target read buffer for this chunk.
Definition: Chunk.h:351
virtual void filter_chunk(const std::string &filters, unsigned long long chunk_size, unsigned long long elem_width)
filter data in the chunk
Definition: Chunk.cc:606
virtual void load_fill_values()
Load the chunk with fill values - temporary implementation.
Definition: Chunk.cc:770
Bundle a libcurl easy handle with other information.
void read_data()
This is the read_data() method for all transfers.