bes Updated for version 3.20.13
BESUtil.cc
1// BESUtil.cc
2
3// This file is part of bes, A C++ back-end server implementation framework
4// for the OPeNDAP Data Access Protocol.
5
6// Copyright (c) 2004-2009 University Corporation for Atmospheric Research
7// Author: Patrick West <pwest@ucar.edu> and Jose Garcia <jgarcia@ucar.edu>
8//
9// This library is free software; you can redistribute it and/or
10// modify it under the terms of the GNU Lesser General Public
11// License as published by the Free Software Foundation; either
12// version 2.1 of the License, or (at your option) any later version.
13//
14// This library is distributed in the hope that it will be useful,
15// but WITHOUT ANY WARRANTY; without even the implied warranty of
16// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17// Lesser General Public License for more details.
18//
19// You should have received a copy of the GNU Lesser General Public
20// License along with this library; if not, write to the Free Software
21// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22//
23// You can contact University Corporation for Atmospheric Research at
24// 3080 Center Green Drive, Boulder, CO 80301
25
26// (c) COPYRIGHT University Corporation for Atmospheric Research 2004-2005
27// Please read the full copyright statement in the file COPYRIGHT_UCAR.
28//
29// Authors:
30// pwest Patrick West <pwest@ucar.edu>
31// jgarcia Jose Garcia <jgarcia@ucar.edu>
32
33#include "config.h"
34
35#include <sys/types.h>
36#include <sys/stat.h>
37#include <sys/resource.h>
38
39#include <fcntl.h>
40#if HAVE_UNISTD_H
41#include <unistd.h>
42#endif
43
44
45#include <thread> // std::this_thread::sleep_for
46#include <chrono> // std::chrono::seconds
47#include <string> // std::string, std::stol
48#include <cstdio>
49#include <cerrno>
50#include <cstring>
51#include <cstdlib>
52#include <ctime>
53#include <cassert>
54#include <vector>
55#include <list>
56#include <sstream>
57#include <iostream>
58#include <algorithm>
59
60#include "TheBESKeys.h"
61#include "BESUtil.h"
62#include "BESDebug.h"
63#include "BESForbiddenError.h"
64#include "BESNotFoundError.h"
65#include "BESInternalError.h"
66#include "BESLog.h"
67#include "BESCatalogList.h"
68
69#include "BESInternalFatalError.h"
70#include "RequestServiceTimer.h"
71
72using namespace std;
73
// Line terminator written after every header line emitted by set_mime_text()
// and set_mime_html() in this file.
74#define CRLF "\r\n"
75
// Debug context name passed as the first argument of BESDEBUG() in this file.
76#define MODULE "util"
// Builds the message prefix "BESUtil::<enclosing function>() - ". It relies on
// __func__, so it names whichever function the macro is expanded in.
77#define prolog string("BESUtil::").append(__func__).append("() - ")
78
// Name of the BES key looked up through TheBESKeys to decide whether the pending
// alarm is cancelled (see the block that calls alarm(0) later in this file).
79const string BES_KEY_TIMEOUT_CANCEL = "BES.CancelTimeoutOnSend";
80
// Returns the ru_maxrss value reported by getrusage(RUSAGE_SELF) for this process,
// expressed in kilobytes, or 0 when getrusage() fails.
// NOTE(review): the line carrying the function's qualified name is absent from this
// listing (numbering jumps from 85 to 87) — confirm against the real source file.
85long
87{
88 struct rusage usage;
89 if (getrusage(RUSAGE_SELF, &usage) == 0) { // getrusage() successful?
90#ifdef __APPLE__
91 // get the max size (man page says it is in bytes). This function returns the
92 // size in KB like Linux. jhrg 3/29/22
93 return usage.ru_maxrss / 1024;
94#else
95 return usage.ru_maxrss; // get the max size (man page says it is in kilobytes)
96#endif
97 }
98 else {
 // getrusage() reported an error; 0 is returned instead of propagating it.
99 return 0;
100 }
101}
102
// Body of a helper that edits `value` in place: if the string is non-empty and ends
// with '/', exactly one trailing '/' is removed (a second trailing '/' would remain).
// NOTE(review): the signature line is missing from this listing — confirm the name
// and the type of `value` against the real source file.
111{
112 if (!value.empty() && value.back() == '/')
113 value.pop_back(); // requires C++-11
114 // value.erase(value.end () -1);
115}
116
// Body of a helper that edits `value` in place: a leading '"' is removed if present,
// then (independently) a trailing '"' is removed if present. The two tests are not
// paired, so a string with only one of the two quotes still loses that quote.
// NOTE(review): the signature line is missing from this listing — confirm the name
// and the type of `value` against the real source file.
124{
125 if (!value.empty() && value[0] == '"')
126 value.erase(0, 1);
 // Re-test for empty: the erase above may have consumed the only character.
127 if (!value.empty() && value.back() == '"')
128 value.pop_back(); // requires C++-11
129 // value.erase(value.end () -1);
130}
131
136void BESUtil::set_mime_text(ostream &strm)
137{
138 strm << "HTTP/1.0 200 OK" << CRLF;
139 strm << "XBES-Server: " << PACKAGE_STRING << CRLF;
140
141 const time_t t = time(0);
142 strm << "Date: " << rfc822_date(t).c_str() << CRLF;
143 strm << "Last-Modified: " << rfc822_date(t).c_str() << CRLF;
144
145 strm << "Content-Type: text/plain" << CRLF;
146 // Note that Content-Description is from RFC 2045 (MIME, pt 1), not 2616.
147 strm << "Content-Description: unknown" << CRLF;
148 strm << CRLF;
149}
150
155void BESUtil::set_mime_html(ostream &strm)
156{
157 strm << "HTTP/1.0 200 OK" << CRLF;
158 strm << "XBES-Server: " << PACKAGE_STRING << CRLF;
159
160 const time_t t = time(0);
161 strm << "Date: " << rfc822_date(t).c_str() << CRLF;
162 strm << "Last-Modified: " << rfc822_date(t).c_str() << CRLF;
163
164 strm << "Content-type: text/html" << CRLF;
165 // Note that Content-Description is from RFC 2045 (MIME, pt 1), not 2616.
166 strm << "Content-Description: unknown" << CRLF;
167 strm << CRLF;
168}
169
170// Return a MIME rfc-822 date. The grammar for this is:
171// date-time = [ day "," ] date time ; dd mm yy
172// ; hh:mm:ss zzz
173//
174// day = "Mon" / "Tue" / "Wed" / "Thu"
175// / "Fri" / "Sat" / "Sun"
176//
177// date = 1*2DIGIT month 2DIGIT ; day month year
178// ; e.g. 20 Jun 82
179// NB: year is 4 digit; see RFC 1123. 11/30/99 jhrg
180//
181// month = "Jan" / "Feb" / "Mar" / "Apr"
182// / "May" / "Jun" / "Jul" / "Aug"
183// / "Sep" / "Oct" / "Nov" / "Dec"
184//
185// time = hour zone ; ANSI and Military
186//
187// hour = 2DIGIT ":" 2DIGIT [":" 2DIGIT]
188// ; 00:00:00 - 23:59:59
189//
190// zone = "UT" / "GMT" ; Universal Time
191// ; North American : UT
192// / "EST" / "EDT" ; Eastern: - 5/ - 4
193// / "CST" / "CDT" ; Central: - 6/ - 5
194// / "MST" / "MDT" ; Mountain: - 7/ - 6
195// / "PST" / "PDT" ; Pacific: - 8/ - 7
196// / 1ALPHA ; Military: Z = UT;
197// ; A:-1; (J not used)
198// ; M:-12; N:+1; Y:+12
199// / ( ("+" / "-") 4DIGIT ) ; Local differential
200// ; hours+min. (HHMM)
201
202static const char *days[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" };
203static const char *months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
204
214string BESUtil::rfc822_date(const time_t t)
215{
216 struct tm stm{};
217 gmtime_r(&t, &stm);
218 char d[256];
219
220 snprintf(d, 255, "%s, %02d %s %4d %02d:%02d:%02d GMT", days[stm.tm_wday], stm.tm_mday,
221 months[stm.tm_mon], 1900 + stm.tm_year, stm.tm_hour, stm.tm_min, stm.tm_sec);
222 d[255] = '\0';
223 return {d};
224}
225
226string BESUtil::unhexstring(const string& s)
227{
228 int val;
229 istringstream ss(s);
230 ss >> std::hex >> val;
231 char tmp_str[2];
232 tmp_str[0] = static_cast<char>(val);
233 tmp_str[1] = '\0';
234 return {tmp_str};
235}
236
237// I modified this to mirror the version in libdap. The change allows several
238// escape sequences to by listed in 'except'. jhrg 2/18/09
239string BESUtil::www2id(const string &in, const string &escape, const string &except)
240{
241 string::size_type i = 0;
242 string res = in;
243 while ((i = res.find_first_of(escape, i)) != string::npos) {
244 if (except.find(res.substr(i, 3)) != string::npos) {
245 i += 3;
246 continue;
247 }
248 res.replace(i, 3, unhexstring(res.substr(i + 1, 2)));
249 }
250
251 return res;
252}
253
254string BESUtil::lowercase(const string &s)
255{
256 string return_string = s;
257 for (int j = 0; j < static_cast<int>(return_string.length()); j++) {
258 return_string[j] = (char) tolower(return_string[j]);
259 }
260
261 return return_string;
262}
263
264string BESUtil::unescape(const string &s)
265{
266 bool done = false;
267 string::size_type index = 0;
268 /* string::size_type new_index = 0 ; */
269 string new_str;
270 while (!done) {
271 string::size_type bs = s.find('\\', index);
272 if (bs == string::npos) {
273 new_str += s.substr(index, s.length() - index);
274 done = true;
275 }
276 else {
277 new_str += s.substr(index, bs - index);
278 new_str += s[bs + 1];
279 index = bs + 2;
280 }
281 }
282
283 return new_str;
284}
285
291static void throw_access_error(const string &pathname, long error_number)
292{
293 switch(error_number) {
294 case ENOENT:
295 case ENOTDIR: {
296 string message = string("Failed to locate '").append(pathname).append("'");
297 INFO_LOG(message);
298 throw BESNotFoundError(message, __FILE__, __LINE__);
299 }
300
301 default: {
302 string message = string("Not allowed to access '").append(pathname).append("'");
303 INFO_LOG(message);
304 throw BESForbiddenError(message, __FILE__, __LINE__);
305 }
306 }
307}
308
315bool pathname_contains_symlink(const string &path, int search_limit)
316{
317 // This kludge to remove a trailing '/' is needed because lstat and readlinkat fail
318 // to detect a dir symlink when the dir name ends in '/'. On OSX readlinkat (and readlink)
319 // does detect embedded links, but not on Linux. The lstat() service doesn't detect
320 // embedded links anywhere. jhrg 1/3/22
321 string pathname = path;
322 if (!pathname.empty() && pathname.back() == '/') {
323 pathname.pop_back();
324 }
325
326 bool is_link = false;
327 size_t pos;
328 int i = 0; // used with search_limit
329 do {
330 // test pathname
331 struct stat buf;
332 int status = lstat(pathname.c_str(), &buf);
333 if (status == 0) {
334 is_link = S_ISLNK(buf.st_mode);
335 }
336 else {
337 string msg = "Could not resolve path when testing for symbolic links: ";
338 msg.append(strerror(errno));
339 BESDEBUG(MODULE, prolog << msg << endl);
340 throw BESInternalError(msg, __FILE__, __LINE__);
341 }
342
343 // remove the last part of pathname, including the trailing '/'
344 pos = pathname.find_last_of('/');
345 if (pos != string::npos) // find_last_of returns npos if the char is not found
346 pathname.erase(pos);
347 } while (++i < search_limit && !is_link && pos != string::npos && !pathname.empty());
348
349 return is_link;
350
351#if 0
352 // ssize_t readlink(const char *restrict pathname, char *restrict buf, size_t bufsiz);
353 // readlinkat (or readlink) can be used to detect sym links in a path or to get the path
354 // to the linked file. Here we used it to test for sym links. 1/3/22 jhrg
355 ssize_t len = readlinkat(AT_FDCWD, pathname.c_str(), nullptr, 0);
356 if (len == -1) {
357 // either errno is EINVAL meaning this is not a link or there's really an error
358 switch (errno) {
359 case EINVAL:
360 return false;
361 default:
362 string msg = "Could not resolve path when testing for symbolic links: ";
363 msg.append(strerror(errno));
364 throw BESInternalError(msg, __FILE__, __LINE__);
365 }
366 }
367
368 return true; // If readlinkat() does not return -1, it's a symlink
369#endif
370}
371
382void BESUtil::check_path(const string &path, const string &root, bool follow_sym_links) {
383 // if nothing is passed in path, then the path checks out since root is assumed to be valid.
384 if (path == "") return;
385
386 if (path.find("..") != string::npos) {
387 throw_access_error(path, EACCES); // use the code for 'access would be denied'
388 }
389
390 // Check if the combination of root + path exists on this machine. If so, check if it
391 // has symbolic links. Return BESNotFoundError if it does not exist and BESForbiddenError
392 // if it does exist but contains symbolic links and follow_sym_links is false. jhrg 12/30/21
393
394 string pathname = root;
395
396 if (pathname.back() != '/' && path.front() != '/')
397 pathname.append("/");
398
399 pathname.append(path);
400 if (access(pathname.c_str(), R_OK) != 0) {
401 throw_access_error(pathname, errno);
402 }
403
404 if (follow_sym_links == false) {
405 auto n = count(path.begin(), path.end(), '/');
406 // using 'n' for the search_limit may not be optimal (when path ends in '/', an extra
407 // component may be searched) but it's better than testing for a trailing '/' on every call.
408 if (pathname_contains_symlink(pathname, n)) {
409 throw_access_error(pathname, EACCES); // use the code for 'access would be denied'
410 }
411 }
412}
413
414char *
415BESUtil::fastpidconverter(char *buf, int base)
416{
417 return fastpidconverter(getpid(), buf, base);
418}
419
420char *
421BESUtil::fastpidconverter(long val, /* value to be converted */
422char *buf, /* output string */
423int base) /* conversion base */
424{
425 ldiv_t r; /* result of val / base */
426
427 if (base > 36 || base < 2) /* no conversion if wrong base */
428 {
429 *buf = '\0';
430 return buf;
431 }
432 if (val < 0) *buf++ = '-';
433 r = ldiv(labs(val), base);
434
435 /* output digits of val/base first */
436
437 if (r.quot > 0) buf = fastpidconverter(r.quot, buf, base);
438 /* output last digit */
439
440 *buf++ = "0123456789abcdefghijklmnopqrstuvwxyz"[(int) r.rem];
441 *buf = '\0';
442 return buf;
443}
444
// Body of a helper that trims `key` in place: leading and trailing characters from
// the set space, tab, newline, carriage return are removed. A string made up only of
// those characters becomes empty; an empty string is left untouched.
// NOTE(review): the signature line is missing from this listing — confirm the name
// and the type of `key` against the real source file.
446{
447 if (!key.empty()) {
448 string::size_type first = key.find_first_not_of(" \t\n\r");
449 string::size_type last = key.find_last_not_of(" \t\n\r");
 // npos here means no character outside the trim set exists.
450 if (first == string::npos)
451 key = "";
452 else {
453 string::size_type num = last - first + 1;
454 string new_key = key.substr(first, num);
455 key = new_key;
456 }
457 }
458}
459
460string BESUtil::entity(char c)
461{
462 switch (c) {
463 case '>':
464 return "&gt;";
465 case '<':
466 return "&lt;";
467 case '&':
468 return "&amp;";
469 case '\'':
470 return "&apos;";
471 case '\"':
472 return "&quot;";
473 default:
474 return string(1, c); // is this proper default, just the char?
475 }
476}
477
484string BESUtil::id2xml(string in, const string &not_allowed)
485{
486 string::size_type i = 0;
487
488 while ((i = in.find_first_of(not_allowed, i)) != string::npos) {
489 in.replace(i, 1, entity(in[i]));
490 i++;
491 }
492
493 return in;
494}
495
501string BESUtil::xml2id(string in)
502{
503 string::size_type i = 0;
504
505 while ((i = in.find("&gt;", i)) != string::npos)
506 in.replace(i, 4, ">");
507
508 i = 0;
509 while ((i = in.find("&lt;", i)) != string::npos)
510 in.replace(i, 4, "<");
511
512 i = 0;
513 while ((i = in.find("&amp;", i)) != string::npos)
514 in.replace(i, 5, "&");
515
516 i = 0;
517 while ((i = in.find("&apos;", i)) != string::npos)
518 in.replace(i, 6, "'");
519
520 i = 0;
521 while ((i = in.find("&quot;", i)) != string::npos)
522 in.replace(i, 6, "\"");
523
524 return in;
525}
526
/**
 * @brief Split a delimited string into values, honouring double-quoted values.
 *
 * The pieces of @p str between occurrences of @p delim are appended to
 * @p values (the list is not cleared first). A value that starts with a
 * double quote runs to its closing quote and may contain the delimiter; the
 * quotes are kept as part of the stored value. Inside a quoted value a quote
 * preceded by one backslash does not close the value, while a quote preceded
 * by two backslashes does. If @p str ends with the delimiter, a final empty
 * value is appended.
 *
 * @param delim  Delimiter character.
 * @param str    Text to split.
 * @param values List that receives the values.
 * @throws BESInternalError if a quoted value has no closing quote, or if the
 *         closing quote is followed by anything other than the delimiter or
 *         the end of the string.
 */
540void BESUtil::explode(char delim, const string &str, list<string> &values)
541{
542 std::string::size_type start = 0;
543 std::string::size_type qstart = 0;
544 std::string::size_type adelim = 0;
545 std::string::size_type aquote = 0;
546 bool done = false;
547 while (!done) {
548 string aval;
 // A value that begins with a quote is scanned for its closing quote first.
549 if (str[start] == '"') {
550 bool endquote = false;
551 qstart = start + 1;
552 while (!endquote) {
553 aquote = str.find('"', qstart);
554 if (aquote == string::npos) {
555 string currval = str.substr(start, str.length() - start);
556 string err = "BESUtil::explode - No end quote after value " + currval;
557 throw BESInternalError(err, __FILE__, __LINE__);
558 }
559 // could be an escaped escape character and an escaped
560 // quote, or an escaped escape character and a quote
 // aquote - 2 cannot underflow here: str[start] is the opening quote, so a
 // backslash at aquote - 1 implies aquote - 1 > start.
561 if (str[aquote - 1] == '\\') {
562 if (str[aquote - 2] == '\\') {
563 endquote = true;
564 qstart = aquote + 1;
565 }
566 else {
 // escaped quote: keep looking after it
567 qstart = aquote + 1;
568 }
569 }
570 else {
571 endquote = true;
572 qstart = aquote + 1;
573 }
574 }
 // qstart now indexes the character right after the closing quote.
575 if (str[qstart] != delim && qstart != str.length()) {
576 string currval = str.substr(start, qstart - start);
577 string err = "BESUtil::explode - No delim after end quote " + currval;
578 throw BESInternalError(err, __FILE__, __LINE__);
579 }
580 if (qstart == str.length()) {
581 adelim = string::npos;
582 }
583 else {
584 adelim = qstart;
585 }
586 }
587 else {
588 adelim = str.find(delim, start);
589 }
590 if (adelim == string::npos) {
591 aval = str.substr(start, str.length() - start);
592 done = true;
593 }
594 else {
595 aval = str.substr(start, adelim - start);
596 }
597
598 values.push_back(aval);
 // NOTE(review): when adelim is npos this wraps to 0, so for an empty str the
 // test below is true and a second empty value is appended — confirm intended.
599 start = adelim + 1;
 // The delimiter was the last character: record the trailing empty value.
600 if (start == str.length()) {
601 values.push_back("");
602 done = true;
603 }
604 }
605}
606
617string BESUtil::implode(const list<string> &values, char delim)
618{
619 string result;
620 list<string>::const_iterator i = values.begin();
621 list<string>::const_iterator e = values.end();
622 bool first = true;
623 string::size_type d; // = string::npos ;
624 for (; i != e; i++) {
625 if (!first) result += delim;
626 d = (*i).find(delim);
627 if (d != string::npos && (*i)[0] != '"') {
628 string err = (string) "BESUtil::implode - delimiter exists in value " + (*i);
629 throw BESInternalError(err, __FILE__, __LINE__);
630 }
631 //d = string::npos ;
632 result += (*i);
633 first = false;
634 }
635 return result;
636}
637
657void BESUtil::url_explode(const string &url_str, BESUtil::url &url_parts)
658{
659 string rest;
660
661 string::size_type colon = url_str.find(":");
662 if (colon == string::npos) {
663 string err = "BESUtil::url_explode: missing colon for protocol";
664 throw BESInternalError(err, __FILE__, __LINE__);
665 }
666
667 url_parts.protocol = url_str.substr(0, colon);
668
669 if (url_str.substr(colon, 3) != "://") {
670 string err = "BESUtil::url_explode: no :// in the URL";
671 throw BESInternalError(err, __FILE__, __LINE__);
672 }
673
674 colon += 3;
675 rest = url_str.substr(colon);
676
677 string::size_type slash = rest.find("/");
678 if (slash == string::npos) slash = rest.length();
679
680 string::size_type at = rest.find("@");
681 if ((at != string::npos) && (at < slash)) {
682 // everything before the @ is username:password
683 string up = rest.substr(0, at);
684 colon = up.find(":");
685 if (colon != string::npos) {
686 url_parts.uname = up.substr(0, colon);
687 url_parts.psswd = up.substr(colon + 1);
688 }
689 else {
690 url_parts.uname = up;
691 }
692 // everything after the @ is domain/path
693 rest = rest.substr(at + 1);
694 }
695 slash = rest.find("/");
696 if (slash == string::npos) slash = rest.length();
697 colon = rest.find(":");
698 if ((colon != string::npos) && (colon < slash)) {
699 // everything before the colon is the domain
700 url_parts.domain = rest.substr(0, colon);
701 // everything after the folon is port/path
702 rest = rest.substr(colon + 1);
703 slash = rest.find("/");
704 if (slash != string::npos) {
705 url_parts.port = rest.substr(0, slash);
706 url_parts.path = rest.substr(slash + 1);
707 }
708 else {
709 url_parts.port = rest;
710 url_parts.path = "";
711 }
712 }
713 else {
714 slash = rest.find("/");
715 if (slash != string::npos) {
716 url_parts.domain = rest.substr(0, slash);
717 url_parts.path = rest.substr(slash + 1);
718 }
719 else {
720 url_parts.domain = rest;
721 }
722 }
723}
724
725string BESUtil::url_create(BESUtil::url &url_parts)
726{
727 string url = url_parts.protocol + "://";
728 if (!url_parts.uname.empty()) {
729 url += url_parts.uname;
730 if (!url_parts.psswd.empty()) url += ":" + url_parts.psswd;
731 url += "@";
732 }
733 url += url_parts.domain;
734 if (!url_parts.port.empty()) url += ":" + url_parts.port;
735 if (!url_parts.path.empty()) url += "/" + url_parts.path;
736
737 return url;
738}
739
740
751string BESUtil::pathConcat(const string &firstPart, const string &secondPart, char separator)
752{
753 string first = firstPart;
754 string second = secondPart;
755 string sep(1,separator);
756
757 // make sure there are not multiple slashes at the end of the first part...
758 // Note that this removes all the slashes. jhrg 9/27/16
759 while (!first.empty() && *first.rbegin() == separator) {
760 // C++-11 first.pop_back();
761 first = first.substr(0, first.length() - 1);
762 }
763 // make sure second part does not BEGIN with a slash
764 while (!second.empty() && second[0] == separator) {
765 // erase is faster? second = second.substr(1);
766 second.erase(0, 1);
767 }
768 string newPath;
769 if (first.empty()) {
770 newPath = second;
771 }
772 else if (second.empty()) {
773 newPath = first;
774 }
775 else {
776 newPath = first.append(sep).append(second);
777 }
778 return newPath;
779}
801string BESUtil::assemblePath(const string &firstPart, const string &secondPart, bool leadingSlash, bool trailingSlash)
802{
803 BESDEBUG(MODULE, prolog << "firstPart: '" << firstPart << "'" << endl);
804 BESDEBUG(MODULE, prolog << "secondPart: '" << secondPart << "'" << endl);
805
806 string newPath = BESUtil::pathConcat(firstPart, secondPart);
807 if (leadingSlash) {
808 if (newPath.empty()) {
809 newPath = "/";
810 }
811 else if (newPath.front() != '/') {
812 newPath = "/" + newPath;
813 }
814 }
815
816 if (trailingSlash) {
817 if (newPath.empty() || newPath.back() != '/') {
818 newPath.append("/");
819 }
820 }
821 else {
822 while (!newPath.empty() && newPath.back() == '/')
823 newPath.erase(newPath.length()-1);
824 }
825
826 BESDEBUG(MODULE, prolog << "newPath: " << newPath << endl);
827 return newPath;
828}
829
834bool BESUtil::endsWith(string const &fullString, string const &ending)
835{
836 if (fullString.length() >= ending.length()) {
837 return (0 == fullString.compare(fullString.length() - ending.length(), ending.length(), ending));
838 }
839 else {
840 return false;
841 }
842}
843
// Body of a helper that aborts the current request once its time budget is used up:
// if RequestServiceTimer::TheTimer()->is_expired() returns true, a
// BESInternalFatalError carrying a fixed "took too long" message is thrown;
// otherwise nothing happens.
// NOTE(review): the signature line is missing from this listing — confirm the name
// against the real source file.
866{
867 if (RequestServiceTimer::TheTimer()->is_expired()) {
868 stringstream msg;
869 msg << "The submitted request took too long to service.";
870 throw BESInternalFatalError(msg.str(), __FILE__, __LINE__);
871 }
872}
873
// Body of a helper that cancels the process alarm unless configuration says not to.
// The key named by BES_KEY_TIMEOUT_CANCEL is read from TheBESKeys. Cancelling is the
// default; it is switched off only when the key is found and its value, compared
// case-insensitively, is "false" or "no". When cancelling is on, alarm(0) is called.
// NOTE(review): the signature line is missing from this listing — confirm the name
// against the real source file.
896{
897 const string false_str = "false";
898 const string no_str = "no";
899
900 bool cancel_timeout_on_send = true;
901 bool found = false;
902 string value;
903
904 TheBESKeys::TheKeys()->get_value(BES_KEY_TIMEOUT_CANCEL, value, found);
905 if (found) {
 // lower-case first so that "False", "NO", ... are recognised too
906 value = BESUtil::lowercase(value);
907 if ( value == false_str || value == no_str) cancel_timeout_on_send = false;
908 }
909 BESDEBUG(MODULE, __func__ << "() - cancel_timeout_on_send: " << (cancel_timeout_on_send ? "true" : "false") << endl);
910 if (cancel_timeout_on_send) {
 // alarm(0) cancels any pending alarm without scheduling a new one
912 alarm(0);
913 }
914}
915
921unsigned int BESUtil::replace_all(string &s, string find_this, string replace_with_this)
922{
923 unsigned int replace_count = 0;
924 size_t pos = s.find(find_this);
925 while (pos != string::npos) {
926 // Replace current matching substring
927 s.replace(pos, find_this.size(), replace_with_this);
928 // Get the next occurrence from current position
929 pos = s.find(find_this, pos + replace_with_this.size());
930 replace_count++;
931 }
932 return replace_count;
933}
934
946string BESUtil::normalize_path(const string &raw_path, bool leading_separator, bool trailing_separator, const string separator /* = "/" */)
947{
948 if (separator.length() != 1)
949 throw BESInternalError("Path separators must be a single character. The string '" + separator + "' does not qualify.", __FILE__, __LINE__);
950 char separator_char = separator[0];
951 string double_separator;
952 double_separator = double_separator.append(separator).append(separator);
953
954 string path(raw_path);
955
956 replace_all(path, double_separator, separator);
957
958 if (path.empty()) {
959 path = separator;
960 }
961 if (path == separator) {
962 return path;
963 }
964 if (leading_separator) {
965 if (path[0] != separator_char) {
966 path = string(separator).append(path);
967 }
968 }
969 else {
970 if (path[0] == separator_char) {
971 path = path.substr(1);
972 }
973 }
974 if (trailing_separator) {
975 if (*path.rbegin() != separator_char) {
976 path = path.append(separator);
977 }
978 }
979 else {
980 if (*path.rbegin() == separator_char) {
981 path = path.substr(0, path.length() - 1);
982 }
983 }
984 return path;
985}
986
992void BESUtil::tokenize(const string& str, vector<string>& tokens, const string& delimiters /* = "/" */)
993{
994 // Skip delimiters at beginning.
995 string::size_type lastPos = str.find_first_not_of(delimiters, 0);
996 // Find first "non-delimiter".
997 string::size_type pos = str.find_first_of(delimiters, lastPos);
998 while (string::npos != pos || string::npos != lastPos) {
999 // Found a token, add it to the vector.
1000 tokens.push_back(str.substr(lastPos, pos - lastPos));
1001 // Skip delimiters. Note the "not_of"
1002 lastPos = str.find_first_not_of(delimiters, pos);
1003 // Find next "non-delimiter"
1004 pos = str.find_first_of(delimiters, lastPos);
1005 }
1006}
1007
1014string BESUtil::get_time(bool use_local_time)
1015{
1016 return get_time(time(0), use_local_time);
1017}
1018
1026string BESUtil::get_time(time_t the_time, bool use_local_time)
1027{
1028 char buf[sizeof "YYYY-MM-DDTHH:MM:SS zones"];
1029 int status = 0;
1030
1031 // From StackOverflow:
1032 // This will work too, if your compiler doesn't support %F or %T:
1033 // strftime(buf, sizeof buf, "%Y-%m-%dT%H:%M:%S%Z", gmtime(&now));
1034 //
1035 // UTC is the default. Override to local time based on the
1036 // passed parameter 'use_local_time'
1037 struct tm result{};
1038 if (!use_local_time) {
1039 gmtime_r(&the_time, &result);
1040 status = strftime(buf, sizeof buf, "%FT%T%Z", &result);
1041 }
1042 else {
1043 localtime_r(&the_time, &result);
1044 status = strftime(buf, sizeof buf, "%FT%T%Z", &result);
1045 }
1046
1047 if (!status) {
1048 ERROR_LOG(prolog + "Error formatting time value!");
1049 return "date-format-error";
1050 }
1051
1052 return buf;
1053}
1054
1065vector<string> BESUtil::split(const string &s, char delim /* '/' */, bool skip_empty /* true */)
1066{
1067 stringstream ss(s);
1068 string item;
1069 vector<string> tokens;
1070
1071 while (getline(ss, item, delim)) {
1072
1073 if (item.empty() && skip_empty)
1074 continue;
1075
1076 tokens.push_back(item);
1077 }
1078
1079 return tokens;
1080}
1081
1082BESCatalog *BESUtil::separateCatalogFromPath(std::string &ppath)
1083{
1084 BESCatalog *catalog = 0; // pointer to a singleton; do not delete
1085 vector<string> path_tokens;
1086
1087 // BESUtil::normalize_path() removes duplicate separators and adds leading and trailing separators as directed.
1088 string path = BESUtil::normalize_path(ppath, false, false);
1089 BESDEBUG(MODULE, prolog << "Normalized path: " << path << endl);
1090
1091 // Because we may need to alter the container/file/resource name by removing
1092 // a catalog name from the first node in the path we use "use_container" to store
1093 // the altered container path.
1094 string use_container = ppath;
1095
1096 // Breaks path into tokens
1097 BESUtil::tokenize(path, path_tokens);
1098 if (!path_tokens.empty()) {
1099 BESDEBUG(MODULE, "First path token: " << path_tokens[0] << endl);
1100 catalog = BESCatalogList::TheCatalogList()->find_catalog(path_tokens[0]);
1101 if (catalog) {
1102 BESDEBUG(MODULE, prolog << "Located catalog " << catalog->get_catalog_name() << " from path component" << endl);
1103 // Since the catalog name is in the path we
1104 // need to drop it this should leave container
1105 // with a leading
1106 ppath = BESUtil::normalize_path(path.substr(path_tokens[0].length()), true, false);
1107 BESDEBUG(MODULE, prolog << "Modified container/path value to: " << use_container << endl);
1108 }
1109 }
1110
1111 return catalog;
1112}
1113
/**
 * @brief Append a description of a stream's state flags to a message.
 *
 * Writes " {ios.good()=…} {ios.eof()=…} {ios.fail()=…} {ios.bad()=…}" where
 * each … is "true" or "false".
 *
 * @param ios_ref Stream whose state is reported.
 * @param msg     Message stream that receives the text.
 */
void ios_state_msg(std::ios &ios_ref, std::stringstream &msg) {
    const auto as_text = [](bool flag) { return flag ? "true" : "false"; };

    msg << " {ios.good()=" << as_text(ios_ref.good()) << "}"
        << " {ios.eof()=" << as_text(ios_ref.eof()) << "}"
        << " {ios.fail()=" << as_text(ios_ref.fail()) << "}"
        << " {ios.bad()=" << as_text(ios_ref.bad()) << "}";
}
1120
1121// size of the buffer used to read from the temporary file built on disk and
1122// send data to the client over the network connection (socket/stream)
1123#define OUTPUT_FILE_BLOCK_SIZE 4096
1124
1132void BESUtil::file_to_stream(const std::string &file_name, std::ostream &o_strm)
1133{
1134 stringstream msg;
1135 msg << prolog << "Using ostream: " << (void *) &o_strm << " cout: " << (void *) &cout << endl;
1136 BESDEBUG(MODULE, msg.str());
1137 INFO_LOG( msg.str());
1138
1139 vector<char> rbuffer(OUTPUT_FILE_BLOCK_SIZE);
1140 std::ifstream i_stream(file_name, std::ios_base::in | std::ios_base::binary); // Use binary mode so we can
1141
1142 // good() returns true if !(eofbit || badbit || failbit)
1143 if(!i_stream.good()){
1144 stringstream msg;
1145 msg << prolog << "Failed to open file " << file_name;
1146 ios_state_msg(i_stream, msg);
1147 BESDEBUG(MODULE, msg.str() << endl);
1148 throw BESInternalError(msg.str(),__FILE__,__LINE__);
1149 }
1150
1151 // good() returns true if !(eofbit || badbit || failbit)
1152 if(!o_strm.good()){
1153 stringstream msg;
1154 msg << prolog << "Problem with ostream. " << file_name;
1155 ios_state_msg(i_stream, msg);
1156 BESDEBUG(MODULE, msg.str() << endl);
1157 throw BESInternalError(msg.str(),__FILE__,__LINE__);
1158 }
1159
1160 //vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
1161 // This is where the file is copied.
1162 uint64_t tcount = 0;
1163 while (i_stream.good() && o_strm.good()){
1164 i_stream.read(rbuffer.data(), OUTPUT_FILE_BLOCK_SIZE); // Read at most n bytes into
1165 o_strm.write(rbuffer.data(), i_stream.gcount()); // buf, then write the buf to
1166 tcount += i_stream.gcount();
1167 }
1168 o_strm.flush();
1169
1170 // fail() is true if failbit || badbit got set, but does not consider eofbit
1171 if(i_stream.fail() && !i_stream.eof()){
1172 stringstream msg;
1173 msg << prolog << "There was an ifstream error when reading from: " << file_name;
1174 ios_state_msg(i_stream, msg);
1175 msg << " last_lap: " << i_stream.gcount() << " bytes";
1176 msg << " total_read: " << tcount << " bytes";
1177 BESDEBUG(MODULE, msg.str() << endl);
1178 throw BESInternalError(msg.str(),__FILE__,__LINE__);
1179 }
1180
1181 // If we're not at the eof of the input stream then we have failed.
1182 if (!i_stream.eof()){
1183 stringstream msg;
1184 msg << prolog << "Failed to reach EOF on source file: " << file_name;
1185 ios_state_msg(i_stream, msg);
1186 msg << " last_lap: " << i_stream.gcount() << " bytes";
1187 msg << " total_read: " << tcount << " bytes";
1188 BESDEBUG(MODULE, msg.str() << endl);
1189 throw BESInternalError(msg.str(),__FILE__,__LINE__);
1190 }
1191
1192 // And if something went wrong on the output stream we have failed.
1193 if(!o_strm.good()){
1194 stringstream msg;
1195 msg << prolog << "There was an ostream error during transmit. Transmitted " << tcount << " bytes.";
1196 ios_state_msg(o_strm, msg);
1197 auto crntpos = o_strm.tellp();
1198 msg << " current_position: " << crntpos << endl;
1199 BESDEBUG(MODULE, msg.str());
1200 ERROR_LOG(msg.str());
1201 }
1202
1203 msg.str("");
1204 msg << prolog << "Sent "<< tcount << " bytes from file '" << file_name<< "'. " << endl;
1205 BESDEBUG(MODULE,msg.str());
1206 INFO_LOG(msg.str());
1207}
1208
1209uint64_t BESUtil::file_to_stream_helper(const std::string &file_name, std::ostream &o_strm, uint64_t byteCount){
1210
1211 stringstream msg;
1212 msg << prolog << "Using ostream: " << (void *) &o_strm << " cout: " << (void *) &cout << endl;
1213 BESDEBUG(MODULE, msg.str());
1214 INFO_LOG( msg.str());
1215
1216 vector<char> rbuffer(OUTPUT_FILE_BLOCK_SIZE);
1217 std::ifstream i_stream(file_name, std::ios_base::in | std::ios_base::binary); // Use binary mode so we can
1218
1219 // good() returns true if !(eofbit || badbit || failbit)
1220 if(!i_stream.good()){
1221 stringstream msg;
1222 msg << prolog << "Failed to open file " << file_name;
1223 ios_state_msg(i_stream, msg);
1224 BESDEBUG(MODULE, msg.str() << endl);
1225 throw BESInternalError(msg.str(),__FILE__,__LINE__);
1226 }
1227
1228 // good() returns true if !(eofbit || badbit || failbit)
1229 if(!o_strm.good()){
1230 stringstream msg;
1231 msg << prolog << "Problem with ostream. " << file_name;
1232 ios_state_msg(i_stream, msg);
1233 BESDEBUG(MODULE, msg.str() << endl);
1234 throw BESInternalError(msg.str(),__FILE__,__LINE__);
1235 }
1236
1237 // this is where we advance to the last byte that was read
1238 i_stream.seekg(byteCount);
1239
1240 //vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
1241 // This is where the file is copied.
1242 while (i_stream.good() && o_strm.good()){
1243 i_stream.read(rbuffer.data(), OUTPUT_FILE_BLOCK_SIZE); // Read at most n bytes into
1244 o_strm.write(rbuffer.data(), i_stream.gcount()); // buf, then write the buf to
1245 BESDEBUG(MODULE, "i_stream: " << i_stream.gcount() << endl);
1246 byteCount += i_stream.gcount();
1247 }
1248 o_strm.flush();
1249
1250 // fail() is true if failbit || badbit got set, but does not consider eofbit
1251 if(i_stream.fail() && !i_stream.eof()){
1252 stringstream msg;
1253 msg << prolog << "There was an ifstream error when reading from: " << file_name;
1254 ios_state_msg(i_stream, msg);
1255 msg << " last_lap: " << i_stream.gcount() << " bytes";
1256 msg << " total_read: " << byteCount << " bytes";
1257 BESDEBUG(MODULE, msg.str() << endl);
1258 throw BESInternalError(msg.str(),__FILE__,__LINE__);
1259 }
1260
1261 // If we're not at the eof of the input stream then we have failed.
1262 if (!i_stream.eof()){
1263 stringstream msg;
1264 msg << prolog << "Failed to reach EOF on source file: " << file_name;
1265 ios_state_msg(i_stream, msg);
1266 msg << " last_lap: " << i_stream.gcount() << " bytes";
1267 msg << " total_read: " << byteCount << " bytes";
1268 BESDEBUG(MODULE, msg.str() << endl);
1269 throw BESInternalError(msg.str(),__FILE__,__LINE__);
1270 }
1271
1272 // And if something went wrong on the output stream we have failed.
1273 if(!o_strm.good()){
1274 stringstream msg;
1275 msg << prolog << "There was an ostream error during transmit. Transmitted " << byteCount << " bytes.";
1276 ios_state_msg(o_strm, msg);
1277 auto crntpos = o_strm.tellp();
1278 msg << " current_position: " << crntpos << endl;
1279 BESDEBUG(MODULE, msg.str());
1280 ERROR_LOG(msg.str());
1281 }
1282
1283 msg.str(prolog);
1284 msg << "Sent "<< byteCount << " bytes from file '" << file_name<< "'. " << endl;
1285 BESDEBUG(MODULE,msg.str());
1286 INFO_LOG(msg.str());
1287
1288 i_stream.close();
1289
1290 return byteCount;
1291}
1292
1293
1294// I added this because maybe using the low-level file calls was important. I'm not
1295// sure and the iostreams in C++ are safer. jhrg 6/4/21
1296#define FILE_CALLS 0
1297
1303uint64_t BESUtil::file_to_stream_task(const std::string &file_name, std::atomic<bool> &file_write_done, std::ostream &o_strm) {
1304 stringstream msg;
1305 msg << prolog << "Using ostream: " << (void *) &o_strm << " cout: " << (void *) &cout << endl;
1306 BESDEBUG(MODULE, msg.str());
1307 INFO_LOG(msg.str());
1308
1309 vector<char> rbuffer(OUTPUT_FILE_BLOCK_SIZE);
1310
1311 std::ifstream i_stream(file_name, std::ios_base::in | std::ios_base::binary);
1312#if FILE_CALLS
1313 int fd = open(file_name.c_str(), O_RDONLY | O_NONBLOCK);
1314 int eof = false;
1315#endif
1316
1317 //vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
1318 // This is where the file is copied.
1319 BESDEBUG(MODULE, "Starting transfer" << endl);
1320 uint64_t tcount = 0;
1321 while (!i_stream.bad() && !i_stream.fail() && o_strm.good()) {
1322 if (file_write_done && i_stream.eof()) {
1323 BESDEBUG(MODULE, "breaking out of loop" << endl);
1324 break;
1325 }
1326 else {
1327 i_stream.read(rbuffer.data(), OUTPUT_FILE_BLOCK_SIZE); // Read at most n bytes into
1328
1329#if FILE_CALLS
1330 int status = read(fd, rbuffer.data(), OUTPUT_FILE_BLOCK_SIZE);
1331 if (status == 0) {
1332 eof = true;
1333 }
1334 else if (status == -1) {
1335 BESDEBUG(MODULE, "read() call error: " << errno << endl);
1336 }
1337
1338 o_strm.write(rbuffer.data(), status); // buf, then write the buf to
1339 tcount += status;
1340#endif
1341
1342 o_strm.write(rbuffer.data(), i_stream.gcount()); // buf, then write the buf to
1343 tcount += i_stream.gcount();
1344 BESDEBUG(MODULE, "transfer bytes " << tcount << endl);
1345 }
1346 }
1347
1348#if FILE_CALLS
1349 close(fd);
1350#endif
1351 o_strm.flush();
1352
1353 // And if something went wrong on the output stream we have failed.
1354 if(!o_strm.good()){
1355 stringstream msg;
1356 msg << prolog << "There was an ostream error during transmit. Transmitted " << tcount << " bytes.";
1357 ios_state_msg(o_strm, msg);
1358 auto crntpos = o_strm.tellp();
1359 msg << " current_position: " << crntpos << endl;
1360 BESDEBUG(MODULE, msg.str());
1361 INFO_LOG(msg.str());
1362 }
1363
1364 msg.str(prolog);
1365 msg << "Sent "<< tcount << " bytes from file '" << file_name<< "'. " << endl;
1366 BESDEBUG(MODULE,msg.str());
1367 INFO_LOG(msg.str());
1368
1369 return tcount;
1370}
1371
1372#if 0
1378
1385void BESUtil::split(const string &s, const string &delimiter, vector<uint64_t> &res)
1386{
1387 const size_t delim_len = delimiter.length();
1388
1389 size_t pos_start = 0, pos_end;
1390
1391 while ((pos_end = s.find (delimiter, pos_start)) != string::npos) {
1392 res.push_back (stoull(s.substr(pos_start, pos_end - pos_start)));
1393 pos_start = pos_end + delim_len;
1394 }
1395
1396 res.push_back (stoull(s.substr (pos_start)));
1397}
1398
1408void BESUtil::split(const string &s, const string &delimiter, vector<string> &res)
1409{
1410 const size_t delim_len = delimiter.length();
1411
1412 size_t pos_start = 0, pos_end;
1413
1414 while ((pos_end = s.find (delimiter, pos_start)) != string::npos) {
1415 res.push_back(s.substr(pos_start, pos_end - pos_start));
1416 pos_start = pos_end + delim_len;
1417 }
1418
1419 res.push_back(s.substr (pos_start));
1420}
1421#endif
static BESCatalogList * TheCatalogList()
Get the singleton BESCatalogList instance.
Catalogs provide a hierarchical organization for data.
Definition: BESCatalog.h:51
virtual std::string get_catalog_name() const
Get the name for this catalog.
Definition: BESCatalog.h:102
error thrown if the BES is not allowed to access the resource requested
exception thrown if internal error encountered
exception thrown if an internal error is found and is fatal to the BES
error thrown if the resource requested cannot be found
static std::vector< std::string > split(const std::string &s, char delim='/', bool skip_empty=true)
Splits the string s into the return vector of tokens using the delimiter delim and skipping empty val...
Definition: BESUtil.cc:1065
static uint64_t file_to_stream_task(const std::string &file_name, std::atomic< bool > &file_write_done, std::ostream &o_strm)
Definition: BESUtil.cc:1303
static void explode(char delim, const std::string &str, std::list< std::string > &values)
Definition: BESUtil.cc:540
static long get_current_memory_usage() noexcept
Get the Resident Set Size in KB.
Definition: BESUtil.cc:86
static void url_explode(const std::string &url_str, BESUtil::url &url_parts)
Given a url, break the url into its different parts.
Definition: BESUtil.cc:657
static bool endsWith(std::string const &fullString, std::string const &ending)
Definition: BESUtil.cc:834
static void tokenize(const std::string &str, std::vector< std::string > &tokens, const std::string &delimiters="/")
Definition: BESUtil.cc:992
static void set_mime_text(std::ostream &strm)
Generate an HTTP 1.0 response header for a text document.
Definition: BESUtil.cc:136
static std::string id2xml(std::string in, const std::string &not_allowed="><&'\"")
Definition: BESUtil.cc:484
static void conditional_timeout_cancel()
Checks if the timeout alarm should be canceled based on the value of the BES key BES....
Definition: BESUtil.cc:895
static void check_path(const std::string &path, const std::string &root, bool follow_sym_links)
Is the combination of root + path a pathname the BES can/should access?
Definition: BESUtil.cc:382
static void exit_on_request_timeout()
Checks if the timeout alarm should be canceled based on the value of the BES key BES....
Definition: BESUtil.cc:865
static unsigned int replace_all(std::string &s, std::string find_this, std::string replace_with_this)
Operates on the string 's' to replace every occurrence of the value of the string 'find_this' with t...
Definition: BESUtil.cc:921
static void set_mime_html(std::ostream &strm)
Generate an HTTP 1.0 response header for a html document.
Definition: BESUtil.cc:155
static std::string lowercase(const std::string &s)
Definition: BESUtil.cc:254
static std::string pathConcat(const std::string &firstPart, const std::string &secondPart, char separator='/')
Concatenate path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:751
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:801
static std::string www2id(const std::string &in, const std::string &escape="%", const std::string &except="")
Definition: BESUtil.cc:239
static std::string implode(const std::list< std::string > &values, char delim)
Definition: BESUtil.cc:617
static void trim_if_surrounding_quotes(std::string &value)
Remove double quotes around a string. This function will remove a leading and/or trailing double quote...
Definition: BESUtil.cc:123
static void file_to_stream(const std::string &file_name, std::ostream &o_strm)
Copies the contents of the file identified by file_name to the stream o_strm.
Definition: BESUtil.cc:1132
static std::string normalize_path(const std::string &path, bool leading_separator, bool trailing_separator, std::string separator="/")
Removes duplicate separators and provides leading and trailing separators as directed.
Definition: BESUtil.cc:946
static std::string xml2id(std::string in)
Definition: BESUtil.cc:501
static void trim_if_trailing_slash(std::string &value)
If the string ends in a slash, remove it. This function works for empty strings (doing nothing)....
Definition: BESUtil.cc:110
static std::string unescape(const std::string &s)
Definition: BESUtil.cc:264
static char * fastpidconverter(char *buf, int base)
Definition: BESUtil.cc:415
static void removeLeadingAndTrailingBlanks(std::string &key)
Definition: BESUtil.cc:445
static std::string get_time(bool use_local_time=false)
Definition: BESUtil.cc:1014
static RequestServiceTimer * TheTimer()
Return a pointer to a singleton timer instance. If an instance does not exist it will create and init...
void disable_timeout()
Mark the time_out as disabled.
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:340
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71