bes Updated for version 3.20.13
HttpCache.cc
1
2// -*- mode: c++; c-basic-offset:4 -*-
3
4// This file is part of the BES http package, part of the Hyrax data server.
5
6// Copyright (c) 2020 OPeNDAP, Inc.
7// Author: Nathan Potter <ndp@opendap.org>
8//
9// This library is free software; you can redistribute it and/or
10// modify it under the terms of the GNU Lesser General Public
11// License as published by the Free Software Foundation; either
12// version 2.1 of the License, or (at your option) any later version.
13//
14// This library is distributed in the hope that it will be useful,
15// but WITHOUT ANY WARRANTY; without even the implied warranty of
16// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17// Lesser General Public License for more details.
18//
19// You should have received a copy of the GNU Lesser General Public
20// License along with this library; if not, write to the Free Software
21// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22//
23// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24
25// Authors:
26// ndp Nathan Potter <ndp@opendap.org>
27
28#include <config.h>
29
30#include <sys/stat.h>
31
32#include <string>
33#include <fstream>
34#include <sstream>
35#include <vector>
36
37#include <cstdlib>
38
39#include "PicoSHA2/picosha2.h"
40
41#include <BESInternalError.h>
42#include <BESDebug.h>
43#include <BESUtil.h>
44#include <TheBESKeys.h>
45
46#include "HttpCache.h"
47#include "HttpUtils.h"
48#include "HttpNames.h"
49#include "url_impl.h"
50
// AT_EXIT(x) hands x to atexit() when the build defines HAVE_ATEXIT;
// otherwise it expands to nothing.
#if defined(HAVE_ATEXIT)
#define AT_EXIT(x) atexit((x))
#else
#define AT_EXIT(x)
#endif

// Message prefix used by the log and error messages in this file:
// "HttpCache::<name of the enclosing function>() - ".
#define prolog string("HttpCache::").append(__func__).append("() - ")
57
58
59using std::endl;
60using std::string;
61using std::vector;
62using std::stringstream;
63
64namespace http {
65
66 HttpCache *HttpCache::d_instance = 0;
67 bool HttpCache::d_enabled = true;
68
69 unsigned long HttpCache::getCacheSizeFromConfig() {
70 bool found = false;
71 string size;
72 unsigned long size_in_megabytes = 0;
73 TheBESKeys::TheKeys()->get_value(HTTP_CACHE_SIZE_KEY, size, found);
74
75 if (found) {
76 std::istringstream iss(size);
77 iss >> size_in_megabytes;
78 } else {
79 stringstream msg;
80 msg << prolog << "The BES Key " << HTTP_CACHE_SIZE_KEY << " is not set.";
81 BESDEBUG(HTTP_MODULE, msg.str() << endl);
82 throw BESInternalError(msg.str(), __FILE__, __LINE__);
83 }
84
85 return size_in_megabytes;
86 }
87
88 unsigned long HttpCache::getCacheExpiresTime() {
89 bool found = false;
90 string time;
91 unsigned long time_in_seconds = 0;
92 TheBESKeys::TheKeys()->get_value(HTTP_CACHE_EXPIRES_TIME_KEY, time, found);
93
94 if (found) {
95 std::istringstream iss(time);
96 iss >> time_in_seconds;
97 } else {
98 time_in_seconds = REMOTE_RESOURCE_DEFAULT_EXPIRED_INTERVAL;
99 }
100
101 return time_in_seconds;
102 }
103
104 string HttpCache::getCacheDirFromConfig() {
105 bool found;
106 string subdir = "";
107 TheBESKeys::TheKeys()->get_value(HTTP_CACHE_DIR_KEY, subdir, found);
108
109 if (!found) {
110 stringstream msg;
111 msg << prolog << "The BES Key " << HTTP_CACHE_DIR_KEY << " is not set.";
112 BESDEBUG(HTTP_MODULE, msg.str() << endl);
113 throw BESInternalError(msg.str(), __FILE__, __LINE__);
114 }
115
116 return subdir;
117 }
118
119 string HttpCache::getCachePrefixFromConfig() {
120 bool found;
121 string prefix = "";
122 TheBESKeys::TheKeys()->get_value(HTTP_CACHE_PREFIX_KEY, prefix, found);
123
124 if (found) {
125 prefix = BESUtil::lowercase(prefix);
126 } else {
127 stringstream msg;
128 msg << prolog << "The BES Key " << HTTP_CACHE_PREFIX_KEY << " is not set.";
129 BESDEBUG(HTTP_MODULE, msg.str() << endl);
130 throw BESInternalError(msg.str(), __FILE__, __LINE__);
131 }
132
133 return prefix;
134 }
135
136 HttpCache::HttpCache() {
137 BESDEBUG(HTTP_MODULE, prolog << "BEGIN" << endl);
138
139 string cacheDir = getCacheDirFromConfig();
140 string cachePrefix = getCachePrefixFromConfig();
141 unsigned long cacheSizeMbytes = getCacheSizeFromConfig();
142
143 BESDEBUG(HTTP_MODULE, prolog << "Cache configuration params: " << cacheDir << ", " << cachePrefix << ", "
144 << cacheSizeMbytes << endl);
145 initialize(cacheDir, cachePrefix, cacheSizeMbytes);
146
147 BESDEBUG(HTTP_MODULE, prolog << "END" << endl);
148 }
149
150#if 1
151 HttpCache::HttpCache(const string &cache_dir, const string &prefix, unsigned long long size) {
152
153 BESDEBUG(HTTP_MODULE, prolog << "BEGIN" << endl);
154
155 initialize(cache_dir, prefix, size);
156
157 BESDEBUG(HTTP_MODULE, prolog << "END" << endl);
158 }
159#endif
#if 0
// Disabled variant of get_instance() that takes explicit cache settings.
// Kept for reference only; it is not compiled.
HttpCache *
HttpCache::get_instance(const string &cache_dir, const string &cache_file_prefix,
                        unsigned long long max_cache_size) {
    // Nothing to build when caching is off, an instance already exists, or
    // the requested directory is missing.
    if (!d_enabled || d_instance != 0 || !dir_exists(cache_dir))
        return d_instance;

    d_instance = new HttpCache(cache_dir, cache_file_prefix, max_cache_size);
    d_enabled = d_instance->cache_enabled();
    if (d_enabled) {
        AT_EXIT(delete_instance);

        BESDEBUG(HTTP_MODULE, "HttpCache::" << __func__ << "() - " << "Cache is ENABLED" << endl);
    } else {
        delete d_instance;
        d_instance = 0;
        BESDEBUG(HTTP_MODULE, "HttpCache::" << __func__ << "() - " << "Cache is DISABLED" << endl);
    }

    return d_instance;
}
#endif
183
187 HttpCache *
189 if (d_enabled && d_instance == 0) {
190 try {
191 d_instance = new HttpCache();
192 d_enabled = d_instance->cache_enabled();
193 if (!d_enabled) {
194 delete d_instance;
195 d_instance = 0;
196 BESDEBUG(HTTP_MODULE, prolog << "Cache is DISABLED" << endl);
197 } else {
198 AT_EXIT(delete_instance);
199
200 BESDEBUG(HTTP_MODULE, prolog << "Cache is ENABLED" << endl);
201 }
202 }
203 catch (BESInternalError &bie) {
204 BESDEBUG(HTTP_MODULE,
205 "[ERROR] HttpCache::get_instance(): Failed to obtain cache! msg: " << bie.get_message()
206 << endl);
207 }
208 }
209
210 return d_instance;
211 }
212
213#if HASH_CACHE_FILENAME
214
215 string
216 HttpCache::get_hash(const string &s)
217 {
218 if (s.empty()){
219 string msg = "You cannot hash the empty string.";
220 BESDEBUG(HTTP_MODULE, prolog << msg << endl);
221 throw BESInternalError(msg, __FILE__, __LINE__);
222 }
223 return picosha2::hash256_hex_string(s[0] == '/' ? s : "/" + s);
224 }
225
226
227 bool is_url(const string &candidate){
228 size_t index = candidate.find(HTTP_PROTOCOL);
229 if(index){
230 index = candidate.find(HTTPS_PROTOCOL);
231 if(index){
232 return false;
233 }
234 }
235 return true;
236 }
237
238
246 string get_real_name_extension(const string &identifier){
247 string real_name_extension;
248
249 string path_part;
250
251 if(is_url(identifier)) {
252 // Since it's a URL it might have a massive query string attached, and since wee
253 // have no idea what the query parameters mean, we'll just punt and look at the path part of the URL.
254 // We make an instance of http::url which will carve up the URL for us.
255 http::url target_url(identifier);
256 path_part = target_url.path();
257 }
258 else {
259 path_part = identifier;
260 }
261
262 vector<string> path_elements;
263 // Now that we a "path" (none of that query string mess) we can tokenize it.
264 BESUtil::tokenize(path_part,path_elements);
265 if(!path_elements.empty()){
266 string last = path_elements.back();
267 if(last != path_part)
268 real_name_extension = "#" + last; // This utilizes a hack in libdap
269 }
270 return real_name_extension;
271 }
272
273
282 string HttpCache::get_cache_file_name(const string &uid, const string &src_id, bool mangle){
283 stringstream cache_filename;
284 string hashed_part;
285 string real_name_extension;
286 string uid_part;
287
288 if(!uid.empty())
289 uid_part = uid + "_";
290
291 if(mangle){
292 hashed_part = get_hash(src_id);
293 }
294 else {
295 hashed_part = src_id;
296 }
297 real_name_extension = get_real_name_extension(src_id);
298
299 cache_filename << get_cache_file_prefix() << uid_part << hashed_part << real_name_extension;
300
301 string cf_name = BESUtil::assemblePath(this->get_cache_directory(), cache_filename.str() );
302
303 return cf_name;
304 }
305
306
307 string HttpCache::get_cache_file_name( const string &src, bool mangle){
308 string uid;
309 return get_cache_file_name(uid,src, mangle);
310 }
311
312
313#endif
314
315} // namespace http
std::string get_message() const
get the error message for this exception
Definition: BESError.h:111
void initialize(const std::string &cache_dir, const std::string &prefix, unsigned long long size)
Initialize an instance of FileLockingCache.
const std::string get_cache_directory()
static bool dir_exists(const std::string &dir)
const std::string get_cache_file_prefix()
exception thrown if internal error encountered
static void tokenize(const std::string &str, std::vector< std::string > &tokens, const std::string &delimiters="/")
Definition: BESUtil.cc:992
static std::string lowercase(const std::string &s)
Definition: BESUtil.cc:254
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:801
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:340
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
A cache for content accessed via HTTP.
Definition: HttpCache.h:54
static HttpCache * get_instance()
Definition: HttpCache.cc:188
virtual std::string get_cache_file_name(const std::string &uid, const std::string &src, bool mangle=true)
Definition: HttpCache.cc:282
utility class for the HTTP catalog module
Definition: AllowedHosts.cc:55
string get_real_name_extension(const string &identifier)
Definition: HttpCache.cc:246