38#include "EffectiveUrlCache.h"
40#include "BESSyntaxUserError.h"
41#include "TheBESKeys.h"
43#include "BESStopWatch.h"
48#include "EffectiveUrl.h"
// Debug context name used with BESDEBUG when emitting the full cache dump.
53#define MODULE_DUMPER "euc:dump"
// Log-message prefix of the form "EffectiveUrlCache::<function>() - ", built
// from __func__ so each message names the function that emitted it.
54#define prolog std::string("EffectiveUrlCache::").append(__func__).append("() - ")
// Pointer to the singleton instance; starts out null.
58EffectiveUrlCache *EffectiveUrlCache::d_instance =
nullptr;
// Flag handed to std::call_once so that initialize_instance runs a single time.
59static std::once_flag d_euc_init_once;
// Run EffectiveUrlCache::initialize_instance at most once, guarded by
// d_euc_init_once. (The enclosing function is not visible in this excerpt.)
72 std::call_once(d_euc_init_once,EffectiveUrlCache::initialize_instance);
// One-time setup for the singleton. Only the atexit() registration is visible
// in this excerpt; the remainder of the body is not shown.
81void EffectiveUrlCache::initialize_instance()
// Register delete_instance to be called at normal process termination.
86 atexit(delete_instance);
// Handler registered with atexit() by initialize_instance(). The body is not
// visible in this excerpt.
94void EffectiveUrlCache::delete_instance()
// Destructor: empties the d_effective_urls container. Any other cleanup is not
// visible in this excerpt.
105EffectiveUrlCache::~EffectiveUrlCache()
107 d_effective_urls.clear();
// Body fragment of the stream-writing dump routine (its signature is not
// visible in this excerpt). Writes to strm, in order:
//   - the function prolog and the address of this object,
//   - the skip-regex pattern, or "WAS NOT SET" when d_skip_regex is null,
//   - each cached entry as "<key> --> <effective url>", or a line saying the
//     list is EMPTY when d_effective_urls has no entries.
125 strm << BESIndent::LMarg << prolog <<
"(this: " << (
void *)
this <<
")" << endl;
127 strm << BESIndent::LMarg <<
"d_skip_regex: " << (d_skip_regex?d_skip_regex->pattern():
"WAS NOT SET") << endl;
128 if (!d_effective_urls.empty()) {
129 strm << BESIndent::LMarg <<
"effective url list:" << endl;
// Walk every (key, effective URL) pair held in the cache.
131 auto it = d_effective_urls.begin();
132 while( it!= d_effective_urls.end()){
// NOTE(review): no endl follows the entry in this statement - confirm whether
// entries are meant to be separated by newlines.
133 strm << BESIndent::LMarg << (*it).first <<
" --> " << (*it).second->str();
136 BESIndent::UnIndent();
139 strm << BESIndent::LMarg <<
"effective url list: EMPTY" << endl;
141 BESIndent::UnIndent();
// Look up url_key in d_effective_urls.
// Returns the cached shared_ptr when the key is present, otherwise a null
// shared_ptr. The lookup uses find(), so a miss does not insert an entry.
163shared_ptr<http::EffectiveUrl> EffectiveUrlCache::get_cached_eurl(
string const &url_key){
// Starts out null; only overwritten on a cache hit.
164 shared_ptr<http::EffectiveUrl> effective_url(
nullptr);
165 auto it = d_effective_urls.find(url_key);
166 if(it!=d_effective_urls.end()){
167 effective_url = (*it).second;
169 return effective_url;
// Body fragment of get_effective_url() (its signature and several control-flow
// lines are not visible in this excerpt). Visible flow:
//   1. take the cache mutex,
//   2. bail out for a disabled cache, a non-HTTP(S) URL, or a URL that fully
//      matches the skip regex (what is returned in those cases is not shown),
//   3. look the URL up in the cache,
//   4. on a miss or an expired hit, retrieve the effective URL and store it,
//   5. return an EffectiveUrl built from the result.
// Hold d_cache_lock_mutex until the enclosing scope exits.
189 std::lock_guard<std::mutex> lock_me(d_cache_lock_mutex);
191 BESDEBUG(MODULE, prolog <<
"BEGIN url: " << source_url->str() << endl);
192 BESDEBUG(MODULE_DUMPER, prolog <<
"dump: " << endl <<
dump() << endl);
// Emitted on the cache-disabled path; the guarding condition is not visible.
195 BESDEBUG(MODULE, prolog <<
"CACHE IS DISABLED." << endl);
// find(...) != 0 means "does not start with": the URL begins with neither
// HTTP_PROTOCOL nor HTTPS_PROTOCOL, so it is skipped.
201 if (source_url->str().find(HTTP_PROTOCOL) != 0 && source_url->str().find(HTTPS_PROTOCOL) != 0) {
202 BESDEBUG(MODULE, prolog <<
"END Not an HTTP request, SKIPPING." << endl);
206 BESRegex *skip_regex = get_skip_regex();
208 size_t match_length = 0;
209 match_length = skip_regex->
match(source_url->str().c_str(), source_url->str().length());
// Only a match covering the entire URL string counts as "skip".
210 if (match_length == source_url->str().length()) {
211 BESDEBUG(MODULE, prolog <<
"END Candidate url matches the "
212 "no_redirects_regex_pattern [" << skip_regex->pattern() <<
213 "][match_length=" << match_length <<
"] SKIPPING." << endl);
216 BESDEBUG(MODULE, prolog <<
"Candidate url: '" << source_url->str() <<
"' does NOT match the "
217 "skip_regex pattern [" << skip_regex->pattern() <<
"]" << endl);
220 BESDEBUG(MODULE, prolog <<
"The cache_effective_urls_skip_regex() was NOT SET "<< endl);
// Cache lookup keyed by the source URL string; null means a cache miss.
223 shared_ptr<http::EffectiveUrl> effective_url = get_cached_eurl(source_url->str());
// A miss forces retrieval.
227 bool retrieve_and_cache = !effective_url;
233 BESDEBUG(MODULE, prolog <<
"Cache hit for: " << source_url->str() << endl);
// On a hit, retrieve again only when the cached entry reports it has expired.
234 retrieve_and_cache = effective_url->is_expired();
235 BESDEBUG(MODULE, prolog <<
"Cached target URL is " << (retrieve_and_cache?
"":
"not ") <<
"expired." << endl);
239 if(retrieve_and_cache){
240 BESDEBUG(MODULE, prolog <<
"Acquiring effective URL for " << source_url->str() << endl);
// sw is declared on a line not visible here - presumably a BESStopWatch used
// to time the retrieval; confirm.
244 sw.
start(prolog +
"Retrieve and cache effective url for source url: " + source_url->str());
245 effective_url = curl::retrieve_effective_url(source_url);
247 BESDEBUG(MODULE, prolog <<
" source_url: " << source_url->str() <<
" (" << (source_url->is_trusted()?
"":
"NOT ") <<
"trusted)" << endl);
// NOTE(review): this message describes effective_url but reports
// source_url->is_trusted(); confirm whether effective_url's own trust state
// was intended.
248 BESDEBUG(MODULE, prolog <<
"effective_url: " << effective_url->dump() <<
" (" << (source_url->is_trusted()?
"":
"NOT ") <<
"trusted)" << endl);
// Insert the entry, or overwrite an existing one, under the source URL string.
250 d_effective_urls[source_url->str()] = effective_url;
252 BESDEBUG(MODULE, prolog <<
"Updated record for "<< source_url->str() <<
" cache size: " << d_effective_urls.size() << endl);
// Both assignments below replace effective_url with a newly allocated
// EffectiveUrl constructed from it; the second form also passes
// source_url->is_trusted(). The conditions selecting each form are not visible
// in this excerpt.
260 effective_url = shared_ptr<EffectiveUrl>(
new EffectiveUrl(effective_url));
266 effective_url = shared_ptr<EffectiveUrl>(
new EffectiveUrl(effective_url,source_url->is_trusted()));
269 BESDEBUG(MODULE_DUMPER, prolog <<
"dump: " << endl <<
dump() << endl);
271 BESDEBUG(MODULE, prolog <<
"END" << endl);
273 return effective_url;
// Reports whether the cache is enabled. The visible lines only log the value
// associated with HTTP_CACHE_EFFECTIVE_URLS_KEY and the resulting d_enabled
// flag; how the value is read and mapped to d_enabled is not visible in this
// excerpt.
281bool EffectiveUrlCache::is_enabled()
289 BESDEBUG(MODULE, prolog << HTTP_CACHE_EFFECTIVE_URLS_KEY <<
": '" << value <<
"'" << endl);
292 BESDEBUG(MODULE, prolog <<
"d_enabled: " << (d_enabled?
"true":
"false") << endl);
// Returns the skip regex as a raw BESRegex pointer. In the visible lines, when
// a non-empty value was found for HTTP_CACHE_EFFECTIVE_URLS_SKIP_REGEX_KEY, a
// BESRegex is constructed from it and stored in d_skip_regex. The key lookup
// and the return statement are not visible in this excerpt.
300BESRegex *EffectiveUrlCache::get_skip_regex()
306 if(found && value.length()){
307 BESDEBUG(MODULE, prolog << HTTP_CACHE_EFFECTIVE_URLS_SKIP_REGEX_KEY <<
": " << value << endl);
// NOTE(review): allocated with a raw new; the matching delete is not visible
// in this excerpt - confirm that d_skip_regex is released.
308 d_skip_regex =
new BESRegex(value.c_str());
311 BESDEBUG(MODULE, prolog <<
"d_skip_regex: " << (d_skip_regex?d_skip_regex->pattern():
"Value has not been set.") << endl);
static bool IsSet(const std::string &flagName)
Checks whether the debug context flagName is set to true.
Regular expression matching.
int match(const char *s, int len, int pos=0) const
Does the pattern match.
virtual bool start(std::string name)
static std::string lowercase(const std::string &s)
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
static TheBESKeys * TheKeys()
static EffectiveUrlCache * TheCache()
Get the singleton EffectiveUrlCache instance.
virtual std::string dump() const
Dumps information about this object.
std::shared_ptr< EffectiveUrl > get_effective_url(std::shared_ptr< url > source_url)
Utility class for the HTTP catalog module.