bes Updated for version 3.20.13
AllowedHosts.cc
1// AllowedHosts.cc
2
3// -*- mode: c++; c-basic-offset:4 -*-
4
5// This file is part of the OPeNDAP Back-End Server (BES)
6// and creates an allowed hosts list of the systems that may be
7// accessed by the server as part of its routine operation.
8
9// Copyright (c) 2018 OPeNDAP, Inc.
10// Author: Nathan D. Potter <ndp@opendap.org>
11//
12// This library is free software; you can redistribute it and/or
13// modify it under the terms of the GNU Lesser General Public
14// License as published by the Free Software Foundation; either
15// version 2.1 of the License, or (at your option) any later version.
16//
17// This library is distributed in the hope that it will be useful,
18// but WITHOUT ANY WARRANTY; without even the implied warranty of
19// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20// Lesser General Public License for more details.
21//
22// You should have received a copy of the GNU Lesser General Public
23// License along with this library; if not, write to the Free Software
24// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25//
26// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
27
28#include "config.h"
29
30#include <sstream>
31
32#include "BESUtil.h"
33#include "BESCatalog.h"
34#include "BESCatalogList.h"
35#include "BESCatalogUtils.h"
36#include "BESRegex.h"
37#include "TheBESKeys.h"
38#include "BESInternalError.h"
39#include "BESDebug.h"
40#include "BESNotFoundError.h"
41#include "BESForbiddenError.h"
42#include "BESLog.h"
43
44#include "HttpNames.h"
45#include "url_impl.h"
46
47#include "AllowedHosts.h"
48
49
50using namespace std;
51
52#define MODULE "ah"
53#define prolog string("AllowedHosts::").append(__func__).append("() - ")
54
55namespace http {
56
// Pointer to the single AllowedHosts instance. It is allocated in
// initialize_instance() and deleted (then reset) in delete_instance().
AllowedHosts *AllowedHosts::d_instance = nullptr;
// Flag handed to std::call_once() together with initialize_instance().
static std::once_flag d_ah_init_once;
62
70 std::call_once(d_ah_init_once, AllowedHosts::initialize_instance);
71 return d_instance;
72}
73
74AllowedHosts::AllowedHosts() {
75 bool found = false;
76 string key = ALLOWED_HOSTS_BES_KEY;
77 TheBESKeys::TheKeys()->get_values(ALLOWED_HOSTS_BES_KEY, d_allowed_hosts, found);
78 if (!found) {
79 throw BESInternalError(string("The allowed hosts key, '") + ALLOWED_HOSTS_BES_KEY
80 + "' has not been configured.", __FILE__, __LINE__);
81 }
82}
83
87void AllowedHosts::initialize_instance() {
88 d_instance = new AllowedHosts();
89#ifdef HAVE_ATEXIT
90 atexit(delete_instance);
91#endif
92}
93
97void AllowedHosts::delete_instance() {
98 delete d_instance;
99 d_instance = 0;
100}
101
113bool AllowedHosts::is_allowed(shared_ptr<http::url> candidate_url) {
114 string error_msg;
115 return is_allowed(candidate_url, error_msg);
116}
117
118bool AllowedHosts::is_allowed(shared_ptr<http::url> candidate_url, std::string &why_not) {
119 BESDEBUG(MODULE, prolog << "BEGIN candidate_url: " << candidate_url->str() << endl);
120 bool isAllowed = false;
121
122 // Special case: This allows any file: URL to pass if the URL starts with the default
123 // catalog's path.
124 if (candidate_url->protocol() == FILE_PROTOCOL) {
125
126 // Ensure that the file path starts with the catalog root dir.
127 // We know that when a file URL is parsed by http::url it stores everything in after the "file://" mark in
128 // the path, as there is no hostname.
129 string file_path = candidate_url->path();
130 BESDEBUG(MODULE, prolog << " file_path: '" << file_path <<
131 "' (length: " << file_path.length() << " size: " << file_path.size() << ")" <<endl);
132 // Get the BES Catalog
134 string default_catalog_name = bcl->default_catalog_name();
135 BESDEBUG(MODULE, prolog << "Searching for catalog named: '" << default_catalog_name << "'" << endl);
136 BESCatalog *bcat = bcl->find_catalog(default_catalog_name);
137 if (bcat) {
138 BESDEBUG(MODULE, prolog << "Found catalog named: '" << bcat->get_catalog_name() << "'" << endl);
139 } else {
140 string error_msg = "INTERNAL_ERROR: Unable to locate default catalog. Check BES configuration.";
141 BESDEBUG(MODULE, prolog << error_msg << endl);
142 throw BESInternalError(error_msg, __FILE__, __LINE__);
143 }
144
145 string catalog_root = bcat->get_root();
146 BESDEBUG(MODULE, prolog << "catalog_root: '" << catalog_root <<
147 "' (length: " << catalog_root.length() << " size: " << catalog_root.size() << ")" << endl);
148
149 string relative_path;
150 if (file_path[0] == '/') {
151 if (file_path.length() < catalog_root.length()) {
152 // Upward traversal is not allowed (specified resource path is shorter than data root path)
153 why_not = "Path is out of scope from configuration.";
154 isAllowed = false;
155 } else {
156 BESDEBUG(MODULE, prolog << "file_path: " << file_path << endl);
157 BESDEBUG(MODULE, prolog << "catalog_root: " << catalog_root << endl);
158 size_t ret = file_path.find(catalog_root);
159 BESDEBUG(MODULE, prolog << "file_path.find(catalog_root): " << ret << endl);
160 isAllowed = (ret == 0);
161 relative_path = file_path.substr(catalog_root.length());
162 BESDEBUG(MODULE, prolog << "relative_path: " << relative_path << endl);
163 BESDEBUG(MODULE, prolog << "isAllowed: " << (isAllowed?"true":"false") << endl);
164
165 }
166 } else {
167 BESDEBUG(MODULE, prolog << "Relative path detected");
168 relative_path = file_path;
169 isAllowed = true;
170 }
171
172 // string::find() returns 0 if the submitted path begins with the catalog root.
173 // And since we are just looking at the catalog.root as a prefix of the resource
174 // name we only allow access to the resource for an exact match.
175 if (isAllowed) {
176 // If we stop adding a '/' to file_path values that don't begin with one
177 // then we need to detect the use of the relative path here
178 bool follow_sym_links = bcat->get_catalog_utils()->follow_sym_links();
179 try {
180 BESUtil::check_path(relative_path, catalog_root, follow_sym_links);
181 }
182 catch (BESNotFoundError &e) {
183 why_not = e.get_message();
184 isAllowed = false;
185 }
186 catch (BESForbiddenError &e) {
187 why_not = e.get_message();
188 isAllowed = false;
189 }
190 }
191 BESDEBUG(MODULE, prolog << "File Access Allowed: " << (isAllowed ? "true " : "false ") << endl);
192 } else if(candidate_url->protocol() == HTTPS_PROTOCOL || candidate_url->protocol() == HTTP_PROTOCOL ){
193
194 isAllowed = candidate_url->is_trusted() || check(candidate_url->str());
195
196 if (candidate_url->is_trusted()) {
197 INFO_LOG(prolog << "Candidate URL is marked trusted, allowing. url: " << candidate_url->str() << endl);
198 }
199 BESDEBUG(MODULE, prolog << "HTTP Access Allowed: " << (isAllowed ? "true " : "false ") << endl);
200 }
201 else {
202 stringstream ss;
203 ss << "The candidate_url utilizes an unsupported protocol '" << candidate_url->protocol() << "'" ;
204 BESDEBUG(MODULE, prolog << ss.str() << endl);
205 throw BESInternalError(ss.str(),__FILE__,__LINE__);
206 }
207 BESDEBUG(MODULE, prolog << "END Access Allowed: " << (isAllowed ? "true " : "false ") << endl);
208 return isAllowed;
209}
210
211
212
213bool AllowedHosts::check(const std::string &url){
214 bool isAllowed=false;
215 auto it = d_allowed_hosts.begin();
216 auto end_it = d_allowed_hosts.end();
217 for (; it != end_it && !isAllowed; it++) {
218 string a_regex_pattern = *it;
219 BESRegex reg_expr(a_regex_pattern.c_str());
220 int match_result = reg_expr.match(url.c_str(), url.length());
221 if (match_result >= 0) {
222 auto match_length = (unsigned int) match_result;
223 if (match_length == url.length()) {
224 BESDEBUG(MODULE,
225 prolog << "FULL MATCH. pattern: " << a_regex_pattern << " url: " << url << endl);
226 isAllowed = true;;
227 } else {
228 BESDEBUG(MODULE,
229 prolog << "No Match. pattern: " << a_regex_pattern << " url: " << url << endl);
230 }
231 }
232 }
233 return isAllowed;
234}
235
236} // namespace http
List of all registered catalogs.
virtual std::string default_catalog_name() const
The name of the default catalog.
static BESCatalogList * TheCatalogList()
Get the singleton BESCatalogList instance.
Catalogs provide a hierarchical organization for data.
Definition: BESCatalog.h:51
virtual std::string get_root() const =0
virtual BESCatalogUtils * get_catalog_utils() const
Get a pointer to the utilities, customized for this catalog.
Definition: BESCatalog.h:112
virtual std::string get_catalog_name() const
Get the name for this catalog.
Definition: BESCatalog.h:102
std::string get_message() const
get the error message for this exception
Definition: BESError.h:111
error thrown if the BES is not allowed to access the resource requested
exception thrown if internal error encountered
error thrown if the resource requested cannot be found
Regular expression matching.
Definition: BESRegex.h:53
static void check_path(const std::string &path, const std::string &root, bool follow_sym_links)
Is the combination of root + path a pathname the BES can/should access?
Definition: BESUtil.cc:382
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
void get_values(const std::string &s, std::vector< std::string > &vals, bool &found)
Retrieve the values of a given key, if set.
Definition: TheBESKeys.cc:371
Can a given URL be dereferenced given the BES's configuration?
Definition: AllowedHosts.h:54
bool is_allowed(std::shared_ptr< http::url > candidate_url)
static AllowedHosts * theHosts()
Static accessor for the singleton.
Definition: AllowedHosts.cc:69
utility class for the HTTP catalog module
Definition: AllowedHosts.cc:55