bes Updated for version 3.20.13
DirectoryUtil.cc
1
// This file is part of the "NcML Module" project, a BES module designed
// to allow NcML files to be used as a wrapper to add
// AIS to existing datasets of any format.
5//
6// Copyright (c) 2009 OPeNDAP, Inc.
7// Author: Michael Johnson <m.johnson@opendap.org>
8//
9// For more information, please also see the main website: http://opendap.org/
10//
11// This library is free software; you can redistribute it and/or
12// modify it under the terms of the GNU Lesser General Public
13// License as published by the Free Software Foundation; either
14// version 2.1 of the License, or (at your option) any later version.
15//
16// This library is distributed in the hope that it will be useful,
17// but WITHOUT ANY WARRANTY; without even the implied warranty of
18// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19// Lesser General Public License for more details.
20//
21// You should have received a copy of the GNU Lesser General Public
22// License along with this library; if not, write to the Free Software
23// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24//
// Please see the files COPYING and COPYRIGHT for more information on the LGPL.
26//
27// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
29
30#include "config.h"
31#include "DirectoryUtil.h"
32
33#include <cstring>
34#include <cerrno>
35#include <sstream>
36#include <sys/types.h>
37#include <sys/stat.h>
38#include <dirent.h>
39
40#include "BESRegex.h"
41
42// bes
43#include "BESDebug.h"
44#include "BESForbiddenError.h"
45#include "BESInternalError.h"
46#include "TheBESKeys.h"
47#include "BESNotFoundError.h"
48#include "BESUtil.h"
49
50using std::string;
51using std::vector;
52using std::endl;
53
54namespace agg_util {
/**
 * Scoped holder for a DIR* obtained from opendir().
 *
 * The constructor tries to open the directory; the destructor calls
 * closedir() on the handle if the open succeeded, so the handle is released
 * however the enclosing scope is left (including by a throw).
 *
 * The holder is deliberately not copyable: a copy would share the same DIR*
 * and both destructors would call closedir() on it.
 */
struct DirWrapper {
public:

    /**
     * Try to open @p fullDirPath with opendir().
     * The constructor itself never throws for a failed open; call fail()
     * afterwards and inspect errno for the reason.
     */
    explicit DirWrapper(const std::string& fullDirPath) :
        _pDir(0), _fullPath(fullDirPath)
    {
        // if the user sees null after this, they can check the errno.
        _pDir = opendir(fullDirPath.c_str());
    }

    /** Close the directory handle if one was successfully opened. */
    ~DirWrapper()
    {
        if (_pDir) {
            closedir(_pDir);
            _pDir = 0;
        }
    }

    /** @return true if opendir() did not produce a handle. */
    bool fail() const
    {
        return !_pDir;
    }

    /** @return the raw handle (null if the open failed); ownership stays with this object. */
    DIR*
    get() const
    {
        return _pDir;
    }

    // automatically closedir() if non-null on dtor.
    DIR* _pDir;
    // The path that was handed to opendir().
    std::string _fullPath;

private:
    // Declared but intentionally never defined: copying would lead to a
    // double closedir() on the shared handle.
    DirWrapper(const DirWrapper&);
    DirWrapper& operator=(const DirWrapper&);
};
92
94FileInfo::FileInfo(const std::string& path, const std::string& basename, bool isDir, time_t modTime) :
95 _path(path), _basename(basename), _fullPath("") // start empty, cached later
96 , _isDir(isDir), _modTime(modTime)
97{
100}
101
102FileInfo::~FileInfo()
103{
104}
105
106const std::string&
108{
109 return _path;
110}
111
112const std::string&
113FileInfo::basename() const
114{
115 return _basename;
116}
117
118bool FileInfo::isDir() const
119{
120 return _isDir;
121}
122
123time_t FileInfo::modTime() const
124{
125 return _modTime;
126}
127
129{
130 // we'll just use UTC for the output...
131 struct tm* pTM = gmtime(&_modTime);
132 char buf[128];
133 // this should be "Year-Month-Day Hour:Minute:Second"
134 strftime(buf, 128, "%F %T", pTM);
135 return string(buf);
136}
137
138const std::string&
140{
141 if (_fullPath.empty()) {
142 _fullPath = _path + "/" + _basename;
143 }
144 return _fullPath;
145}
146
147std::string FileInfo::toString() const
148{
149 return "{FileInfo fullPath=" + getFullPath() + " isDir=" + ((isDir()) ? ("true") : ("false")) + " modTime=\""
150 + getModTimeAsString() + "\""
151 " }";
152}
153
155
156const string DirectoryUtil::_sDebugChannel = "agg_util";
157
158DirectoryUtil::DirectoryUtil() :
159 _rootDir("/"), _suffix("") // we start with no filter
160 , _pRegExp(0), _filteringModTimes(false), _newestModTime(0L)
161{
162 // this can throw, but the class is completely constructed by this point.
163 setRootDir("/");
164}
165
166DirectoryUtil::~DirectoryUtil()
167{
168 clearRegExp();
169}
170
172const std::string&
174{
175 return _rootDir;
176}
177
183void DirectoryUtil::setRootDir(const std::string& origRootDir, bool allowRelativePaths/*=false*/,
184 bool /*allowSymLinks=false*/)
185{
186 if (!allowRelativePaths && hasRelativePath(origRootDir)) {
187 throw BESForbiddenError("can't use rootDir=" + origRootDir + " since it has a relative path (../)", __FILE__,
188 __LINE__);
189 }
190
191 // Get the root without trailing slash, we'll add it.
192 _rootDir = origRootDir;
193 removeTrailingSlashes(_rootDir);
194 // If empty here, that means the actual filesystem root.
195
196 // Use the BESUtil to test the path
197 // Since it assumes root is valid and strips preceding "/",
198 // we use "/" as the root path and the root path as the path
199 // to validate the root. This will throw if invalid.
200 BESUtil::check_path(_rootDir, "/", false); // not going to allow symlinks by default.
201
202 // We should be good if we get here.
203}
204
205void DirectoryUtil::setFilterSuffix(const std::string& suffix)
206{
207 _suffix = suffix;
208}
209
210void DirectoryUtil::setFilterRegExp(const std::string& regexp)
211{
212 clearRegExp(); // avoid leaks
213 if (!regexp.empty()) {
214 _pRegExp = new BESRegex(regexp.c_str());
215 }
216}
217
219{
220 delete _pRegExp;
221 _pRegExp = 0;
222}
223
225{
226 _newestModTime = newestModTime;
227 _filteringModTimes = true;
228}
229
230void DirectoryUtil::getListingForPath(const std::string& path, std::vector<FileInfo>* pRegularFiles,
231 std::vector<FileInfo>* pDirectories)
232{
233 string pathToUse(path);
234 removePrecedingSlashes(pathToUse);
235 pathToUse = getRootDir() + "/" + pathToUse;
236 BESDEBUG(_sDebugChannel, "Attempting to get dir listing for path=\"" << pathToUse << "\"" << endl);
237
238 // RAII, will closedir no matter how we leave function, including a throw
239 DirWrapper pDir(pathToUse);
240 if (pDir.fail()) {
241 throwErrorForOpendirFail(pathToUse);
242 }
243
244 // Go through each entry and see if it's a directory or regular file and
245 // add it to the list.
246 struct dirent* pDirEnt = 0;
247 while ((pDirEnt = readdir(pDir.get())) != 0) {
248 string entryName = pDirEnt->d_name;
249 // Exclude ".", ".." and any dotfile dirs like ".svn".
250 if (!entryName.empty() && entryName[0] == '.') {
251 continue;
252 }
253
254 // Figure out if it's a regular file or directory
255 string pathToEntry = pathToUse + "/" + entryName;
256 struct stat statBuf;
257 int statResult = stat(pathToEntry.c_str(), &statBuf);
258 if (statResult != 0) {
259 // If we can't stat the file for some reason, then ignore it
260 continue;
261 }
262
263 // Use the passed in path for the entry since we
264 // want to make the locations be relative to the root
265 // for loading later.
266 if (pDirectories && S_ISDIR(statBuf.st_mode)) {
267 pDirectories->push_back(FileInfo(path, entryName, true, statBuf.st_mtime));
268 }
269 else if (pRegularFiles && S_ISREG(statBuf.st_mode)) {
270 FileInfo theFile(path, entryName, false, statBuf.st_mtime);
271 // match against the relative passed in path, not root full path
272 if (matchesAllFilters(theFile.getFullPath(), statBuf.st_mtime)) {
273 pRegularFiles->push_back(theFile);
274 }
275 }
276 }
277}
278
279void DirectoryUtil::getListingForPathRecursive(const std::string& path, std::vector<FileInfo>* pRegularFiles,
280 std::vector<FileInfo>* pDirectories)
281{
282 // Remove trailing slash to make it canonical
283 string canonicalPath = path;
284 removeTrailingSlashes(canonicalPath);
285
286 // We use our own local vector of directories in order to recurse,
287 // then add them to the end of pDirectories if it exists.
288
289 // First, get the current path's listing
290 vector<FileInfo> dirs;
291 dirs.reserve(16); // might as well start with a "few" to avoid grows.
292
293 // Keep adding them to the user specified regular file list if desired,
294 // but keep track of dirs ourself.
295 getListingForPath(canonicalPath, pRegularFiles, &dirs);
296
297 // If the caller wanted directories, append them all to the return
298 if (pDirectories) {
299 pDirectories->insert(pDirectories->end(), dirs.begin(), dirs.end());
300 }
301
302 // Finally, recurse on each directory in dirs
303 for (vector<FileInfo>::const_iterator it = dirs.begin(); it != dirs.end(); ++it) {
304 string subPath = canonicalPath + "/" + it->basename();
305 BESDEBUG(_sDebugChannel, "DirectoryUtil: recursing down to directory subtree=\"" << subPath << "\"..." << endl);
306 // Pass down the caller's accumulated vector's to be filled in.
307 getListingForPathRecursive(subPath, pRegularFiles, pDirectories);
308 }
309
310}
311
312void DirectoryUtil::getListingOfRegularFilesRecursive(const std::string& path, std::vector<FileInfo>& rRegularFiles)
313{
314 // call the other one, not accumulated the directories, only recursing into them.
315 getListingForPathRecursive(path, &rRegularFiles, 0);
316}
317
/**
 * Translate the errno left by a failed opendir() into a BES exception.
 * This function always throws.
 *
 * @param fullPath  the path that was handed to opendir(); used in the message
 *
 * @throws BESForbiddenError for EACCES
 * @throws BESNotFoundError  for ELOOP, ENAMETOOLONG, ENOENT and ENOTDIR
 * @throws BESInternalError  for EMFILE, ENFILE and any other errno
 *
 * Must be called before anything else can overwrite errno.
 */
void DirectoryUtil::throwErrorForOpendirFail(const string& fullPath)
{
    switch (errno) {
    case EACCES: {
        string msg = "Permission denied for some directory in path=\"" + fullPath + "\"";
        throw BESForbiddenError(msg, __FILE__, __LINE__);
    }

    case ELOOP: {
        string msg = "A symlink loop was detected in path=\"" + fullPath + "\"";
        throw BESNotFoundError(msg, __FILE__, __LINE__); // closest available error type
    }

    case ENAMETOOLONG: {
        string msg = "A name in the path was too long. path=\"" + fullPath + "\"";
        throw BESNotFoundError(msg, __FILE__, __LINE__);
    }

    case ENOENT: {
        string msg = "Some part of the path was not found. path=\"" + fullPath + "\"";
        throw BESNotFoundError(msg, __FILE__, __LINE__);
    }

    case ENOTDIR: {
        string msg = "Some part of the path was not a directory. path=\"" + fullPath + "\"";
        throw BESNotFoundError(msg, __FILE__, __LINE__);
    }

    // opendir() reports descriptor exhaustion with either code: EMFILE for the
    // per-process limit, ENFILE for the system-wide one.
    case EMFILE:
    case ENFILE: {
        string msg = "Internal Error: Too many files are currently open!";
        throw BESInternalError(msg, __FILE__, __LINE__);
    }

    default: {
        string msg = "An unknown errno was found after opendir() was called on path=\"" + fullPath + "\"";
        throw BESInternalError(msg, __FILE__, __LINE__);
    }
    }
}
363
364bool DirectoryUtil::matchesAllFilters(const std::string& path, time_t modTime) const
365{
366 bool matches = true;
367 // Do the suffix first since it's fast
368 if (!_suffix.empty() && !matchesSuffix(path, _suffix)) {
369 matches = false;
370 }
371
372 // Suffix matches and we have a regexp, check that
373 if (matches && _pRegExp) {
374 // match the full string, -1 on fail, num chars matching otherwise
375 int numCharsMatching = _pRegExp->match(path.c_str(), path.size(), 0);
376 matches = (numCharsMatching > 0); // TODO do we want to match the size()?
377 }
378
379 if (matches && _filteringModTimes) {
380 matches = (modTime < _newestModTime);
381 }
382
383 return matches;
384}
385
386bool DirectoryUtil::hasRelativePath(const std::string& path)
387{
388 return (path.find("..") != string::npos);
389}
390
392{
393 if (!path.empty()) {
394 string::size_type pos = path.find_last_not_of("/");
395 if (pos != string::npos) {
396 path = path.substr(0, pos + 1);
397 }
398 }
399}
400
402{
403 if (!path.empty()) {
404 string::size_type pos = path.find_first_not_of("/");
405 path = path.substr(pos, string::npos);
406 }
407}
408
409void DirectoryUtil::printFileInfoList(const vector<FileInfo>& listing)
410{
411 std::ostringstream oss;
412 printFileInfoList(oss, listing);
413 BESDEBUG(_sDebugChannel, oss.str() << endl);
414}
415
416void DirectoryUtil::printFileInfoList(std::ostream& os, const vector<FileInfo>& listing)
417{
418 for (vector<FileInfo>::const_iterator it = listing.begin(); it != listing.end(); ++it) {
419 os << it->toString() << endl;
420 }
421}
422
424{
425 bool found;
426 string rootDir;
427 TheBESKeys::TheKeys()->get_value("BES.Catalog.catalog.RootDirectory", rootDir, found);
428 if (!found) {
429 TheBESKeys::TheKeys()->get_value("BES.Data.RootDirectory", rootDir, found);
430 }
431 if (!found) {
432 rootDir = "/";
433 }
434 return rootDir;
435}
436
437bool DirectoryUtil::matchesSuffix(const std::string& filename, const std::string& suffix)
438{
439 // see if the last suffix.size() characters match.
440 bool matches = (filename.find(suffix, filename.size() - suffix.size()) != string::npos);
441 return matches;
442}
443}
error thrown if the BES is not allowed to access the resource requested
exception thrown if internal error encountered
error thrown if the resource requested cannot be found
Regular expression matching.
Definition: BESRegex.h:53
int match(const char *s, int len, int pos=0) const
Does the pattern match.
Definition: BESRegex.cc:127
static void check_path(const std::string &path, const std::string &root, bool follow_sym_links)
Is the combination of root + path a pathname the BES can/should access?
Definition: BESUtil.cc:382
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:340
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
static std::string getBESRootDir()
void setFilterRegExp(const std::string &regexp)
static void removePrecedingSlashes(std::string &path)
static bool hasRelativePath(const std::string &path)
static void printFileInfoList(std::ostream &os, const std::vector< FileInfo > &listing)
void setRootDir(const std::string &rootDir, bool allowRelativePaths=false, bool allowSymLinks=false)
void getListingOfRegularFilesRecursive(const std::string &path, std::vector< FileInfo > &rRegularFiles)
void setFilterSuffix(const std::string &suffix)
static void removeTrailingSlashes(std::string &path)
void getListingForPath(const std::string &path, std::vector< FileInfo > *pRegularFiles, std::vector< FileInfo > *pDirectories)
void getListingForPathRecursive(const std::string &path, std::vector< FileInfo > *pRegularFiles, std::vector< FileInfo > *pDirectories)
void setFilterModTimeOlderThan(time_t newestModTime)
const std::string & getRootDir() const
std::string getModTimeAsString() const
FileInfo(const std::string &path, const std::string &basename, bool isDir, time_t modTime)
const std::string & path() const
const std::string & getFullPath() const
Helper class for temporarily hijacking an existing dhi to load a DDX response for one particular file...