bes  Updated for version 3.20.6
DirectoryUtil.cc
1 // This file is part of the "NcML Module" project, a BES module designed
3 // to allow NcML files to be used as a wrapper to add
4 // AIS to existing datasets of any format.
5 //
6 // Copyright (c) 2009 OPeNDAP, Inc.
7 // Author: Michael Johnson <m.johnson@opendap.org>
8 //
9 // For more information, please also see the main website: http://opendap.org/
10 //
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
15 //
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
20 //
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 //
25 // Please see the files COPYING and COPYRIGHT for more information on the LGPL.
26 //
27 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
29 
30 #include "config.h"
31 #include "DirectoryUtil.h"
32 
33 #include <cstring>
34 #include <cerrno>
35 #include <sstream>
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <dirent.h>
39 
40 // libdap
41 #include "GNURegex.h"
42 
43 // bes
44 #include "BESDebug.h"
45 #include "BESForbiddenError.h"
46 #include "BESInternalError.h"
47 #include "TheBESKeys.h"
48 #include "BESNotFoundError.h"
49 #include "BESUtil.h"
50 
51 using std::string;
52 using std::vector;
53 using std::endl;
54 
55 namespace agg_util {
/**
 * Scoped owner of a DIR* obtained from opendir().
 *
 * The constructor attempts to open the directory; if that fails, fail()
 * returns true and errno still holds the value set by opendir().
 * The destructor calls closedir() on a successfully opened handle, so the
 * directory is released however the enclosing scope is left.
 *
 * Copying is disabled: a copy would hold the same DIR* and both objects
 * would call closedir() on it.
 */
struct DirWrapper {
public:

    /**
     * Try to open the directory at fullDirPath.
     * @param fullDirPath path handed unchanged to opendir().
     */
    explicit DirWrapper(const std::string& fullDirPath) :
        _pDir(0), _fullPath(fullDirPath)
    {
        // if the user sees null after this, they can check the errno.
        _pDir = opendir(fullDirPath.c_str());
    }

    /** Close the directory handle if one was opened. */
    ~DirWrapper()
    {
        if (_pDir) {
            closedir(_pDir);
            _pDir = 0;
        }
    }

    /** @return true if opendir() did not produce a handle. */
    bool fail() const
    {
        return !_pDir;
    }

    /** @return the wrapped handle (null if the open failed); still owned by this object. */
    DIR*
    get() const
    {
        return _pDir;
    }

    // automatically closedir() if non-null on dtor.
    DIR* _pDir;
    // the path that was passed to opendir()
    std::string _fullPath;

private:
    // Declared but intentionally not defined: this type must not be copied.
    DirWrapper(const DirWrapper&);
    DirWrapper& operator=(const DirWrapper&);
};
93 
95 FileInfo::FileInfo(const std::string& path, const std::string& basename, bool isDir, time_t modTime) :
96  _path(path), _basename(basename), _fullPath("") // start empty, cached later
97  , _isDir(isDir), _modTime(modTime)
98 {
101 }
102 
103 FileInfo::~FileInfo()
104 {
105 }
106 
107 const std::string&
109 {
110  return _path;
111 }
112 
113 const std::string&
114 FileInfo::basename() const
115 {
116  return _basename;
117 }
118 
119 bool FileInfo::isDir() const
120 {
121  return _isDir;
122 }
123 
124 time_t FileInfo::modTime() const
125 {
126  return _modTime;
127 }
128 
129 std::string FileInfo::getModTimeAsString() const
130 {
131  // we'll just use UTC for the output...
132  struct tm* pTM = gmtime(&_modTime);
133  char buf[128];
134  // this should be "Year-Month-Day Hour:Minute:Second"
135  strftime(buf, 128, "%F %T", pTM);
136  return string(buf);
137 }
138 
139 const std::string&
141 {
142  if (_fullPath.empty()) {
143  _fullPath = _path + "/" + _basename;
144  }
145  return _fullPath;
146 }
147 
148 std::string FileInfo::toString() const
149 {
150  return "{FileInfo fullPath=" + getFullPath() + " isDir=" + ((isDir()) ? ("true") : ("false")) + " modTime=\""
151  + getModTimeAsString() + "\""
152  " }";
153 }
154 
156 
157 const string DirectoryUtil::_sDebugChannel = "agg_util";
158 
159 DirectoryUtil::DirectoryUtil() :
160  _rootDir("/"), _suffix("") // we start with no filter
161  , _pRegExp(0), _filteringModTimes(false), _newestModTime(0L)
162 {
163  // this can throw, but the class is completely constructed by this point.
164  setRootDir("/");
165 }
166 
167 DirectoryUtil::~DirectoryUtil()
168 {
169  clearRegExp();
170 }
171 
173 const std::string&
175 {
176  return _rootDir;
177 }
178 
184 void DirectoryUtil::setRootDir(const std::string& origRootDir, bool allowRelativePaths/*=false*/,
185  bool /*allowSymLinks=false*/)
186 {
187  if (!allowRelativePaths && hasRelativePath(origRootDir)) {
188  throw BESForbiddenError("can't use rootDir=" + origRootDir + " since it has a relative path (../)", __FILE__,
189  __LINE__);
190  }
191 
192  // Get the root without trailing slash, we'll add it.
193  _rootDir = origRootDir;
194  removeTrailingSlashes(_rootDir);
195  // If empty here, that means the actual filesystem root.
196 
197  // Use the BESUtil to test the path
198  // Since it assumes root is valid and strips preceding "/",
199  // we use "/" as the root path and the root path as the path
200  // to validate the root. This will throw if invalid.
201  BESUtil::check_path(_rootDir, "/", false); // not going to allow symlinks by default.
202 
203  // We should be good if we get here.
204 }
205 
206 void DirectoryUtil::setFilterSuffix(const std::string& suffix)
207 {
208  _suffix = suffix;
209 }
210 
211 void DirectoryUtil::setFilterRegExp(const std::string& regexp)
212 {
213  clearRegExp(); // avoid leaks
214  if (!regexp.empty()) {
215  _pRegExp = new libdap::Regex(regexp.c_str());
216  }
217 }
218 
220 {
221  delete _pRegExp;
222  _pRegExp = 0;
223 }
224 
226 {
227  _newestModTime = newestModTime;
228  _filteringModTimes = true;
229 }
230 
231 void DirectoryUtil::getListingForPath(const std::string& path, std::vector<FileInfo>* pRegularFiles,
232  std::vector<FileInfo>* pDirectories)
233 {
234  string pathToUse(path);
235  removePrecedingSlashes(pathToUse);
236  pathToUse = getRootDir() + "/" + pathToUse;
237  BESDEBUG(_sDebugChannel, "Attempting to get dir listing for path=\"" << pathToUse << "\"" << endl);
238 
239  // RAII, will closedir no matter how we leave function, including a throw
240  DirWrapper pDir(pathToUse);
241  if (pDir.fail()) {
242  throwErrorForOpendirFail(pathToUse);
243  }
244 
245  // Go through each entry and see if it's a directory or regular file and
246  // add it to the list.
247  struct dirent* pDirEnt = 0;
248  while ((pDirEnt = readdir(pDir.get())) != 0) {
249  string entryName = pDirEnt->d_name;
250  // Exclude ".", ".." and any dotfile dirs like ".svn".
251  if (!entryName.empty() && entryName[0] == '.') {
252  continue;
253  }
254 
255  // Figure out if it's a regular file or directory
256  string pathToEntry = pathToUse + "/" + entryName;
257  struct stat statBuf;
258  int statResult = stat(pathToEntry.c_str(), &statBuf);
259  if (statResult != 0) {
260  // If we can't stat the file for some reason, then ignore it
261  continue;
262  }
263 
264  // Use the passed in path for the entry since we
265  // want to make the locations be relative to the root
266  // for loading later.
267  if (pDirectories && S_ISDIR(statBuf.st_mode)) {
268  pDirectories->push_back(FileInfo(path, entryName, true, statBuf.st_mtime));
269  }
270  else if (pRegularFiles && S_ISREG(statBuf.st_mode)) {
271  FileInfo theFile(path, entryName, false, statBuf.st_mtime);
272  // match against the relative passed in path, not root full path
273  if (matchesAllFilters(theFile.getFullPath(), statBuf.st_mtime)) {
274  pRegularFiles->push_back(theFile);
275  }
276  }
277  }
278 }
279 
280 void DirectoryUtil::getListingForPathRecursive(const std::string& path, std::vector<FileInfo>* pRegularFiles,
281  std::vector<FileInfo>* pDirectories)
282 {
283  // Remove trailing slash to make it canonical
284  string canonicalPath = path;
285  removeTrailingSlashes(canonicalPath);
286 
287  // We use our own local vector of directories in order to recurse,
288  // then add them to the end of pDirectories if it exists.
289 
290  // First, get the current path's listing
291  vector<FileInfo> dirs;
292  dirs.reserve(16); // might as well start with a "few" to avoid grows.
293 
294  // Keep adding them to the user specified regular file list if desired,
295  // but keep track of dirs ourself.
296  getListingForPath(canonicalPath, pRegularFiles, &dirs);
297 
298  // If the caller wanted directories, append them all to the return
299  if (pDirectories) {
300  pDirectories->insert(pDirectories->end(), dirs.begin(), dirs.end());
301  }
302 
303  // Finally, recurse on each directory in dirs
304  for (vector<FileInfo>::const_iterator it = dirs.begin(); it != dirs.end(); ++it) {
305  string subPath = canonicalPath + "/" + it->basename();
306  BESDEBUG(_sDebugChannel, "DirectoryUtil: recursing down to directory subtree=\"" << subPath << "\"..." << endl);
307  // Pass down the caller's accumulated vector's to be filled in.
308  getListingForPathRecursive(subPath, pRegularFiles, pDirectories);
309  }
310 
311 }
312 
313 void DirectoryUtil::getListingOfRegularFilesRecursive(const std::string& path, std::vector<FileInfo>& rRegularFiles)
314 {
315  // call the other one, not accumulated the directories, only recursing into them.
316  getListingForPathRecursive(path, &rRegularFiles, 0);
317 }
318 
/**
 * Turn the errno left by a failed opendir() into a BES exception.
 * This function always throws.
 *
 * @param fullPath  the path opendir() was called with; used in the message
 * @throws BESForbiddenError  for EACCES
 * @throws BESNotFoundError   for ELOOP, ENAMETOOLONG, ENOENT and ENOTDIR
 * @throws BESInternalError   for ENFILE and any other errno value
 */
void DirectoryUtil::throwErrorForOpendirFail(const std::string& fullPath)
{
    // Read errno once, before any of the string work below.
    const int openErrno = errno;

    switch (openErrno) {
    case EACCES:
        throw BESForbiddenError(std::string("Permission denied for some directory in path=\"") + fullPath + "\"",
            __FILE__, __LINE__);

    case ELOOP:
        // Not-found is the closest available category for a symlink loop.
        throw BESNotFoundError(std::string("A symlink loop was detected in path=\"") + fullPath + "\"",
            __FILE__, __LINE__);

    case ENAMETOOLONG:
        throw BESNotFoundError(std::string("A name in the path was too long. path=\"") + fullPath + "\"",
            __FILE__, __LINE__);

    case ENOENT:
        throw BESNotFoundError(std::string("Some part of the path was not found. path=\"") + fullPath + "\"",
            __FILE__, __LINE__);

    case ENOTDIR:
        throw BESNotFoundError(std::string("Some part of the path was not a directory. path=\"") + fullPath + "\"",
            __FILE__, __LINE__);

    case ENFILE:
        throw BESInternalError(std::string("Internal Error: Too many files are currently open!"),
            __FILE__, __LINE__);

    default:
        throw BESInternalError(
            std::string("An unknown errno was found after opendir() was called on path=\"") + fullPath + "\"",
            __FILE__, __LINE__);
    }
}
364 
365 bool DirectoryUtil::matchesAllFilters(const std::string& path, time_t modTime) const
366 {
367  bool matches = true;
368  // Do the suffix first since it's fast
369  if (!_suffix.empty() && !matchesSuffix(path, _suffix)) {
370  matches = false;
371  }
372 
373  // Suffix matches and we have a regexp, check that
374  if (matches && _pRegExp) {
375  // match the full string, -1 on fail, num chars matching otherwise
376  int numCharsMatching = _pRegExp->match(path.c_str(), path.size(), 0);
377  matches = (numCharsMatching > 0); // TODO do we want to match the size()?
378  }
379 
380  if (matches && _filteringModTimes) {
381  matches = (modTime < _newestModTime);
382  }
383 
384  return matches;
385 }
386 
387 bool DirectoryUtil::hasRelativePath(const std::string& path)
388 {
389  return (path.find("..") != string::npos);
390 }
391 
393 {
394  if (!path.empty()) {
395  string::size_type pos = path.find_last_not_of("/");
396  if (pos != string::npos) {
397  path = path.substr(0, pos + 1);
398  }
399  }
400 }
401 
403 {
404  if (!path.empty()) {
405  string::size_type pos = path.find_first_not_of("/");
406  path = path.substr(pos, string::npos);
407  }
408 }
409 
410 void DirectoryUtil::printFileInfoList(const vector<FileInfo>& listing)
411 {
412  std::ostringstream oss;
413  printFileInfoList(oss, listing);
414  BESDEBUG(_sDebugChannel, oss.str() << endl);
415 }
416 
417 void DirectoryUtil::printFileInfoList(std::ostream& os, const vector<FileInfo>& listing)
418 {
419  for (vector<FileInfo>::const_iterator it = listing.begin(); it != listing.end(); ++it) {
420  os << it->toString() << endl;
421  }
422 }
423 
425 {
426  bool found;
427  string rootDir;
428  TheBESKeys::TheKeys()->get_value("BES.Catalog.catalog.RootDirectory", rootDir, found);
429  if (!found) {
430  TheBESKeys::TheKeys()->get_value("BES.Data.RootDirectory", rootDir, found);
431  }
432  if (!found) {
433  rootDir = "/";
434  }
435  return rootDir;
436 }
437 
438 bool DirectoryUtil::matchesSuffix(const std::string& filename, const std::string& suffix)
439 {
440  // see if the last suffix.size() characters match.
441  bool matches = (filename.find(suffix, filename.size() - suffix.size()) != string::npos);
442  return matches;
443 }
444 }
error thrown if the BES is not allowed to access the resource requested
exception thrown if internal error encountered
error thrown if the resource requested cannot be found
static void check_path(const std::string &path, const std::string &root, bool follow_sym_links)
Check if the specified path is valid.
Definition: BESUtil.cc:254
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:272
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:62
static std::string getBESRootDir()
void setFilterRegExp(const std::string &regexp)
static void removePrecedingSlashes(std::string &path)
static bool hasRelativePath(const std::string &path)
static void printFileInfoList(std::ostream &os, const std::vector< FileInfo > &listing)
void setRootDir(const std::string &rootDir, bool allowRelativePaths=false, bool allowSymLinks=false)
void getListingOfRegularFilesRecursive(const std::string &path, std::vector< FileInfo > &rRegularFiles)
void setFilterSuffix(const std::string &suffix)
static void removeTrailingSlashes(std::string &path)
void getListingForPath(const std::string &path, std::vector< FileInfo > *pRegularFiles, std::vector< FileInfo > *pDirectories)
void getListingForPathRecursive(const std::string &path, std::vector< FileInfo > *pRegularFiles, std::vector< FileInfo > *pDirectories)
void setFilterModTimeOlderThan(time_t newestModTime)
const std::string & getRootDir() const
std::string getModTimeAsString() const
FileInfo(const std::string &path, const std::string &basename, bool isDir, time_t modTime)
const std::string & path() const
const std::string & getFullPath() const
Helper class for temporarily hijacking an existing dhi to load a DDX response for one particular file...