From cd45ed34a236aecd92b1aa05950057f2ec27d33b Mon Sep 17 00:00:00 2001 From: Teemu Murtola Date: Sun, 11 Jan 2015 13:34:03 +0200 Subject: [PATCH] Convert forcefield search to C++ - Convert gmx_directory_*() to a C++ class in directoryenumerator.* (code moved from futil.* and converted to C++). - Add some errno checks to detect error conditions better. - Add a function to enumerate files/directories in all GMXLIB directories to DataFileFinder, and use this to replace functionality in fflibutil.cpp. - Convert the code to enumerate forcefields into a C++ function with a more descriptive name, and convert the choose_ff() function to C++ to avoid complicated logic where the same directory is searched multiple times with different functions. This is mainly a rewrite of the code in C++, with only minor functional changes: - Unit tests of this code no longer get confused by GMXLIB being set (currently, there are no such tests, though). - Force fields are searched using the same logic as other data files (the default directory is always searched). - Some I/O errors are now handled with more descriptive errors instead of silently ignoring directories that cannot be listed. 
Change-Id: Ib3d070af5907ea00dfa9409e753868251eb8c7da --- src/gromacs/gmxpreprocess/fflibutil.cpp | 250 ++++------------ src/gromacs/gmxpreprocess/fflibutil.h | 19 +- src/gromacs/gmxpreprocess/pdb2top.cpp | 222 +++++++------- src/gromacs/utility/datafilefinder.cpp | 58 +++- src/gromacs/utility/datafilefinder.h | 62 ++++ src/gromacs/utility/directoryenumerator.cpp | 305 ++++++++++++++++++++ src/gromacs/utility/directoryenumerator.h | 122 ++++++++ src/gromacs/utility/futil.cpp | 194 +------------ src/gromacs/utility/futil.h | 29 -- 9 files changed, 726 insertions(+), 535 deletions(-) create mode 100644 src/gromacs/utility/directoryenumerator.cpp create mode 100644 src/gromacs/utility/directoryenumerator.h diff --git a/src/gromacs/gmxpreprocess/fflibutil.cpp b/src/gromacs/gmxpreprocess/fflibutil.cpp index 962ed08e95..dd210b64a1 100644 --- a/src/gromacs/gmxpreprocess/fflibutil.cpp +++ b/src/gromacs/gmxpreprocess/fflibutil.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2010,2012,2013,2014, by the GROMACS development team, led by + * Copyright (c) 2010,2012,2013,2014,2015, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -36,18 +36,21 @@ #include "fflibutil.h" -#include -#include #include -#include "gromacs/legacyheaders/network.h" +#include +#include + #include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/datafilefinder.h" +#include "gromacs/utility/directoryenumerator.h" #include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/file.h" #include "gromacs/utility/futil.h" #include "gromacs/utility/path.h" -#include "gromacs/utility/programcontext.h" #include "gromacs/utility/smalloc.h" +#include "gromacs/utility/stringutil.h" const char *fflib_forcefield_dir_ext() { @@ -93,214 +96,75 @@ void fflib_filename_base(const char *filename, char *filebase, int maxlen) } } -static void sort_filenames(int n, char **name, char **name2) -{ - /* Slow sort, but we usually have tens of names */ - int i, j, f; - char *tmp; - - for (i = 0; i < n-1; i++) - { - f = i; - for (j = i+1; j < n; j++) - { - if (strcmp(name[j], name[f]) < 0) - { - f = j; - } - } - if (f > i) - { - tmp = name[i]; - name[i] = name[f]; - name[f] = tmp; - if (name2 != NULL) - { - tmp = name2[i]; - name2[i] = name2[f]; - name2[f] = tmp; - } - } - } -} - -static int low_fflib_search_file_end(const char *ffdir, - gmx_bool bAddCWD, - const char *file_end, - gmx_bool bFatalError, - char ***filenames, - char ***filenames_short) +int fflib_search_file_end(const char *ffdir, + const char *file_end, + gmx_bool bFatalError, + char ***filenames) { - char **fns = NULL, **fns_short = NULL; - int n = 0; try { - std::vector libPaths; - bool bEnvIsSet = false; - - if (ffdir != NULL) + std::string ffdirFull(gmx::getLibraryFileFinder().findFile(ffdir)); + std::vector result + = gmx::DirectoryEnumerator::enumerateFilesWithExtension( + ffdirFull.c_str(), file_end, true); + if (result.empty() && bFatalError) { - /* Search ffdir in current dir and library dirs */ - libPaths.push_back(gmxlibfn(ffdir)); + std::string message + = gmx::formatString("Could not find any files ending on '%s' 
" + "in the force field directory '%s'", + file_end, ffdir); + GMX_THROW(gmx::InvalidInputError(message)); } - else + const int count = static_cast(result.size()); + for (int i = 0; i < count; ++i) { - /* GMXLIB can be a path now */ - if (bAddCWD) - { - libPaths.push_back("."); - } - const char *lib = getenv("GMXLIB"); - if (lib != NULL) - { - bEnvIsSet = true; - gmx::Path::splitPathEnvironment(lib, &libPaths); - } - else - { - libPaths.push_back(gmx::getProgramContext().defaultLibraryDataPath()); - } + result[i] = gmx::Path::join(ffdir, result[i]); } - - const int len_fe = strlen(file_end); - - std::vector::const_iterator i; - for (i = libPaths.begin(); i != libPaths.end(); ++i) + char **fns; + snew(fns, count); + for (int i = 0; i < count; ++i) { - const char *dir = i->c_str(); - gmx_directory_t dirhandle; - const int rc = gmx_directory_open(&dirhandle, dir); - if (rc == 0) - { - char nextname[STRLEN]; - int n_thisdir = 0; - while (gmx_directory_nextfile(dirhandle, nextname, STRLEN-1) == 0) - { - nextname[STRLEN-1] = 0; - if (debug) - { - fprintf(debug, "dir '%s' %d file '%s'\n", - dir, n_thisdir, nextname); - } - const int len_name = strlen(nextname); - /* What about case sensitivity? */ - if (len_name >= len_fe && - strcmp(nextname+len_name-len_fe, file_end) == 0) - { - char fn_dir[GMX_PATH_MAX]; - /* We have a match */ - srenew(fns, n+1); - sprintf(fn_dir, "%s%c%s", dir, DIR_SEPARATOR, nextname); - - /* Copy the file name, possibly including the path. */ - fns[n] = gmx_strdup(fn_dir); - - if (ffdir == NULL) - { - /* We are searching in a path. - * Use the relative path when we use share/top - * from the installation. - * Add the full path when we use the current - * working directory of GMXLIB. 
- */ - srenew(fns_short, n+1); - if (strcmp(dir, ".") == 0 || bEnvIsSet) - { - fns_short[n] = gmx_strdup(fn_dir); - } - else - { - fns_short[n] = gmx_strdup(nextname); - } - } - n++; - n_thisdir++; - } - } - gmx_directory_close(dirhandle); - - sort_filenames(n_thisdir, - fns+n-n_thisdir, - fns_short == NULL ? NULL : fns_short+n-n_thisdir); - } + fns[i] = gmx_strdup(result[i].c_str()); } + *filenames = fns; + return count; } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; - - if (n == 0 && bFatalError) - { - if (ffdir != NULL) - { - gmx_fatal(FARGS, "Could not find any files ending on '%s' in the force field directory '%s'", file_end, ffdir); - } - else - { - gmx_fatal(FARGS, "Could not find any files ending on '%s' in the current directory or the GROMACS library search path", file_end); - } - } - - *filenames = fns; - if (ffdir == NULL) - { - *filenames_short = fns_short; - } - - return n; -} - -int fflib_search_file_end(const char *ffdir, - const char *file_end, - gmx_bool bFatalError, - char ***filenames) -{ - return low_fflib_search_file_end(ffdir, FALSE, file_end, bFatalError, - filenames, NULL); } -int fflib_search_file_in_dirend(const char *filename, const char *dirend, - char ***dirnames) +std::vector fflib_enumerate_forcefields() { - int nf, i; - char **f, **f_short; - int n; - char **dns; - gmx_directory_t dirhandle; - char nextname[STRLEN]; - int rc; - - /* Find all files (not only dir's) ending on dirend */ - nf = low_fflib_search_file_end(NULL, TRUE, dirend, FALSE, &f, &f_short); - - n = 0; - dns = NULL; - for (i = 0; i < nf; i++) + const char *const dirend = fflib_forcefield_dir_ext(); + const char *const filename = fflib_forcefield_itp(); + std::vector candidates + = gmx::getLibraryFileFinder().enumerateFiles( + gmx::DataFileOptions(dirend) + .throwIfNotFound(false)); + + std::vector result; + for (size_t i = 0; i < candidates.size(); ++i) { - rc = gmx_directory_open(&dirhandle, f[i]); - - if (rc == 0) + std::string testPath(gmx::Path::join( + 
candidates[i].dir, candidates[i].name, filename)); + // TODO: Consider also checking that the directory can be listed. + if (gmx::File::exists(testPath)) { - while (gmx_directory_nextfile(dirhandle, nextname, STRLEN-1) == 0) - { - nextname[STRLEN-1] = 0; - if (strcmp(nextname, filename) == 0) - { - /* We have a match */ - srenew(dns, n+1); - dns[n] = gmx_strdup(f_short[i]); - n++; - } - } - gmx_directory_close(dirhandle); + result.push_back(candidates[i]); } - sfree(f[i]); - sfree(f_short[i]); } - sfree(f); - sfree(f_short); - *dirnames = dns; + // TODO: Consider merging this into enumerateFiles(), such that the error + // could also list the directories searched. + if (result.empty()) + { + std::string message + = gmx::formatString("No force fields found (files with name '%s' " + "in subdirectories ending on '%s')", + filename, dirend); + GMX_THROW(gmx::InvalidInputError(message)); + } - return n; + return result; } gmx_bool fflib_fexist(const char *file) diff --git a/src/gromacs/gmxpreprocess/fflibutil.h b/src/gromacs/gmxpreprocess/fflibutil.h index 6703c2b7b0..572235eee9 100644 --- a/src/gromacs/gmxpreprocess/fflibutil.h +++ b/src/gromacs/gmxpreprocess/fflibutil.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2010,2014, by the GROMACS development team, led by + * Copyright (c) 2010,2014,2015, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -38,9 +38,18 @@ #include -#include "gromacs/legacyheaders/typedefs.h" +#include "gromacs/utility/basedefinitions.h" #ifdef __cplusplus +#include + +#include "gromacs/utility/datafilefinder.h" + +/*! \brief + * Enumerates forcefields in the data directories. 
+ */ +std::vector fflib_enumerate_forcefields(); + extern "C" { #endif @@ -68,12 +77,6 @@ int fflib_search_file_end(const char *ffdir, * Return the number of files and the file names in filenames. */ -int fflib_search_file_in_dirend(const char *filename, const char *dirend, - char ***dirnames); -/* Search for files with name filename in subdirectories with names - * ending on dirend. - * Return the number of files and the directory names in dirnames. - */ gmx_bool fflib_fexist(const char *file); /* Check if a file exists in the force field library */ diff --git a/src/gromacs/gmxpreprocess/pdb2top.cpp b/src/gromacs/gmxpreprocess/pdb2top.cpp index 8d9de45d32..50676f4e17 100644 --- a/src/gromacs/gmxpreprocess/pdb2top.cpp +++ b/src/gromacs/gmxpreprocess/pdb2top.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -41,6 +41,11 @@ #include #include #include +#include + +#include +#include +#include #include "gromacs/fileio/filenm.h" #include "gromacs/fileio/pdbio.h" @@ -64,9 +69,12 @@ #include "gromacs/utility/cstringutil.h" #include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/file.h" #include "gromacs/utility/futil.h" +#include "gromacs/utility/path.h" #include "gromacs/utility/programcontext.h" #include "gromacs/utility/smalloc.h" +#include "gromacs/utility/stringutil.h" /* this must correspond to enum in pdb2top.h */ const char *hh[ehisNR] = { "HISD", "HISE", "HISH", "HIS1" }; @@ -119,89 +127,47 @@ gmx_bool is_int(double x) return (fabs(x-ix) < tol); } -static void swap_strings(char **s, int i, int j) -{ - char *tmp; - - tmp = s[i]; - s[i] = s[j]; - s[j] = tmp; -} - -void -choose_ff(const char *ffsel, - char *forcefield, int ff_maxlen, - char *ffdir, int ffdir_maxlen) +static void +choose_ff_impl(const char *ffsel, + char *forcefield, int ff_maxlen, + char *ffdir, int ffdir_maxlen) { - int nff; - char **ffdirs, **ffs, **ffs_dir, *ptr; - int i, j, sel, cwdsel, nfound; - char buf[STRLEN], **desc; - FILE *fp; - char *pret; - - nff = fflib_search_file_in_dirend(fflib_forcefield_itp(), - fflib_forcefield_dir_ext(), - &ffdirs); - - if (nff == 0) - { - gmx_fatal(FARGS, "No force fields found (files with name '%s' in subdirectories ending on '%s')", - fflib_forcefield_itp(), fflib_forcefield_dir_ext()); - } + std::vector ffdirs = fflib_enumerate_forcefields(); + const int nff = static_cast(ffdirs.size()); /* Replace with unix path separators */ if (DIR_SEPARATOR != '/') { - for (i = 0; i < nff; i++) + for (int i = 0; i < nff; ++i) { - while ( (ptr = strchr(ffdirs[i], DIR_SEPARATOR)) != NULL) - { - *ptr = '/'; - } + std::replace(ffdirs[i].dir.begin(), ffdirs[i].dir.end(), DIR_SEPARATOR, '/'); } } /* Store the force field names in ffs */ - snew(ffs, nff); - snew(ffs_dir, nff); - for (i = 0; i < nff; i++) + std::vector ffs; + 
ffs.reserve(ffdirs.size()); + for (int i = 0; i < nff; ++i) { - /* Remove the path from the ffdir name - use our unix standard here! */ - ptr = strrchr(ffdirs[i], '/'); - if (ptr == NULL) - { - ffs[i] = gmx_strdup(ffdirs[i]); - ffs_dir[i] = low_gmxlibfn(ffdirs[i], FALSE, FALSE); - if (ffs_dir[i] == NULL) - { - gmx_fatal(FARGS, "Can no longer find file '%s'", ffdirs[i]); - } - } - else - { - ffs[i] = gmx_strdup(ptr+1); - ffs_dir[i] = gmx_strdup(ffdirs[i]); - } - ffs_dir[i][strlen(ffs_dir[i])-strlen(ffs[i])-1] = '\0'; - /* Remove the extension from the ffdir name */ - ffs[i][strlen(ffs[i])-strlen(fflib_forcefield_dir_ext())] = '\0'; + ffs.push_back(gmx::stripSuffixIfPresent(ffdirs[i].name, + fflib_forcefield_dir_ext())); } + int sel; if (ffsel != NULL) { - sel = -1; - cwdsel = -1; - nfound = 0; - for (i = 0; i < nff; i++) + sel = -1; + int cwdsel = -1; + int nfound = 0; + for (int i = 0; i < nff; ++i) { - if (strcmp(ffs[i], ffsel) == 0) + if (ffs[i] == ffsel) { /* Matching ff name */ sel = i; nfound++; - if (strncmp(ffs_dir[i], ".", 1) == 0) + if (ffdirs[i].dir == ".") { cwdsel = i; } @@ -224,77 +190,88 @@ choose_ff(const char *ffsel, } else { - gmx_fatal(FARGS, - "Force field '%s' occurs in %d places, but not in the current directory.\n" - "Run without the -ff switch and select the force field interactively.", ffsel, nfound); + std::string message = gmx::formatString( + "Force field '%s' occurs in %d places, but not in " + "the current directory.\n" + "Run without the -ff switch and select the force " + "field interactively.", ffsel, nfound); + GMX_THROW(gmx::InconsistentInputError(message)); } } else if (nfound == 0) { - gmx_fatal(FARGS, "Could not find force field '%s' in current directory, install tree or GMXDATA path.", ffsel); + std::string message = gmx::formatString( + "Could not find force field '%s' in current directory, " + "install tree or GMXLIB path.", ffsel); + GMX_THROW(gmx::InconsistentInputError(message)); } } else if (nff > 1) { - snew(desc, nff); 
- for (i = 0; (i < nff); i++) + std::vector desc; + desc.reserve(ffdirs.size()); + for (int i = 0; i < nff; ++i) { - sprintf(buf, "%s%c%s%s%c%s", - ffs_dir[i], DIR_SEPARATOR, - ffs[i], fflib_forcefield_dir_ext(), DIR_SEPARATOR, - fflib_forcefield_doc()); - if (gmx_fexist(buf)) + std::string docFileName( + gmx::Path::join(ffdirs[i].dir, ffdirs[i].name, + fflib_forcefield_doc())); + // TODO: Just try to open the file with a method that does not + // throw/bail out with a fatal error instead of multiple checks. + if (gmx::File::exists(docFileName)) { + // TODO: Use a C++ API without such an intermediate/fixed-length buffer. + char buf[STRLEN]; /* We don't use fflib_open, because we don't want printf's */ - fp = gmx_ffopen(buf, "r"); - snew(desc[i], STRLEN); - get_a_line(fp, desc[i], STRLEN); + FILE *fp = gmx_ffopen(docFileName.c_str(), "r"); + get_a_line(fp, buf, STRLEN); gmx_ffclose(fp); + desc.push_back(buf); } else { - desc[i] = gmx_strdup(ffs[i]); + desc.push_back(ffs[i]); } } /* Order force fields from the same dir alphabetically * and put deprecated force fields at the end. 
*/ - for (i = 0; (i < nff); i++) + for (int i = 0; i < nff; ++i) { - for (j = i+1; (j < nff); j++) + for (int j = i + 1; j < nff; ++j) { - if (strcmp(ffs_dir[i], ffs_dir[j]) == 0 && + if (ffdirs[i].dir == ffdirs[j].dir && ((desc[i][0] == '[' && desc[j][0] != '[') || ((desc[i][0] == '[' || desc[j][0] != '[') && - gmx_strcasecmp(desc[i], desc[j]) > 0))) + gmx_strcasecmp(desc[i].c_str(), desc[j].c_str()) > 0))) { - swap_strings(ffdirs, i, j); - swap_strings(ffs, i, j); - swap_strings(desc, i, j); + std::swap(ffdirs[i].name, ffdirs[j].name); + std::swap(ffs[i], ffs[j]); + std::swap(desc[i], desc[j]); } } } printf("\nSelect the Force Field:\n"); - for (i = 0; (i < nff); i++) + for (int i = 0; i < nff; ++i) { - if (i == 0 || strcmp(ffs_dir[i-1], ffs_dir[i]) != 0) + if (i == 0 || ffdirs[i-1].dir != ffdirs[i].dir) { - if (strcmp(ffs_dir[i], ".") == 0) + if (ffdirs[i].dir == ".") { printf("From current directory:\n"); } else { - printf("From '%s':\n", ffs_dir[i]); + printf("From '%s':\n", ffdirs[i].dir.c_str()); } } - printf("%2d: %s\n", i+1, desc[i]); - sfree(desc[i]); + printf("%2d: %s\n", i+1, desc[i].c_str()); } - sfree(desc); sel = -1; + // TODO: Add a C++ API for this. + char buf[STRLEN]; + char *pret; do { pret = fgets(buf, STRLEN, stdin); @@ -312,12 +289,20 @@ choose_ff(const char *ffsel, * This check assumes that the order of ffs matches the order * in which fflib_open searches ff library files. */ - for (i = 0; i < sel; i++) + for (int i = 0; i < sel; i++) { - if (strcmp(ffs[i], ffs[sel]) == 0) + if (ffs[i] == ffs[sel]) { - gmx_fatal(FARGS, "Can only select the first of multiple force field entries with directory name '%s%s' in the list. 
If you want to use the next entry, run pdb2gmx in a different directory or rename or move the force field directory present in the current working directory.", - ffs[sel], fflib_forcefield_dir_ext()); + std::string message = gmx::formatString( + "Can only select the first of multiple force " + "field entries with directory name '%s%s' in " + "the list. If you want to use the next entry, " + "run pdb2gmx in a different directory, set GMXLIB " + "to point to the desired force field first, and/or " + "rename or move the force field directory present " + "in the current working directory.", + ffs[sel].c_str(), fflib_forcefield_dir_ext()); + GMX_THROW(gmx::NotImplementedError(message)); } } } @@ -326,29 +311,44 @@ choose_ff(const char *ffsel, sel = 0; } - if (strlen(ffs[sel]) >= (size_t)ff_maxlen) + if (ffs[sel].length() >= static_cast(ff_maxlen)) { - gmx_fatal(FARGS, "Length of force field name (%d) >= maxlen (%d)", - strlen(ffs[sel]), ff_maxlen); + std::string message = gmx::formatString( + "Length of force field name (%d) >= maxlen (%d)", + static_cast(ffs[sel].length()), ff_maxlen); + GMX_THROW(gmx::InvalidInputError(message)); } - strcpy(forcefield, ffs[sel]); + strcpy(forcefield, ffs[sel].c_str()); - if (strlen(ffdirs[sel]) >= (size_t)ffdir_maxlen) + std::string ffpath; + if (ffdirs[sel].bFromDefaultDir) + { + ffpath = ffdirs[sel].name; + } + else + { + ffpath = gmx::Path::join(ffdirs[sel].dir, ffdirs[sel].name); + } + if (ffpath.length() >= static_cast(ffdir_maxlen)) { - gmx_fatal(FARGS, "Length of force field dir (%d) >= maxlen (%d)", - strlen(ffdirs[sel]), ffdir_maxlen); + std::string message = gmx::formatString( + "Length of force field dir (%d) >= maxlen (%d)", + static_cast(ffpath.length()), ffdir_maxlen); + GMX_THROW(gmx::InvalidInputError(message)); } - strcpy(ffdir, ffdirs[sel]); + strcpy(ffdir, ffpath.c_str()); +} - for (i = 0; (i < nff); i++) +void +choose_ff(const char *ffsel, + char *forcefield, int ff_maxlen, + char *ffdir, int ffdir_maxlen) +{ + try 
{ - sfree(ffdirs[i]); - sfree(ffs[i]); - sfree(ffs_dir[i]); + choose_ff_impl(ffsel, forcefield, ff_maxlen, ffdir, ffdir_maxlen); } - sfree(ffdirs); - sfree(ffs); - sfree(ffs_dir); + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } void choose_watermodel(const char *wmsel, const char *ffdir, diff --git a/src/gromacs/utility/datafilefinder.cpp b/src/gromacs/utility/datafilefinder.cpp index da1e3e0e3b..dfad443b4a 100644 --- a/src/gromacs/utility/datafilefinder.cpp +++ b/src/gromacs/utility/datafilefinder.cpp @@ -48,6 +48,7 @@ #include #include +#include "gromacs/utility/directoryenumerator.h" #include "gromacs/utility/exceptions.h" #include "gromacs/utility/file.h" #include "gromacs/utility/path.h" @@ -153,7 +154,7 @@ std::string DataFileFinder::findFile(const DataFileOptions &options) const const char *const envName = (impl_.get() ? impl_->envName_ : NULL); const bool bEnvIsSet = (impl_.get() ? impl_->bEnvIsSet_ : false); std::string message( - formatString("Library file %s not found", options.filename_)); + formatString("Library file '%s' not found", options.filename_)); if (options.bCurrentDir_) { message.append(" in current dir nor"); @@ -195,4 +196,59 @@ std::string DataFileFinder::findFile(const DataFileOptions &options) const return std::string(); } +std::vector +DataFileFinder::enumerateFiles(const DataFileOptions &options) const +{ + // TODO: Consider if not being able to list one of the directories should + // really be a fatal error. Or alternatively, check somewhere else that + // paths in GMXLIB are valid. 
+ std::vector result; + std::vector::const_iterator i; + if (options.bCurrentDir_) + { + std::vector files + = DirectoryEnumerator::enumerateFilesWithExtension( + ".", options.filename_, false); + for (i = files.begin(); i != files.end(); ++i) + { + result.push_back(DataFileInfo(".", *i, false)); + } + } + if (impl_.get()) + { + std::vector::const_iterator j; + for (j = impl_->searchPath_.begin(); j != impl_->searchPath_.end(); ++j) + { + std::vector files + = DirectoryEnumerator::enumerateFilesWithExtension( + j->c_str(), options.filename_, false); + for (i = files.begin(); i != files.end(); ++i) + { + result.push_back(DataFileInfo(*j, *i, false)); + } + } + } + const char *const defaultPath = getProgramContext().defaultLibraryDataPath(); + if (defaultPath != NULL && defaultPath[0] != '\0') + { + std::vector files + = DirectoryEnumerator::enumerateFilesWithExtension( + defaultPath, options.filename_, false); + for (i = files.begin(); i != files.end(); ++i) + { + result.push_back(DataFileInfo(defaultPath, *i, true)); + } + } + if (result.empty() && options.bThrow_) + { + // TODO: Print the search path as is done in findFile(). + std::string message( + formatString("Could not find any files ending on '%s' in the " + "current directory or the GROMACS library search path", + options.filename_)); + GMX_THROW(FileIOError(message)); + } + return result; +} + } // namespace gmx diff --git a/src/gromacs/utility/datafilefinder.h b/src/gromacs/utility/datafilefinder.h index 383ccfb7ea..f675fceba4 100644 --- a/src/gromacs/utility/datafilefinder.h +++ b/src/gromacs/utility/datafilefinder.h @@ -46,6 +46,7 @@ #include #include +#include #include "gromacs/utility/classhelpers.h" @@ -118,6 +119,45 @@ class DataFileOptions friend class DataFileFinder; }; +/*! \brief + * Information about a data file found by DataFileFinder::enumerateFiles(). + * + * \inpublicapi + * \ingroup module_utility + */ +struct DataFileInfo +{ + //! Initializes the structure with given values. 
+ DataFileInfo(const std::string &dir, const std::string &name, bool bDefault) + : dir(dir), name(name), bFromDefaultDir(bDefault) + { + } + + /*! \brief + * Directory from which the file was found. + * + * If the file was found from the current directory, this will be `"."`. + * In other cases, this will be a full path (except if the user-provided + * search path contains relative paths). + */ + std::string dir; + /*! \brief + * Name of the file without any directory name. + */ + std::string name; + /*! \brief + * Whether the file was found from the default directory. + * + * If `true`, the file was found from the default installation data + * directory, not from the current directory or any user-provided (through + * DataFileFinder::setSearchPathFromEnv()) location. + * \todo + * Consider replacing with an enum that identifies the source (current dir, + * GMXLIB, default). + */ + bool bFromDefaultDir; +}; + /*! \brief * Searches data files from a set of paths. * @@ -160,6 +200,7 @@ class DataFileFinder * \param[in] options Identifies the file to be searched for. * \returns The opened file handle, or `NULL` if the file could not be * found and exceptions were turned off. + * \throws std::bad_alloc if out of memory. * \throws FileIOError if * - no such file can be found, and \p options specifies that an * exception should be thrown, or @@ -176,6 +217,7 @@ class DataFileFinder * \param[in] options Identifies the file to be searched for. * \returns Full path to the data file, or an empty string if the file * could not be found and exceptions were turned off. + * \throws std::bad_alloc if out of memory. * \throws FileIOError if no such file can be found, and \p options * specifies that an exception should be thrown. * @@ -184,6 +226,26 @@ class DataFileFinder * Returns the full path to the first file found. */ std::string findFile(const DataFileOptions &options) const; + /*! \brief + * Enumerates files in the data directories. 
+ * + * \param[in] options Identifies files to be searched for. + * \returns Information about each found file. + * \throws std::bad_alloc if out of memory. + * \throws FileIOError if no such file can be found, and \p options + * specifies that an exception should be thrown. + * + * Enumerates all files in the data directories that have the + * extension/suffix specified by the file name in \p options. + * Unlike findFile() and openFile(), this only works on files that are + * in the actual data directories, not for any entry within + * subdirectories of those. + * See DataFileInfo for details on what is returned for each found + * file. + * Files from the same directory will be returned as a continuous block + * in the returned vector. + */ + std::vector enumerateFiles(const DataFileOptions &options) const; private: class Impl; diff --git a/src/gromacs/utility/directoryenumerator.cpp b/src/gromacs/utility/directoryenumerator.cpp new file mode 100644 index 0000000000..f6b34f9aef --- /dev/null +++ b/src/gromacs/utility/directoryenumerator.cpp @@ -0,0 +1,305 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2010,2011,2014,2015, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \internal \file + * \brief + * Implements gmx::DirectoryEnumerator. + * + * \author Erik Lindahl (original C implementation) + * \author Teemu Murtola (C++ wrapper + errno handling) + * \ingroup module_utility + */ +#include "gmxpre.h" + +#include "directoryenumerator.h" + +#include "config.h" + +#include +#include + +#include +#include +#include + +#ifdef HAVE_DIRENT_H +#include +#endif +#ifdef GMX_NATIVE_WINDOWS +#include +#endif + +#include "gromacs/utility/exceptions.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/futil.h" +#include "gromacs/utility/gmxassert.h" +#include "gromacs/utility/smalloc.h" +#include "gromacs/utility/stringutil.h" + +namespace gmx +{ + +/******************************************************************** + * DirectoryEnumerator::Impl + */ + +// TODO: Consider whether checking the return value of closing would be useful, +// and what could we do if it fails? +#if defined GMX_NATIVE_WINDOWS +// TODO: Consider if Windows provides more error details through other APIs. 
+class DirectoryEnumerator::Impl +{ + public: + static Impl *init(const char *dirname, bool bThrow) + { + std::string tmpname(dirname); + // Remove possible trailing directory separator. + // TODO: Use a method in gmx::Path instead. + if (tmpname.back() == '/' || tmpname.back() == '\\') + { + tmpname.pop_back(); + } + + // Add wildcard. + tmpname.append("/*"); + + errno = 0; + _finddata_t finddata; + intptr_t handle = _findfirst(tmpname.c_str(), &finddata); + if (handle < 0L) + { + if (errno != ENOENT && bThrow) + { + const int code = errno; + const std::string message = + formatString("Failed to list files in directory '%s'", + dirname); + GMX_THROW_WITH_ERRNO(FileIOError(message), "_findfirst", code); + } + return NULL; + } + return new Impl(handle, finddata); + } + Impl(intptr_t handle, _finddata_t finddata) + : windows_handle(handle), finddata(finddata), bFirst_(true) + { + } + ~Impl() + { + _findclose(windows_handle); + } + + bool nextFile(std::string *filename) + { + if (bFirst_) + { + *filename = finddata.name; + bFirst_ = false; + return true; + } + else + { + errno = 0; + if (_findnext(windows_handle, &finddata) != 0) + { + if (errno == 0 || errno == ENOENT) + { + filename->clear(); + return false; + } + else + { + GMX_THROW_WITH_ERRNO( + FileIOError("Failed to list files in a directory"), + "_findnext", errno); + } + } + *filename = finddata.name; + return true; + } + } + + private: + intptr_t windows_handle; + _finddata_t finddata; + bool bFirst_; +}; +#elif defined HAVE_DIRENT_H +class DirectoryEnumerator::Impl +{ + public: + static Impl *init(const char *dirname, bool bThrow) + { + errno = 0; + DIR *handle = opendir(dirname); + if (handle == NULL) + { + if (bThrow) + { + const int code = errno; + const std::string message = + formatString("Failed to list files in directory '%s'", + dirname); + GMX_THROW_WITH_ERRNO(FileIOError(message), "opendir", code); + } + return NULL; + } + return new Impl(handle); + } + explicit Impl(DIR *handle) : 
dirent_handle(handle) + { + // TODO: Use memory allocation that throws, and handle + // exception safety (close handle) in such a case. + /* On some platforms no space is present for d_name in dirent. + * Since d_name is guaranteed to be the last entry, allocating + * extra space for dirent will allow more size for d_name. + * GMX_PATH_MAX should always be >= the max possible d_name. + */ + smalloc(direntp_large, sizeof(*direntp_large) + GMX_PATH_MAX); + } + ~Impl() + { + sfree(direntp_large); + closedir(dirent_handle); + } + + bool nextFile(std::string *filename) + { + errno = 0; + dirent *p; + int rc = readdir_r(dirent_handle, direntp_large, &p); + if (p == NULL && rc == 0) + { + filename->clear(); + return false; + } + else if (rc != 0) + { + GMX_THROW_WITH_ERRNO( + FileIOError("Failed to list files in a directory"), + "readdir_r", errno); + } + *filename = direntp_large->d_name; + return true; + } + + private: + DIR *dirent_handle; + dirent *direntp_large; +}; +#else +class DirectoryEnumerator::Impl +{ + public: + static Impl *init(const char * /*dirname*/, bool /*bThrow*/) + { + std::string message( + "Source compiled without POSIX dirent or Windows support " + "- cannot scan directories. 
In the very unlikely event " + "this is not a compile-time mistake you could consider " + "implementing support for your platform in " + "directoryenumerator.cpp, but contact the developers " + "to make sure it's really necessary!"); + GMX_THROW(NotImplementedError(message)); + } + + bool nextFile(std::string * /*filename*/) + { + return false; + } +}; +#endif + +/******************************************************************** + * DirectoryEnumerator + */ + +// static +std::vector +DirectoryEnumerator::enumerateFilesWithExtension( + const char *dirname, const char *extension, bool bThrow) +{ + std::vector result; + DirectoryEnumerator dir(dirname, bThrow); + std::string nextName; + while (dir.nextFile(&nextName)) + { + if (debug) + { + std::fprintf(debug, "dir '%s' file '%s'\n", + dirname, nextName.c_str()); + } + // TODO: What about case sensitivity? + if (endsWith(nextName, extension)) + { + result.push_back(nextName); + } + } + + std::sort(result.begin(), result.end()); + return result; +} + + +DirectoryEnumerator::DirectoryEnumerator(const char *dirname, bool bThrow) + : impl_(NULL) +{ + GMX_RELEASE_ASSERT(dirname != NULL && dirname[0] != '\0', + "Attempted to open empty/null directory path"); + impl_.reset(Impl::init(dirname, bThrow)); +} + +DirectoryEnumerator::DirectoryEnumerator(const std::string &dirname, bool bThrow) + : impl_(NULL) +{ + GMX_RELEASE_ASSERT(!dirname.empty(), + "Attempted to open empty/null directory path"); + impl_.reset(Impl::init(dirname.c_str(), bThrow)); +} + +DirectoryEnumerator::~DirectoryEnumerator() +{ +} + +bool DirectoryEnumerator::nextFile(std::string *filename) +{ + if (!impl_.get()) + { + filename->clear(); + return false; + } + return impl_->nextFile(filename); +} + +} // namespace gmx diff --git a/src/gromacs/utility/directoryenumerator.h b/src/gromacs/utility/directoryenumerator.h new file mode 100644 index 0000000000..2a3962cd9d --- /dev/null +++ b/src/gromacs/utility/directoryenumerator.h @@ -0,0 +1,122 @@ +/* + * 
This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2015, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \libinternal \file + * \brief + * Declares gmx::DirectoryEnumerator. 
+ *
+ * \author Teemu Murtola
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+#ifndef GMX_UTILITY_DIRECTORYENUMERATOR_H
+#define GMX_UTILITY_DIRECTORYENUMERATOR_H
+
+#include <string>
+#include <vector>
+
+#include "gromacs/utility/classhelpers.h"
+
+namespace gmx
+{
+
+/*! \libinternal \brief
+ * Lists files in a directory.
+ *
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+class DirectoryEnumerator
+{
+    public:
+        /*! \brief
+         * Convenience function to list files with certain extension from a
+         * directory.
+         *
+         * \param[in]  dirname   Path to the directory to list.
+         * \param[in]  extension List files with the given extension
+         *     (or suffix in file name).
+         * \param[in]  bThrow    Whether failure to open the directory should throw.
+         * \returns    Sorted list of files with the given extension in \p dirname.
+         * \throws std::bad_alloc if out of memory.
+         * \throws FileIOError if opening the directory fails and `bThrow == true`.
+         * \throws FileIOError if some other I/O error occurs.
+         */
+        static std::vector<std::string> enumerateFilesWithExtension(
+            const char *dirname, const char *extension, bool bThrow);
+
+        /*! \brief
+         * Opens a directory for listing.
+         *
+         * \param[in] dirname Path to the directory to list.
+         * \param[in] bThrow  Whether failure to open the directory should throw.
+         * \throws std::bad_alloc if out of memory.
+         * \throws FileIOError if opening the directory fails and `bThrow == true`.
+         */
+        explicit DirectoryEnumerator(const char *dirname, bool bThrow = true);
+        //! \copydoc DirectoryEnumerator(const char *, bool)
+        explicit DirectoryEnumerator(const std::string &dirname, bool bThrow = true);
+        ~DirectoryEnumerator();
+
+        /*! \brief
+         * Gets next file in a directory.
+         *
+         * \param[out] filename  Name of the next file.
+         * \returns `false` if there were no more files.
+         * \throws  std::bad_alloc if out of memory.
+         * \throws  FileIOError if listing the next file fails.
+         *
+         * If all files from the directory have been returned (or there are no
+         * files in the directory and this is the first call), the method
+         * returns `false` and \p filename is cleared.
+         * Otherwise, the return value is `true` and the first/next file name
+         * is returned in \p filename.
+         * \p filename will not contain any path information, only the name of
+         * the file.
+         *
+         * If `bThrow` passed to the constructor was `false` and the directory
+         * was not successfully opened, the first call to this function will
+         * return `false`.
+         */
+        bool nextFile(std::string *filename);
+
+    private:
+        class Impl;
+
+        PrivateImplPointer<Impl> impl_;
+};
+
+} // namespace gmx
+
+#endif
diff --git a/src/gromacs/utility/futil.cpp b/src/gromacs/utility/futil.cpp index 6a45173cd4..0860999329 100644 --- a/src/gromacs/utility/futil.cpp +++ b/src/gromacs/utility/futil.cpp @@ -48,15 +48,11 @@ #include #include -#ifdef HAVE_DIRENT_H -/* POSIX */ -#include -#endif #ifdef HAVE_UNISTD_H #include #endif #ifdef GMX_NATIVE_WINDOWS -#include +#include // For _chdir() and _getcwd() #include #include #endif @@ -525,194 +521,6 @@ FILE *gmx_ffopen(const char *file, const char *mode) #endif } -/* Our own implementation of dirent-like functionality to scan directories. 
*/ -struct gmx_directory -{ -#if defined(GMX_NATIVE_WINDOWS) - intptr_t windows_handle; - struct _finddata_t finddata; - int first; -#elif defined(HAVE_DIRENT_H) - DIR * dirent_handle; -#else - int dummy; -#endif -}; - - -int -gmx_directory_open(gmx_directory_t *p_gmxdir, const char *dirname) -{ - struct gmx_directory * gmxdir; - int rc; - - snew(gmxdir, 1); - - *p_gmxdir = gmxdir; - -#if defined(GMX_NATIVE_WINDOWS) - if (dirname != NULL && strlen(dirname) > 0) - { - char * tmpname; - int len; - - len = strlen(dirname); - snew(tmpname, len+3); - - strncpy(tmpname, dirname, len+1); - - /* Remove possible trailing directory separator */ - if (tmpname[len] == '/' || tmpname[len] == '\\') - { - tmpname[len] = '\0'; - } - - /* Add wildcard */ - strcat(tmpname, "/*"); - - gmxdir->first = 1; - if ( (gmxdir->windows_handle = _findfirst(tmpname, &gmxdir->finddata)) > 0L) - { - rc = 0; - } - else - { - if (errno == EINVAL) - { - sfree(gmxdir); - *p_gmxdir = NULL; - rc = EINVAL; - } - else - { - rc = 0; - } - } - } - else - { - rc = EINVAL; - } -#elif defined(HAVE_DIRENT_H) - if ( (gmxdir->dirent_handle = opendir(dirname)) != NULL) - { - rc = 0; - } - else - { - sfree(gmxdir); - *p_gmxdir = NULL; - rc = EINVAL; - } -#else - gmx_fatal(FARGS, - "Source compiled without POSIX dirent or windows support - cannot scan directories.\n" - "In the very unlikely event this is not a compile-time mistake you could consider\n" - "implementing support for your platform in futil.c, but contact the developers\n" - "to make sure it's really necessary!\n"); - rc = -1; -#endif - return rc; -} - - -int -gmx_directory_nextfile(gmx_directory_t gmxdir, char *name, int maxlength_name) -{ - int rc; - -#if defined(GMX_NATIVE_WINDOWS) - if (gmxdir != NULL) - { - if (gmxdir->windows_handle <= 0) - { - - name[0] = '\0'; - rc = ENOENT; - } - else if (gmxdir->first == 1) - { - strncpy(name, gmxdir->finddata.name, maxlength_name); - rc = 0; - gmxdir->first = 0; - } - else - { - if 
(_findnext(gmxdir->windows_handle, &gmxdir->finddata) == 0) - { - strncpy(name, gmxdir->finddata.name, maxlength_name); - rc = 0; - } - else - { - name[0] = '\0'; - rc = ENOENT; - } - } - } - else - { - name[0] = '\0'; - rc = EINVAL; - } -#elif defined(HAVE_DIRENT_H) - struct dirent * direntp_large; - struct dirent * p; - - - if (gmxdir != NULL && gmxdir->dirent_handle != NULL) - { - /* On some platforms no space is present for d_name in dirent. - * Since d_name is guaranteed to be the last entry, allocating - * extra space for dirent will allow more size for d_name. - * GMX_MAX_PATH should always be >= the max possible d_name. - */ - smalloc(direntp_large, sizeof(*direntp_large) + GMX_PATH_MAX); - rc = readdir_r(gmxdir->dirent_handle, direntp_large, &p); - - if (p != NULL && rc == 0) - { - strncpy(name, direntp_large->d_name, maxlength_name); - } - else - { - name[0] = '\0'; - rc = ENOENT; - } - sfree(direntp_large); - } - else - { - name[0] = '\0'; - rc = EINVAL; - } -#else - gmx_fatal(FARGS, - "Source compiled without POSIX dirent or windows support - cannot scan directories.\n"); - rc = -1; -#endif - return rc; -} - - -int -gmx_directory_close(gmx_directory_t gmxdir) -{ - int rc; -#if defined(GMX_NATIVE_WINDOWS) - rc = (gmxdir != NULL) ? _findclose(gmxdir->windows_handle) : EINVAL; -#elif defined(HAVE_DIRENT_H) - rc = (gmxdir != NULL) ? 
closedir(gmxdir->dirent_handle) : EINVAL; -#else - gmx_fatal(FARGS, - "Source compiled without POSIX dirent or windows support - cannot scan directories.\n"); - rc = -1; -#endif - - sfree(gmxdir); - return rc; -} - char *low_gmxlibfn(const char *file, gmx_bool bAddCWD, gmx_bool bFatal) { diff --git a/src/gromacs/utility/futil.h b/src/gromacs/utility/futil.h index 0b5c1eeadf..9db1c8cdf1 100644 --- a/src/gromacs/utility/futil.h +++ b/src/gromacs/utility/futil.h @@ -196,35 +196,6 @@ char *low_gmxlibfn(const char *file, gmx_bool bAddCWD, gmx_bool bFatal); */ FILE *low_libopen(const char *file, gmx_bool bFatal); -/** Opaque data type to list directories. */ -typedef struct gmx_directory *gmx_directory_t; - -/*! \brief - * Opens a directory for reading. - * - * \param[out] p_gmxdir Handle to the opened directory. - * \param[in] dirname Path to directory to open. - * \returns 0 on success. - */ -int -gmx_directory_open(gmx_directory_t *p_gmxdir, const char *dirname); - -/*! \brief - * Gets next file in a directory. - * - * Given an initialized gmx_directory_t, if there are more files in - * the directory this routine returns 0 and write the next name - * into the USER-PROVIDED buffer \p name. The last argument is the max - * number of characters that will be written. Just as strncpy(), the - * string will NOT be terminated it it is longer than \p maxlength_name. - */ -int -gmx_directory_nextfile(gmx_directory_t gmxdir, char *name, int maxlength_name); - -/** Releases all data for a directory structure. */ -int -gmx_directory_close(gmx_directory_t gmxdir); - /*! \brief * Creates unique name for temp file (wrapper around mkstemp). -- 2.22.0