Enhance path search functions
authorMark Abraham <mark.j.abraham@gmail.com>
Tue, 30 Apr 2019 15:03:53 +0000 (17:03 +0200)
committerMark Abraham <mark.j.abraham@gmail.com>
Thu, 2 May 2019 12:52:44 +0000 (14:52 +0200)
Extracted common implementation functions based on compat::string_view
and simplified the uses of them. Some temporary strings are no longer
created. Added, updated, and corrected Doxygen. Removed comments about
dot and dot-dot special directories, as the code does not do anything
about them. Moved some naming in the direction of C++17
std::filesystem::path.

Added new tests to cover the changes. Used namespaces better in the
test file. Used reference data for the tests.

Eliminated getParentPathAndBasename as redundant.

Change-Id: I11a1a25a257da753ca83f5bdd09d06b11c76cec5

docs/doxygen/cycle-suppressions.txt
src/gromacs/utility/futil.cpp
src/gromacs/utility/path.cpp
src/gromacs/utility/path.h
src/gromacs/utility/tests/path.cpp
src/gromacs/utility/tests/refdata/PathTest_SearchOperationsWork.xml [new file with mode: 0644]

index 76e3453a9c104385bda8f02fb5057f39ac7b13d8..56796b5f842da490b8751eec0d641ab2d4421d30 100644 (file)
@@ -1,6 +1,12 @@
 # Order (or more generally, edge selection) is significant (see gmxtree.md);
 # "moduleA -> moduleB" means that moduleA should not depend on moduleB, and is
 # a problem to be addressed at some point.
+
+# Compat wants assertions and string code from utility, and other string code
+# in utility wants to use string_view from compat. If we fix this, it might be
+# best to implement the compat namespace within the utility module.
+compat -> utility
+
 domdec -> imd
 domdec -> ewald
 domdec -> mdlib
index ccffa764d188e0836ba9118075e03516ea27ce66..99cb8f97b1a60e0777b77c7ca811d732a3f239ea 100644 (file)
@@ -367,8 +367,9 @@ static std::string backup_fn(const std::string &file)
 {
     int          count = 1;
 
-    std::string  directory, fn, buf;
-    std::tie(directory, fn) = gmx::Path::getParentPathAndBasename(file);
+    std::string  directory = gmx::Path::getParentPath(file);
+    std::string  fn        = gmx::Path::getFilename(file);
+    std::string  buf;
     if (directory.empty())
     {
         directory = ".";
index c1da7e4c939a475cb6bbed1295ab6e49851a8c30..57335b1965469c0bdd49dd40af91ec4479d1eeb5 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -67,6 +67,7 @@
 #endif
 #endif
 
+#include "gromacs/compat/string_view.h"
 #include "gromacs/utility/dir_separator.h"
 #include "gromacs/utility/exceptions.h"
 #include "gromacs/utility/futil.h"
@@ -265,79 +266,111 @@ std::string Path::join(const std::string &path1,
     return path1 + cDirSeparator + path2 + cDirSeparator + path3;
 }
 
-std::string Path::getParentPath(const std::string &path)
+namespace
+{
+
+/*! \brief Returns a view of the parent path (ie. directory
+ * components) of \c input ie. up to but excluding the last directory
+ * separator (if one exists).
+ *
+ * \returns A view of the parent-path components, or empty if no
+ * directory separator exists. */
+compat::string_view getParentPathView(const std::string &input)
 {
-    /* Expects that the path doesn't contain "." or "..". If used on a path for
-     * which this isn't guaranteed realpath needs to be called first. */
-    size_t pos = path.find_last_of(cDirSeparators);
+    auto   inputView = compat::to_string_view(input);
+    size_t pos       = inputView.find_last_of(cDirSeparators);
     if (pos == std::string::npos)
     {
-        return std::string();
+        return compat::string_view();
     }
-    return path.substr(0, pos);
+    return inputView.substr(0, pos);
 }
 
-std::pair<std::string, std::string> Path::getParentPathAndBasename(const std::string &path)
+/*! \brief Returns a view of the filename \c in input ie. after the
+ * last directory separator (if one exists).
+ *
+ * \returns A view of the filename component. */
+compat::string_view getFilenameView(const std::string &input)
 {
-    /* Expects that the path doesn't contain "." or "..". If used on a path for
-     * which this isn't guaranteed realpath needs to be called first. */
-    size_t pos = path.find_last_of(cDirSeparators);
+    auto   inputView = compat::to_string_view(input);
+    size_t pos       = inputView.find_last_of(cDirSeparators);
     if (pos == std::string::npos)
     {
-        return std::make_pair(std::string(), path);
+        return inputView;
     }
-    return std::make_pair(path.substr(0, pos), path.substr(pos+1));
+    return inputView.substr(pos+1);
+}
+
+/*! \brief Returns a view of the stem of the filename in \c input.
+ *
+ * The search for the extension separator takes place only within the
+ * filename component, ie. omitting any leading directories.
+ *
+ * \returns  The view of the filename stem, or empty if none exists. */
+compat::string_view getStemView(const std::string &input)
+{
+    auto   filenameView               = getFilenameView(input);
+    size_t extensionSeparatorPosition = filenameView.find_last_of('.');
+    // If no separator is found, the returned view is of the whole filename.
+    return filenameView.substr(0, extensionSeparatorPosition);
 }
 
-std::string Path::getFilename(const std::string &path)
+/*! \brief Returns a view of the file extension of \c input, including the dot.
+ *
+ * The search for the extension separator takes place only within the
+ * filename component, ie. omitting any leading directories.
+ *
+ * \returns  The view of the file extension, or empty if none exists. */
+compat::string_view getExtensionView(const std::string &input)
 {
-    size_t pos = path.find_last_of(cDirSeparators);
-    if (pos == std::string::npos)
+    auto   filenameView               = getFilenameView(input);
+    size_t extensionSeparatorPosition = filenameView.find_last_of('.');
+    if (extensionSeparatorPosition == compat::string_view::npos)
     {
-        return path;
+        // No separator was found
+        return compat::string_view();
     }
-    return path.substr(pos+1);
+    return filenameView.substr(extensionSeparatorPosition);
 }
 
-bool Path::hasExtension(const std::string &path)
+}   // namespace
+
+std::string Path::getParentPath(const std::string &input)
 {
-    return getFilename(path).find('.') != std::string::npos;
+    return compat::to_string(getParentPathView(input));
 }
 
-std::string Path::stripExtension(const std::string &path)
+std::string Path::getFilename(const std::string &input)
 {
-    size_t dirSeparatorPos = path.find_last_of(cDirSeparators);
-    size_t extPos          = path.find_last_of('.');
-    if (extPos == std::string::npos
-        || (dirSeparatorPos != std::string::npos && extPos < dirSeparatorPos))
-    {
-        return path;
-    }
-    return path.substr(0, extPos);
+    return to_string(getFilenameView(input));
+}
+
+bool Path::hasExtension(const std::string &input)
+{
+    // This could be implemented with getStemView, but that search is
+    // less efficient than just finding the first of possibly multiple
+    // separator characters.
+    return getFilenameView(input).find('.') != std::string::npos;
+}
+
+std::string Path::stripExtension(const std::string &input)
+{
+    auto   pathView   = getParentPathView(input);
+    // Make sure the returned string will have room for the directory
+    // separator between the parent path and the stem, but only where
+    // it is needed.
+    size_t pathLength = pathView.empty() ? 0 : pathView.length() + 1;
+    auto   stemView   = getStemView(input);
+    return std::string(std::begin(input),
+                       std::begin(input) + pathLength + stemView.length());
 }
 
 std::string Path::concatenateBeforeExtension(const std::string &input, const std::string &stringToAdd)
 {
-    std::string output;
-    size_t      dirSeparatorPosition = input.find_last_of(cDirSeparators);
-    size_t      extSeparatorPosition = input.find_last_of('.');
-    bool        havePath             = (dirSeparatorPosition != std::string::npos);
-    // Make sure that if there's an extension-separator character,
-    // that it follows the last path-separator character (if any),
-    // before we interpret it as an extension separator.
-    bool haveExtension = (extSeparatorPosition != std::string::npos &&
-                          ((!havePath ||
-                            (extSeparatorPosition > dirSeparatorPosition))));
-    if (!haveExtension)
-    {
-        output = input + stringToAdd;
-    }
-    else
-    {
-        output  = input.substr(0, extSeparatorPosition);
-        output += stringToAdd;
-        output += std::string(input, extSeparatorPosition);
-    }
+    std::string output = stripExtension(input);
+    output += stringToAdd;
+    auto        extensionView = getExtensionView(input);
+    output.append(std::begin(extensionView), std::end(extensionView));
     return output;
 }
 
index b6774068a67ad4167eab2d54f29023d38162e7e7..7775308df461aff24f17942143b1faee477fb8c7 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2011,2012,2013,2014,2015,2016,2018, by the GROMACS development team, led by
+ * Copyright (c) 2011,2012,2013,2014,2015,2016,2018,2019, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -66,25 +66,24 @@ class Path
                                 const std::string &path3);
         //! Return a path using directory separators that suit the execution OS.
         static std::string normalize(const std::string &path);
-        /*! \brief Returns the part of the path before the last
-         * directory separator, if any.
+        /*! \brief Returns a copy of the parent path (ie. directory
+         * components) of \c input ie. up to but excluding the last
+         * directory separator (if one exists).
          *
-         * Path must not contain '.' or '..' elements. */
-        static std::string getParentPath(const std::string &path);
-        /*! \brief Returns the parts of the path before and after the
-         * last directory separator, if any.
-         *
-         * Path must not contain '.' or '..' elements. */
-        static std::pair<std::string, std::string> getParentPathAndBasename(const std::string &path);
-        static std::string getFilename(const std::string &path);
-        static bool hasExtension(const std::string &path);
-        static std::string stripExtension(const std::string &path);
-        /*! \brief Concatenate \c stringToAdd to \c input, before any
-         * file extension (if one exists), and return the result.
-         *
-         * To be recognized as an extension, an extension-separator
-         * character must follow the last path-separator character (if
-         * any). */
+         * \returns A copy of the parent path-components, or empty if
+         * no directory separator exists. */
+        static std::string getParentPath(const std::string &input);
+        /*! \brief Returns a copy of the filename in \c input
+         * ie. after the last directory separator (if one exists). */
+        static std::string getFilename(const std::string &input);
+        //! Returns whether an extension is present in \c input.
+        static bool hasExtension(const std::string &input);
+        /*! \brief Returns a copy of the input without any trailing
+         * extension found in the filename component. */
+        static std::string stripExtension(const std::string &input);
+        /*! \brief Concatenate \c stringToAdd to a copy of \c input,
+         * before any file extension (if one exists), and return the
+         * result. */
         static std::string concatenateBeforeExtension(const std::string &input,
                                                       const std::string &stringToAdd);
 
index 786f4235b97351ecb0dfb53ee94e4ae4ba2bdf91..8cc71dfc9e4124993a11fb4ff9f1e6f0a0d6480c 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2016,2018, by the GROMACS development team, led by
+ * Copyright (c) 2016,2018,2019, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 
 #include <gtest/gtest.h>
 
+#include "testutils/refdata.h"
+#include "testutils/testasserts.h"
+
+namespace gmx
+{
+namespace test
+{
 namespace
 {
 
 TEST(PathTest, StripSourcePrefixWorks)
 {
-    EXPECT_STREQ("", gmx::Path::stripSourcePrefix(""));
-    EXPECT_STREQ("foo.cpp", gmx::Path::stripSourcePrefix("foo.cpp"));
-    EXPECT_STREQ("foo.cpp", gmx::Path::stripSourcePrefix("some/dir/foo.cpp"));
-    EXPECT_STREQ("foo.cpp", gmx::Path::stripSourcePrefix("src/some/dir/foo.cpp"));
+    EXPECT_STREQ("", Path::stripSourcePrefix(""));
+    EXPECT_STREQ("foo.cpp", Path::stripSourcePrefix("foo.cpp"));
+    EXPECT_STREQ("foo.cpp", Path::stripSourcePrefix("some/dir/foo.cpp"));
+    EXPECT_STREQ("foo.cpp", Path::stripSourcePrefix("src/some/dir/foo.cpp"));
     EXPECT_STREQ("foo.cpp",
-                 gmx::Path::stripSourcePrefix("srcx/gromacs/foo.cpp"));
+                 Path::stripSourcePrefix("srcx/gromacs/foo.cpp"));
     EXPECT_STREQ("src/gromacs/foo.cpp",
-                 gmx::Path::stripSourcePrefix("src/gromacs/foo.cpp"));
+                 Path::stripSourcePrefix("src/gromacs/foo.cpp"));
     EXPECT_STREQ("src/gromacs/foo.cpp",
-                 gmx::Path::stripSourcePrefix("some/dir/src/gromacs/foo.cpp"));
+                 Path::stripSourcePrefix("some/dir/src/gromacs/foo.cpp"));
     // TODO: For in-source builds, this might not work.
-    EXPECT_EQ(gmx::Path::normalize("src/gromacs/utility/tests/path.cpp"),
-              gmx::Path::stripSourcePrefix(__FILE__))
+    EXPECT_EQ(Path::normalize("src/gromacs/utility/tests/path.cpp"),
+              Path::stripSourcePrefix(__FILE__))
     << "stripSourcePrefix() does not work with compiler-produced file names. "
     << "This only affects source paths reported in fatal error messages.";
 }
 
-TEST(PathTest, ConcatenateBeforeExtensionWorks)
+TEST(PathTest, SearchOperationsWork)
 {
-    EXPECT_STREQ("md1.log", gmx::Path::concatenateBeforeExtension("md.log", "1").c_str());
-    EXPECT_STREQ("ener0", gmx::Path::concatenateBeforeExtension("ener", "0").c_str());
-    EXPECT_STREQ("simpledir/traj34.tng", gmx::Path::concatenateBeforeExtension("simpledir/traj.tng", "34").c_str());
-    EXPECT_STREQ("complex.dir/traj34.tng", gmx::Path::concatenateBeforeExtension("complex.dir/traj.tng", "34").c_str());
-    EXPECT_STREQ("complex.dir/traj34", gmx::Path::concatenateBeforeExtension("complex.dir/traj", "34").c_str());
-}
-
-TEST(PathTest, GetParentPathWorks)
-{
-    {
-        auto result = gmx::Path::getParentPath("");
-        EXPECT_EQ("", result);
-    }
-    {
-        auto result = gmx::Path::getParentPath("file.txt");
-        EXPECT_EQ("", result);
-    }
-    {
-        auto result = gmx::Path::getParentPath("dir/file.txt");
-        EXPECT_EQ("dir", result);
-    }
-    {
-        auto result = gmx::Path::getParentPath("windowsdir\\file.txt");
-        EXPECT_EQ("windowsdir", result);
-    }
-    {
-        auto result = gmx::Path::getParentPath("dir/anotherdir/file.txt");
-        EXPECT_EQ("dir/anotherdir", result);
-    }
-    {
-        auto result = gmx::Path::getParentPath("dir");
-        EXPECT_EQ("", result);
-    }
-    {
-        auto result = gmx::Path::getParentPath("dir/anotherdir");
-        EXPECT_EQ("dir", result);
-    }
-}
-
-TEST(PathTest, GetParentPathAndBasenameWorks)
-{
-    {
-        auto result = gmx::Path::getParentPathAndBasename("");
-        EXPECT_EQ("", std::get<0>(result));
-        EXPECT_EQ("", std::get<1>(result));
-    }
-    {
-        auto result = gmx::Path::getParentPathAndBasename("file.txt");
-        EXPECT_EQ("", std::get<0>(result));
-        EXPECT_EQ("file.txt", std::get<1>(result));
-    }
-    {
-        auto result = gmx::Path::getParentPathAndBasename("dir/file.txt");
-        EXPECT_EQ("dir", std::get<0>(result));
-        EXPECT_EQ("file.txt", std::get<1>(result));
-    }
-    {
-        auto result = gmx::Path::getParentPathAndBasename("windowsdir\\file.txt");
-        EXPECT_EQ("windowsdir", std::get<0>(result));
-        EXPECT_EQ("file.txt", std::get<1>(result));
-    }
-    {
-        auto result = gmx::Path::getParentPathAndBasename("dir/anotherdir/file.txt");
-        EXPECT_EQ("dir/anotherdir", std::get<0>(result));
-        EXPECT_EQ("file.txt", std::get<1>(result));
-    }
-    {
-        auto result = gmx::Path::getParentPathAndBasename("dir");
-        EXPECT_EQ("", std::get<0>(result));
-        EXPECT_EQ("dir", std::get<1>(result));
-    }
+    gmx::test::TestReferenceData    data;
+    gmx::test::TestReferenceChecker rootChecker(data.rootChecker());
+    for (const std::string &input : { "",
+                                      "md.log",
+                                      "md",
+                                      "/tmp/absolute.txt",
+                                      "simpledir/traj.tng",
+                                      "simpledir/traj",
+                                      "windowsdir\\traj.tng",
+                                      "complex.dir/traj.tng",
+                                      "complex.dir/traj",
+                                      "nested/dir/conf.pdb",
+                                      "/tmp/absolutedir/conf.pdb"})
     {
-        auto result = gmx::Path::getParentPathAndBasename("dir/anotherdir");
-        EXPECT_EQ("dir", std::get<0>(result));
-        EXPECT_EQ("anotherdir", std::get<1>(result));
+        SCOPED_TRACE(std::string("for input '") + input + "'");
+        auto checker = rootChecker.checkCompound("PathToTest", input);
+        {
+            std::string result;
+            ASSERT_NO_THROW_GMX(result = Path::getParentPath(input));
+            checker.checkString(result, "getParentPath");
+        }
+        {
+            std::string result;
+            ASSERT_NO_THROW_GMX(result = Path::getFilename(input));
+            checker.checkString(result, "getFilename");
+        }
+        {
+            bool result = false;
+            ASSERT_NO_THROW_GMX(result = Path::hasExtension(input));
+            checker.checkBoolean(result, "hasExtension");
+        }
+        {
+            std::string result;
+            ASSERT_NO_THROW_GMX(result = Path::stripExtension(input));
+            checker.checkString(result, "stripExtension");
+        }
+        {
+            std::string result;
+            ASSERT_NO_THROW_GMX(result = Path::concatenateBeforeExtension(input, "_34"));
+            checker.checkString(result, "concatenateBeforeExtension");
+        }
     }
 }
 
 } // namespace
+} // namespace test
+} // namespace gmx
diff --git a/src/gromacs/utility/tests/refdata/PathTest_SearchOperationsWork.xml b/src/gromacs/utility/tests/refdata/PathTest_SearchOperationsWork.xml
new file mode 100644 (file)
index 0000000..3937ad1
--- /dev/null
@@ -0,0 +1,81 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="referencedata.xsl"?>
+<ReferenceData>
+  <PathToTest>
+    <String Name="getParentPath"></String>
+    <String Name="getFilename"></String>
+    <Bool Name="hasExtension">false</Bool>
+    <String Name="stripExtension"></String>
+    <String Name="concatenateBeforeExtension">_34</String>
+  </PathToTest>
+  <PathToTest Name="md.log">
+    <String Name="getParentPath"></String>
+    <String Name="getFilename">md.log</String>
+    <Bool Name="hasExtension">true</Bool>
+    <String Name="stripExtension">md</String>
+    <String Name="concatenateBeforeExtension">md_34.log</String>
+  </PathToTest>
+  <PathToTest Name="md">
+    <String Name="getParentPath"></String>
+    <String Name="getFilename">md</String>
+    <Bool Name="hasExtension">false</Bool>
+    <String Name="stripExtension">md</String>
+    <String Name="concatenateBeforeExtension">md_34</String>
+  </PathToTest>
+  <PathToTest Name="/tmp/absolute.txt">
+    <String Name="getParentPath">/tmp</String>
+    <String Name="getFilename">absolute.txt</String>
+    <Bool Name="hasExtension">true</Bool>
+    <String Name="stripExtension">/tmp/absolute</String>
+    <String Name="concatenateBeforeExtension">/tmp/absolute_34.txt</String>
+  </PathToTest>
+  <PathToTest Name="simpledir/traj.tng">
+    <String Name="getParentPath">simpledir</String>
+    <String Name="getFilename">traj.tng</String>
+    <Bool Name="hasExtension">true</Bool>
+    <String Name="stripExtension">simpledir/traj</String>
+    <String Name="concatenateBeforeExtension">simpledir/traj_34.tng</String>
+  </PathToTest>
+  <PathToTest Name="simpledir/traj">
+    <String Name="getParentPath">simpledir</String>
+    <String Name="getFilename">traj</String>
+    <Bool Name="hasExtension">false</Bool>
+    <String Name="stripExtension">simpledir/traj</String>
+    <String Name="concatenateBeforeExtension">simpledir/traj_34</String>
+  </PathToTest>
+  <PathToTest Name="windowsdir\traj.tng">
+    <String Name="getParentPath">windowsdir</String>
+    <String Name="getFilename">traj.tng</String>
+    <Bool Name="hasExtension">true</Bool>
+    <String Name="stripExtension">windowsdir\traj</String>
+    <String Name="concatenateBeforeExtension">windowsdir\traj_34.tng</String>
+  </PathToTest>
+  <PathToTest Name="complex.dir/traj.tng">
+    <String Name="getParentPath">complex.dir</String>
+    <String Name="getFilename">traj.tng</String>
+    <Bool Name="hasExtension">true</Bool>
+    <String Name="stripExtension">complex.dir/traj</String>
+    <String Name="concatenateBeforeExtension">complex.dir/traj_34.tng</String>
+  </PathToTest>
+  <PathToTest Name="complex.dir/traj">
+    <String Name="getParentPath">complex.dir</String>
+    <String Name="getFilename">traj</String>
+    <Bool Name="hasExtension">false</Bool>
+    <String Name="stripExtension">complex.dir/traj</String>
+    <String Name="concatenateBeforeExtension">complex.dir/traj_34</String>
+  </PathToTest>
+  <PathToTest Name="nested/dir/conf.pdb">
+    <String Name="getParentPath">nested/dir</String>
+    <String Name="getFilename">conf.pdb</String>
+    <Bool Name="hasExtension">true</Bool>
+    <String Name="stripExtension">nested/dir/conf</String>
+    <String Name="concatenateBeforeExtension">nested/dir/conf_34.pdb</String>
+  </PathToTest>
+  <PathToTest Name="/tmp/absolutedir/conf.pdb">
+    <String Name="getParentPath">/tmp/absolutedir</String>
+    <String Name="getFilename">conf.pdb</String>
+    <Bool Name="hasExtension">true</Bool>
+    <String Name="stripExtension">/tmp/absolutedir/conf</String>
+    <String Name="concatenateBeforeExtension">/tmp/absolutedir/conf_34.pdb</String>
+  </PathToTest>
+</ReferenceData>