Improve handling of GPU IDs

author Mark Abraham <mark.j.abraham@gmail.com>

Wed, 25 Oct 2017 10:08:01 +0000 (12:08 +0200)

committer Mark Abraham <mark.j.abraham@gmail.com>

Wed, 1 Nov 2017 11:11:17 +0000 (12:11 +0100)
author Mark Abraham <mark.j.abraham@gmail.com>
Wed, 25 Oct 2017 10:08:01 +0000 (12:08 +0200)
committer Mark Abraham <mark.j.abraham@gmail.com>
Wed, 1 Nov 2017 11:11:17 +0000 (12:11 +0100)
diff --git a/src/gromacs/gmxana/gmx_tune_pme.cpp b/src/gromacs/gmxana/gmx_tune_pme.cpp

index 295facf30d4e187e44272351f47e9f66d37c51ab..4c46036ec93c0159e357694eea25d79be9177574 100644 (file)
--- a/src/gromacs/gmxana/gmx_tune_pme.cpp
+++ b/src/gromacs/gmxana/gmx_tune_pme.cpp
@@ -42,6 +42,7 @@
  #include <ctime>
  
  #include <algorithm>
+#include <string>
  
  #ifdef HAVE_SYS_TIME_H
  #include <sys/time.h>
@@ -61,6 +62,7 @@
  #include "gromacs/mdtypes/md_enums.h"
  #include "gromacs/mdtypes/state.h"
  #include "gromacs/pbcutil/pbc.h"
+#include "gromacs/taskassignment/usergpuids.h"
  #include "gromacs/timing/walltime_accounting.h"
  #include "gromacs/topology/topology.h"
  #include "gromacs/utility/arraysize.h"
@@ -752,57 +754,18 @@ static void check_mdrun_works(gmx_bool    bThreads,
  }
  
  /* Handles the no-GPU case by emitting an empty string. */
-static char *make_gpu_id_command_line(int numRanks, int numPmeRanks, const std::vector<int> &gpu_ids)
+static std::string make_gpu_id_command_line(int numRanks, int numPmeRanks, const std::vector<int> &gpu_ids)
  {
-    char       *command_line, *ptr;
-    const char *flag = "-gpu_id ";
-    int         flag_length;
-
-    /* Reserve enough room for the option name, enough single-digit
-       GPU ids (since that is currently all that is possible to use
-       with mdrun), and a terminating NULL. */
-    flag_length = std::strlen(flag);
-    snew(command_line, flag_length + numRanks + 1);
-    ptr = command_line;
-
      /* If the user has given no eligible GPU IDs, or we're trying the
       * default behaviour, then there is nothing for g_tune_pme to give
       * to mdrun -gpu_id */
      if (!gpu_ids.empty() && numPmeRanks > -1)
      {
-        size_t numPpRanks, max_num_ranks_for_each_GPU;
-
-        /* Write the option flag */
-        std::strcpy(ptr, flag);
-        ptr += flag_length;
-
-        numPpRanks                 = numRanks - numPmeRanks;
-        max_num_ranks_for_each_GPU = numPpRanks / gpu_ids.size();
-        if (max_num_ranks_for_each_GPU * gpu_ids.size() != numPpRanks)
-        {
-            /* Some GPUs will receive more work than others, which
-             * we choose to be those with the lowest indices */
-            max_num_ranks_for_each_GPU++;
-        }
-
-        /* Loop over all eligible GPU ids */
-        for (size_t gpu_id = 0, rank = 0; gpu_id < gpu_ids.size(); gpu_id++)
-        {
-            size_t rank_for_this_GPU;
-            /* Loop over all PP ranks for GPU with ID gpu_id, building the
-               assignment string. */
-            for (rank_for_this_GPU = 0;
-                 rank_for_this_GPU < max_num_ranks_for_each_GPU && rank < numPpRanks;
-                 rank++, rank_for_this_GPU++)
-            {
-                *ptr = '0' + gpu_ids[gpu_id];
-                ptr++;
-            }
-        }
+        return "-gpu_id " + gmx::makeGpuIdString(gpu_ids, numRanks - numPmeRanks);
      }
-    *ptr = '\0';
  
-    return command_line;
+
+    return std::string();
  }
  
  static void launch_simulation(
@@ -819,26 +782,26 @@ static void launch_simulation(
          const std::vector<int>   &gpu_ids)        /* Vector of GPU IDs for
                                                     * constructing mdrun command lines */
  {
-    char  *command, *cmd_gpu_ids;
+    char  *command;
  
  
      /* Make enough space for the system call command,
       * (200 extra chars for -npme ... etc. options should suffice): */
      snew(command, std::strlen(cmd_mpirun)+std::strlen(cmd_mdrun)+std::strlen(cmd_np)+std::strlen(args_for_mdrun)+std::strlen(simulation_tpr)+200);
  
-    cmd_gpu_ids = make_gpu_id_command_line(nnodes, nPMEnodes, gpu_ids);
+    auto cmd_gpu_ids = make_gpu_id_command_line(nnodes, nPMEnodes, gpu_ids);
  
      /* Note that the -passall options requires args_for_mdrun to be at the end
       * of the command line string */
      if (bThreads)
      {
          sprintf(command, "%s%s-npme %d -s %s %s %s",
-                cmd_mdrun, cmd_np, nPMEnodes, simulation_tpr, args_for_mdrun, cmd_gpu_ids);
+                cmd_mdrun, cmd_np, nPMEnodes, simulation_tpr, args_for_mdrun, cmd_gpu_ids.c_str());
      }
      else
      {
          sprintf(command, "%s%s%s -npme %d -s %s %s %s",
-                cmd_mpirun, cmd_np, cmd_mdrun, nPMEnodes, simulation_tpr, args_for_mdrun, cmd_gpu_ids);
+                cmd_mpirun, cmd_np, cmd_mdrun, nPMEnodes, simulation_tpr, args_for_mdrun, cmd_gpu_ids.c_str());
      }
  
      fprintf(fp, "%s this command line to launch the simulation:\n\n%s", bLaunch ? "Using" : "Please use", command);
@@ -1435,7 +1398,7 @@ static void do_the_tests(
          int                       presteps,       /* DLB equilibration steps, is checked    */
          gmx_int64_t               cpt_steps,      /* Time step counter in the checkpoint    */
          gmx_bool                  bCheck,         /* Check whether benchmark mdrun works    */
-        const std::vector<int>   &gpu_ids)        /* Vector of GPU IDs for
+        const std::vector<int>   &gpu_ids)        /* GPU IDs for
                                                     * constructing mdrun command lines */
  {
      int      i, nr, k, ret, count = 0, totaltests;
@@ -1522,11 +1485,9 @@ static void do_the_tests(
          /* Loop over various numbers of PME nodes: */
          for (i = 0; i < *pmeentries; i++)
          {
-            char *cmd_gpu_ids = nullptr;
-
              pd = &perfdata[k][i];
  
-            cmd_gpu_ids = make_gpu_id_command_line(nnodes, nPMEnodes[i], gpu_ids);
+            auto cmd_gpu_ids = make_gpu_id_command_line(nnodes, nPMEnodes[i], gpu_ids);
  
              /* Loop over the repeats for each scenario: */
              for (nr = 0; nr < repeats; nr++)
@@ -1538,7 +1499,7 @@ static void do_the_tests(
                   * at the end of the command line string */
                  snew(pd->mdrun_cmd_line, cmdline_length);
                  sprintf(pd->mdrun_cmd_line, "%s-npme %d -s %s %s %s",
-                        cmd_stub, pd->nPMEnodes, tpr_names[k], cmd_args_bench, cmd_gpu_ids);
+                        cmd_stub, pd->nPMEnodes, tpr_names[k], cmd_args_bench, cmd_gpu_ids.c_str());
  
                  /* To prevent that all benchmarks fail due to a show-stopper argument
                   * on the mdrun command line, we make a quick check first.
@@ -1636,7 +1597,6 @@ static void do_the_tests(
                      break;
                  }
              } /* end of repeats loop */
-            sfree(cmd_gpu_ids);
          }     /* end of -npme loop */
      }         /* end of tpr file loop */
  
@@ -2455,7 +2415,7 @@ int gmx_tune_pme(int argc, char *argv[])
                  bench_nsteps, fnm, NFILE, sim_part, presteps,
                  asize(pa), pa);
      /* Check any GPU IDs passed make sense, and fill the data structure for them */
-    auto gpu_ids = gmx::parseDigitsFromString(eligible_gpu_ids);
+    auto gpu_ids = gmx::parseUserGpuIds(eligible_gpu_ids);
  
      /* Determine the maximum and minimum number of PME nodes to test,
       * the actual list of settings is build in do_the_tests(). */
diff --git a/src/gromacs/taskassignment/CMakeLists.txt b/src/gromacs/taskassignment/CMakeLists.txt

index 08cbbfb6e929f0fe874bc1642e6dc5415da1cab2..67c7be03ae69812e81d53237417f7326a41646f0 100644 (file)
--- a/src/gromacs/taskassignment/CMakeLists.txt
+++ b/src/gromacs/taskassignment/CMakeLists.txt
@@ -35,8 +35,9 @@
  gmx_add_libgromacs_sources(
      hardwareassign.cpp
      resourcedivision.cpp
+    usergpuids.cpp
      )
  
  if (BUILD_TESTING)
-#    add_subdirectory(tests)
+    add_subdirectory(tests)
  endif()
diff --git a/src/gromacs/taskassignment/hardwareassign.cpp b/src/gromacs/taskassignment/hardwareassign.cpp

index 231d0a1d0514d66c73b245c4d56ec5751f7001be..19896f00c13fc4841b807172727640f7eefd2b45 100644 (file)
--- a/src/gromacs/taskassignment/hardwareassign.cpp
+++ b/src/gromacs/taskassignment/hardwareassign.cpp
@@ -50,7 +50,7 @@
  #include "gromacs/hardware/gpu_hw_info.h"
  #include "gromacs/hardware/hw_info.h"
  #include "gromacs/mdtypes/commrec.h"
-#include "gromacs/utility/basenetwork.h"
+#include "gromacs/taskassignment/usergpuids.h"
  #include "gromacs/utility/cstringutil.h"
  #include "gromacs/utility/exceptions.h"
  #include "gromacs/utility/fatalerror.h"
@@ -65,32 +65,6 @@
  namespace gmx
  {
  
-std::vector<int> parseGpuTaskAssignment(const std::string &gpuTaskAssignment)
-{
-    std::vector<int> digits;
-    if (gpuTaskAssignment.empty())
-    {
-        return digits;
-    }
-
-    /* Parse a "plain" or comma-separated GPU ID string which contains
-     * a sequence of digits corresponding to GPU IDs; the order will
-     * indicate the assignment of GPU tasks on this node to GPU
-     * device IDs on this node. */
-    try
-    {
-        digits = parseDigitsFromString(gpuTaskAssignment);
-    }
-    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
-
-    if (digits.empty())
-    {
-        gmx_fatal(FARGS, "Empty GPU ID string encountered.\n"
-                  "An empty, delimiter-free, or comma-separated sequence of valid numeric IDs of available GPUs is required.\n");
-    }
-    return digits;
-}
-
  /*! \brief This function is responsible for the automated mapping the
   * GPUs to the processes on a single node.
   *
@@ -156,59 +130,11 @@ static std::vector<int> assign_rank_gpu_ids(const std::vector<int> &compatibleGp
      return taskAssignment;
  }
  
-/*! \brief Check that all user-selected GPUs are compatible.
- *
- * Given the \c userGpuTaskAssignment and \c compatibleGPUs, give a fatal
- * error if any selected GPUs is not compatible
- *
- * The error is given with a suitable descriptive message, which will
- * have context if this check is done after the hardware detection
- * results have been reported to the user. However, note that only the
- * GPUs detected on the master rank are reported, because of the
- * existing limitations of that reporting.
- *
- * \todo Note that the selected GPUs can be different on each rank,
- * and the IDs of compatible GPUs can be different on each node, so
- * this routine ought to do communication to determine whether all
- * ranks are able to proceed. Currently this relies on the MPI runtime
- * to kill the other processes because GROMACS lacks the appropriate
- * infrastructure to do a good job of coordinating error messages and
- * behaviour across MPMD ranks and multiple simulations.
- *
- * \param[in]   gpu_info               GPU information including device description.
- * \param[in]   compatibleGpus         Vector of compatible GPUs
- * \param[in]   userGpuTaskAssignment  The GPU selection from the user.
- */
-static void exitUnlessUserGpuTaskAssignmentIsValid(const gmx_gpu_info_t   &gpu_info,
-                                                   const std::vector<int> &compatibleGpus,
-                                                   const std::vector<int> &userGpuTaskAssignment)
-{
-    int         numIncompatibleGpuIds = 0;
-    std::string message
-        = "Some of the requested GPUs do not exist, behave strangely, or are not compatible:\n";
-
-    for (const auto &gpuId : userGpuTaskAssignment)
-    {
-        if (std::find(compatibleGpus.begin(), compatibleGpus.end(), gpuId) == compatibleGpus.end())
-        {
-            numIncompatibleGpuIds++;
-            message += gmx::formatString("    GPU #%d: %s\n",
-                                         gpuId,
-                                         getGpuCompatibilityDescription(gpu_info, gpuId));
-        }
-    }
-
-    if (numIncompatibleGpuIds > 0)
-    {
-        gmx_fatal(FARGS, message.c_str());
-    }
-}
-
  std::vector<int> mapPpRanksToGpus(bool                    rankCanUseGpu,
                                    const t_commrec        *cr,
                                    const gmx_gpu_info_t   &gpu_info,
                                    const std::vector<int> &compatibleGpus,
-                                  const gmx_hw_opt_t     &hw_opt)
+                                  const std::vector<int> &userGpuIds)
  {
      std::vector<int> taskAssignment;
  
@@ -217,11 +143,10 @@ std::vector<int> mapPpRanksToGpus(bool                    rankCanUseGpu,
          return taskAssignment;
      }
  
-    if (!hw_opt.gpuIdTaskAssignment.empty())
+    if (!userGpuIds.empty())
      {
-        auto userGpuTaskAssignment = parseGpuTaskAssignment(hw_opt.gpuIdTaskAssignment);
-        exitUnlessUserGpuTaskAssignmentIsValid(gpu_info, compatibleGpus, userGpuTaskAssignment);
-        taskAssignment = userGpuTaskAssignment;
+        checkUserGpuIds(gpu_info, compatibleGpus, userGpuIds);
+        taskAssignment = userGpuIds;
      }
      else
      {
diff --git a/src/gromacs/taskassignment/hardwareassign.h b/src/gromacs/taskassignment/hardwareassign.h

index 0294d25d1ad83465daed6d1349151e07abbac293..5dc3498a52877a525be82489c8e285b93ab5d5ec 100644 (file)
--- a/src/gromacs/taskassignment/hardwareassign.h
+++ b/src/gromacs/taskassignment/hardwareassign.h
@@ -54,7 +54,7 @@
  #include "gromacs/utility/basedefinitions.h"
  
  struct gmx_gpu_info_t;
-struct gmx_hw_opt_t;
+struct gmx_hw_info_t;
  struct t_commrec;
  
  namespace gmx
@@ -78,7 +78,7 @@ std::vector<int> parseGpuTaskAssignment(const std::string &gpuTaskAssignment);
   *
   * Will return a validated mapping from PP ranks (ie tasks that can
   * run on GPUs) to the device IDs of compatible GPUs on their node.
- * This will be from any non-empty assignment in hw_opt, otherwise a
+ * This will be from any non-empty assignment in \c userGpuIds, otherwise a
   * default automated mapping is generated.
   *
   * Note that PME-only ranks have always ignored mdrun -gpu_id, so do
@@ -89,7 +89,7 @@ std::vector<int> parseGpuTaskAssignment(const std::string &gpuTaskAssignment);
   * \param[in]     cr                     Communication record.
   * \param[in]     gpu_info               Information detected about GPUs
   * \param[in]     compatibleGpus         Vector of GPUs that are compatible
- * \param[in]     hw_opt                 Parallelisation options, including any user-specified GPU task assignment.
+ * \param[in]     userGpuIds             The GPU IDs chosen by the user for the GPU tasks, as a list of integers; empty if the user chose none.
   *
   * \returns  A valid GPU selection.
   */
@@ -97,7 +97,7 @@ std::vector<int> mapPpRanksToGpus(bool                    rankCanUseGpu,
                                    const t_commrec        *cr,
                                    const gmx_gpu_info_t   &gpu_info,
                                    const std::vector<int> &compatibleGpus,
-                                  const gmx_hw_opt_t     &hw_opt);
+                                  const std::vector<int> &userGpuIds);
  
  } // namespace
  
diff --git a/src/gromacs/taskassignment/resourcedivision.cpp b/src/gromacs/taskassignment/resourcedivision.cpp

index bf468a2d7f5c5c8f23d7ee8f9218fcf16a81deb6..541cd79d03dc74957afe032fe08020326f3477e9 100644 (file)
--- a/src/gromacs/taskassignment/resourcedivision.cpp
+++ b/src/gromacs/taskassignment/resourcedivision.cpp
@@ -331,6 +331,7 @@ class SingleRankChecker
   */
  int get_nthreads_mpi(const gmx_hw_info_t    *hwinfo,
                       gmx_hw_opt_t           *hw_opt,
+                     const std::vector<int> &userGpuIds,
                       int                     numPmeRanks,
                       bool                    nonbondedOnGpu,
                       const t_inputrec       *inputrec,
@@ -345,8 +346,7 @@ int get_nthreads_mpi(const gmx_hw_info_t    *hwinfo,
      const gmx::HardwareTopology &hwTop   = *hwinfo->hardwareTopology;
  
      /* If the user made a GPU task assignment, that sets the number of thread-MPI ranks. */
-    auto userGpuTaskAssignment = gmx::parseGpuTaskAssignment(hw_opt->gpuIdTaskAssignment);
-    int  numGpuIdsSupplied     = static_cast<int>(userGpuTaskAssignment.size());
+    int  numGpuIdsSupplied = static_cast<int>(userGpuIds.size());
  
      /* TODO Here we handle the case where the user set GPU IDs, and
         further below we handle the case where the algorithm does not
diff --git a/src/gromacs/taskassignment/resourcedivision.h b/src/gromacs/taskassignment/resourcedivision.h

index 3cc1eef8e9ca760b95114f3f14de5654f387b183..228a63e10db02c7da348cbcfb22036c81af16ba4 100644 (file)
--- a/src/gromacs/taskassignment/resourcedivision.h
+++ b/src/gromacs/taskassignment/resourcedivision.h
@@ -71,6 +71,7 @@ class MDLogger;
   */
  int get_nthreads_mpi(const gmx_hw_info_t    *hwinfo,
                       gmx_hw_opt_t           *hw_opt,
+                     const std::vector<int> &userGpuIds,
                       int                     numPmeRanks,
                       bool                    nonbondedOnGpu,
                       const t_inputrec       *inputrec,
diff --git a/src/gromacs/taskassignment/tests/CMakeLists.txt b/src/gromacs/taskassignment/tests/CMakeLists.txt

new file mode 100644 (file)

index 0000000..1525d1a
--- /dev/null
+++ b/src/gromacs/taskassignment/tests/CMakeLists.txt
@@ -0,0 +1,37 @@
+#
+# This file is part of the GROMACS molecular simulation package.
+#
+# Copyright (c) 2017, by the GROMACS development team, led by
+# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+# and including many others, as listed in the AUTHORS file in the
+# top-level source directory and at http://www.gromacs.org.
+#
+# GROMACS is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1
+# of the License, or (at your option) any later version.
+#
+# GROMACS is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with GROMACS; if not, see
+# http://www.gnu.org/licenses, or write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+#
+# If you want to redistribute modifications to GROMACS, please
+# consider that scientific software is very special. Version
+# control is crucial - bugs must be traceable. We will be happy to
+# consider code for inclusion in the official distribution, but
+# derived work must not be called official GROMACS. Details are found
+# in the README & COPYING files - if they are missing, get the
+# official version at http://www.gromacs.org.
+#
+# To help us fund GROMACS development, we humbly ask that you cite
+# the research papers on the package. Check out http://www.gromacs.org.
+
+gmx_add_unit_test(TaskAssignmentUnitTests taskassignment-test
+                  usergpuids.cpp
+                  )
diff --git a/src/gromacs/taskassignment/tests/usergpuids.cpp b/src/gromacs/taskassignment/tests/usergpuids.cpp

new file mode 100644 (file)

index 0000000..c475677
--- /dev/null
+++ b/src/gromacs/taskassignment/tests/usergpuids.cpp
@@ -0,0 +1,142 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2017, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief
+ * Tests for parsing user-specified GPU ID strings and for generating GPU ID strings
+ *
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_taskassignment
+ */
+#include "gmxpre.h"
+
+#include "gromacs/taskassignment/usergpuids.h"
+
+#include <string>
+#include <vector>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include "gromacs/utility/exceptions.h"
+
+namespace gmx
+{
+
+namespace test
+{
+namespace
+{
+
+TEST(GpuIdStringHandlingTest, ParsingAndReconstructionWork)
+{
+    using ::testing::UnorderedElementsAreArray;
+
+    // TODO It would be nicer to use EXPECT_THAT(assignment,
+    // UnorderedElementsAreArray({0,1})) but MSVC 2015 does
+    // not deduce the template arguments for <int, 2>.
+
+    // Test simple assignments, written with and without commas, and the strings made from them for 1, 2 and 3 tasks
+    {
+        const char *strings[] = { "01", "0,1", "0,1," };
+        for (const auto &s : strings)
+        {
+            auto assignment = parseUserGpuIds(s);
+            auto matcher    = UnorderedElementsAreArray<int, 2>({0, 1});
+            EXPECT_THAT(assignment, matcher) << "for string " << s;
+            EXPECT_EQ("0",     makeGpuIdString(assignment, 1));
+            EXPECT_EQ("0,1",   makeGpuIdString(assignment, 2));
+            EXPECT_EQ("0,0,1", makeGpuIdString(assignment, 3));
+        }
+    }
+    // Test an input that could be a single large index, or two small indices (without a comma it is two small indices); and back mappings
+    {
+        auto assignment = parseUserGpuIds("11");
+        auto matcher    = UnorderedElementsAreArray<int, 2>({1, 1});
+        EXPECT_THAT(assignment, matcher);
+        EXPECT_EQ("1",     makeGpuIdString(assignment, 1));
+        EXPECT_EQ("1,1",   makeGpuIdString(assignment, 2));
+        EXPECT_EQ("1,1,1", makeGpuIdString(assignment, 3));
+    }
+    // Test an input that must be a single large index, because of its trailing comma; and back mappings
+    {
+        auto assignment = parseUserGpuIds("11,");
+        auto matcher    = UnorderedElementsAreArray<int, 1>({11});
+        EXPECT_THAT(assignment, matcher);
+        EXPECT_EQ("11",       makeGpuIdString(assignment, 1));
+        EXPECT_EQ("11,11",    makeGpuIdString(assignment, 2));
+        EXPECT_EQ("11,11,11", makeGpuIdString(assignment, 3));
+    }
+    // Test multiple large indices, with and without a trailing comma; and back mappings
+    {
+        const char *strings[] = { "11,12", "11,12," };
+        for (const auto &s : strings)
+        {
+            auto assignment = parseUserGpuIds(s);
+            auto matcher    = UnorderedElementsAreArray<int, 2>({11, 12});
+            EXPECT_THAT(assignment, matcher) << "for string " << s;
+            EXPECT_EQ("11",       makeGpuIdString(assignment, 1));
+            EXPECT_EQ("11,12",    makeGpuIdString(assignment, 2));
+            EXPECT_EQ("11,11,12", makeGpuIdString(assignment, 3));
+        }
+    }
+}
+
+// An empty GPU ID string parses to no IDs, and no IDs for zero tasks
+// format back to an empty string.
+TEST(GpuIdStringHandlingTest, EmptyStringCanBeValid)
+{
+    const std::vector<int> parsedIds = parseUserGpuIds("");
+
+    EXPECT_THAT(parsedIds, ::testing::IsEmpty());
+    EXPECT_EQ("", makeGpuIdString(parsedIds, 0));
+}
+
+// Malformed GPU ID strings must be rejected with InvalidInputError.
+TEST(GpuIdStringHandlingTest, InvalidInputsThrow)
+{
+    const char *strings[] = {
+        "a", "0a", ",01", ",0,1", ",0,1,",
+        ":0", "0a:1b", "0:1:2",
+        ",", ";", ":", "-", "=",
+        // Empty tokens after a valid first ID; every other case that
+        // contains a comma is already rejected for its leading comma.
+        "0,,1", "0,1,,",
+    };
+    for (const auto &s : strings)
+    {
+        EXPECT_THROW(parseUserGpuIds(s), InvalidInputError) << "for string " << s;
+    }
+}
+
+} // namespace
+} // namespace
+} // namespace
diff --git a/src/gromacs/taskassignment/usergpuids.cpp b/src/gromacs/taskassignment/usergpuids.cpp

new file mode 100644 (file)

index 0000000..041e39f
--- /dev/null
+++ b/src/gromacs/taskassignment/usergpuids.cpp
@@ -0,0 +1,160 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2017, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief Defines routines for handling user-specified GPU IDs.
+ *
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_taskassignment
+ */
+#include "gmxpre.h"
+
+#include "usergpuids.h"
+
+#include <cctype>
+
+#include <algorithm>
+#include <exception>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "gromacs/gpu_utils/gpu_utils.h"
+#include "gromacs/hardware/hw_info.h"
+#include "gromacs/utility/exceptions.h"
+#include "gromacs/utility/stringutil.h"
+
+namespace gmx
+{
+
+// Parses a user-supplied GPU ID string into a list of integer IDs.
+//
+// Without any comma, every character must be a decimal digit and is
+// one single-digit ID ("011" gives 0, 1, 1). With commas, each
+// comma-separated token must consist only of decimal digits and is
+// one ID ("11,12" gives 11, 12); one trailing comma is permitted, so
+// "11," is the single ID 11. An empty string gives an empty list.
+//
+// Throws InvalidInputError for any other character, a leading comma,
+// an empty token, or a token whose value does not fit in an int.
+std::vector<int>
+parseUserGpuIds(const std::string &gpuIdString)
+{
+    // An optional comma is used to separate GPU IDs assigned to the
+    // same type of task, which will be useful for any nodes that have
+    // more than ten GPUs.
+
+    // std::isdigit has undefined behaviour for negative values other
+    // than EOF, so a plain char must go through unsigned char first.
+    const auto isDigit = [](char c) {
+            return std::isdigit(static_cast<unsigned char>(c)) != 0;
+        };
+
+    std::vector<int> digits;
+    digits.reserve(gpuIdString.length());
+    auto             foundCommaDelimiters = gpuIdString.find(',') != std::string::npos;
+    if (!foundCommaDelimiters)
+    {
+        for (const auto &c : gpuIdString)
+        {
+            if (!isDigit(c))
+            {
+                GMX_THROW(InvalidInputError(formatString("Invalid character in GPU ID string: \"%c\"\n", c)));
+            }
+            // Convert each character in the token to an integer
+            digits.push_back(c - '0');
+        }
+    }
+    else
+    {
+        if (gpuIdString[0] == ',')
+        {
+            GMX_THROW(InvalidInputError("Invalid use of leading comma in GPU ID string"));
+        }
+        std::istringstream ss(gpuIdString);
+        std::string        token;
+        token.reserve(gpuIdString.length());
+        while (std::getline(ss, token, ','))
+        {
+            if (token.empty())
+            {
+                GMX_THROW(InvalidInputError("Invalid use of comma in GPU ID string"));
+            }
+            // std::stoi alone would skip leading whitespace, accept a
+            // sign and ignore trailing characters, so "0a,1" would
+            // parse as 0,1. Require digits only before converting.
+            if (!std::all_of(token.begin(), token.end(), isDigit))
+            {
+                GMX_THROW(InvalidInputError(formatString("Invalid GPU ID in GPU ID string: \"%s\"\n", token.c_str())));
+            }
+            // Convert the whole token to an integer. After the check
+            // above the only possible failure is a value too large
+            // for int, which is reported like any other bad input.
+            int gpuId = 0;
+            try
+            {
+                gpuId = std::stoi(token);
+            }
+            catch (const std::exception &)
+            {
+                GMX_THROW(InvalidInputError(formatString("GPU ID is too large in GPU ID string: \"%s\"\n", token.c_str())));
+            }
+            digits.push_back(gpuId);
+        }
+    }
+    return digits;
+}
+
+// Builds the list of GPU IDs to use for numGpuTasks tasks by cycling
+// through compatibleGpus in order, wrapping around when there are
+// more tasks than GPUs, and returns that list sorted in ascending
+// order. Zero tasks give an empty list, even with no compatible GPUs.
+//
+// Requesting one or more tasks with no compatible GPUs is a caller
+// error. It is checked in release builds too, because the lookup
+// below would otherwise index into an empty vector.
+std::vector<int>
+makeGpuIds(const std::vector<int> &compatibleGpus,
+           size_t                  numGpuTasks)
+{
+    std::vector<int> gpuIdsToUse;
+    if (numGpuTasks == 0)
+    {
+        return gpuIdsToUse;
+    }
+    GMX_RELEASE_ASSERT(!compatibleGpus.empty(), "Must have compatible GPUs from which to build a list of GPU IDs to use");
+
+    gpuIdsToUse.reserve(numGpuTasks);
+    const size_t numCompatibleGpus = compatibleGpus.size();
+    for (size_t i = 0; i != numGpuTasks; ++i)
+    {
+        // The modulo wraps around and assigns tasks to the GPUs again.
+        gpuIdsToUse.push_back(compatibleGpus[i % numCompatibleGpus]);
+    }
+    std::sort(gpuIdsToUse.begin(), gpuIdsToUse.end());
+    return gpuIdsToUse;
+}
+
+// Returns the comma-separated decimal string of the GPU IDs that
+// makeGpuIds() selects from gpuIds for totalNumberOfTasks tasks.
+std::string
+makeGpuIdString(const std::vector<int> &gpuIds,
+                int                     totalNumberOfTasks)
+{
+    // makeGpuIds() takes a size_t count; the cast spells out the
+    // conversion that the call otherwise performs implicitly.
+    const std::vector<int> idsForTasks = makeGpuIds(gpuIds, static_cast<size_t>(totalNumberOfTasks));
+    return formatAndJoin(idsForTasks, ",", StringFormatter("%d"));
+}
+
+// Throws InconsistentInputError if any ID in gpuIds is not found in
+// compatibleGpus. The message has one line per such ID, giving the
+// ID and the description that getGpuCompatibilityDescription()
+// returns for it.
+void checkUserGpuIds(const gmx_gpu_info_t   &gpu_info,
+                     const std::vector<int> &compatibleGpus,
+                     const std::vector<int> &gpuIds)
+{
+    // One line is collected here for each requested ID that is not compatible.
+    std::string incompatibleGpuLines;
+    for (const int requestedId : gpuIds)
+    {
+        const bool isCompatible =
+            std::find(compatibleGpus.begin(), compatibleGpus.end(), requestedId) != compatibleGpus.end();
+        if (isCompatible)
+        {
+            continue;
+        }
+        incompatibleGpuLines += gmx::formatString("    GPU #%d: %s\n",
+                                                  requestedId,
+                                                  getGpuCompatibilityDescription(gpu_info, requestedId));
+    }
+
+    // Every collected line is non-empty, so an empty string means that
+    // all requested IDs were compatible.
+    if (incompatibleGpuLines.empty())
+    {
+        return;
+    }
+    std::string message
+        = "Some of the requested GPUs do not exist, behave strangely, or are not compatible:\n";
+    message += incompatibleGpuLines;
+    GMX_THROW(InconsistentInputError(message));
+}
+
+} // namespace
diff --git a/src/gromacs/taskassignment/usergpuids.h b/src/gromacs/taskassignment/usergpuids.h

new file mode 100644 (file)

index 0000000..280a819
--- /dev/null
+++ b/src/gromacs/taskassignment/usergpuids.h
@@ -0,0 +1,137 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2017, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \defgroup module_taskassignment Assigning simulation tasks to hardware (taskassignment)
+ * \ingroup group_mdrun
+ * \brief Provides code that manages assignment of simulation tasks to hardware.
+ */
+/*! \libinternal
+ * \file
+ * \brief Declares routines for handling user-specified GPU IDs.
+ *
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_taskassignment
+ * \inlibraryapi
+ */
+#ifndef GMX_TASKASSIGNMENT_USERGPUIDS_H
+#define GMX_TASKASSIGNMENT_USERGPUIDS_H
+
+#include <cstddef>
+
+#include <string>
+#include <vector>
+
+struct gmx_gpu_info_t;
+
+namespace gmx
+{
+
+/*! \brief Parse a GPU ID string into a container describing the task types and associated device IDs.
+ *
+ * \param[in]   gpuIdString  String like "013" or "0,1,3" typically
+ *                           supplied by the user to mdrun -gpu_id.
+ *                           Must contain only decimal digits, or only decimal
+ *                           digits separated by comma delimiters. A terminal
+ *                           comma is acceptable (and required to specify a
+ *                           single ID that is larger than 9).
+ *
+ * \returns  A vector of GPU ID task mappings, like { 0, 1, 3 }
+ *
+ * \throws   std::bad_alloc     If out of memory.
+ *           InvalidInputError  If an invalid character is found (ie not a digit or ',').
+ */
+std::vector<int>
+parseUserGpuIds(const std::string &gpuIdString);
+
+/*! \brief Make a vector containing \c numGpuTasks IDs of the IDs found in \c compatibleGpus.
+ *
+ * \throws  std::bad_alloc          If out of memory
+ *
+ * \returns A sorted vector of IDs of compatible GPUs, whose
+ * length matches the number of GPU tasks required.
+ */
+std::vector<int>
+makeGpuIds(const std::vector<int> &compatibleGpus,
+           size_t                  numGpuTasks);
+
+/*! \brief Convert a container of GPU device IDs to a string that
+ * can be used by gmx tune_pme as input to mdrun -gpu_id.
+ *
+ * Produce a valid input for mdrun -gpu_id that refers to the device
+ * IDs in \c gpuIds but produces a mapping for \c
+ * totalNumberOfTasks tasks.
+ *
+ * \param[in]   gpuIds              Container of device IDs
+ * \param[in]   totalNumberOfTasks  Total number of tasks for the output mapping produced by the returned string.
+ *
+ * \returns  A string that is suitable to pass to mdrun -gpu_id.
+ *
+ * \throws   std::bad_alloc     If out of memory.
+ */
+std::string
+makeGpuIdString(const std::vector<int> &gpuIds, int totalNumberOfTasks);
+
+/*! \brief Check that all user-selected GPUs are compatible.
+ *
+ * Given the \c gpuIds, \c compatibleGpus and \c gpu_info, throw if
+ * any selected GPU is not compatible.
+ *
+ * The error is given with a suitable descriptive message, which will
+ * have context if this check is done after the hardware detection
+ * results have been reported to the user. However, note that only the
+ * GPUs detected on the master rank are reported, because of the
+ * existing limitations of that reporting.
+ *
+ * \todo Note that the selected GPUs can be different on each rank,
+ * and the IDs of compatible GPUs can be different on each node, so
+ * this routine ought to do communication to determine whether all
+ * ranks are able to proceed. Currently this relies on the MPI runtime
+ * to kill the other processes because GROMACS lacks the appropriate
+ * infrastructure to do a good job of coordinating error messages and
+ * behaviour across MPMD ranks and multiple simulations.
+ *
+ * \param[in]   gpu_info        Information detected about GPUs
+ * \param[in]   compatibleGpus  Vector of GPUs that are compatible
+ * \param[in]   gpuIds          The GPU IDs selected by the user.
+ *
+ * \throws  std::bad_alloc          If out of memory
+ *          InconsistentInputError  If the assigned GPUs are not valid
+ */
+void checkUserGpuIds(const gmx_gpu_info_t   &gpu_info,
+                     const std::vector<int> &compatibleGpus,
+                     const std::vector<int> &gpuIds);
+
+} // namespace
+
+#endif
diff --git a/src/gromacs/utility/stringutil.cpp b/src/gromacs/utility/stringutil.cpp

index 386e40aaaf92999a90685bd4918e73d000c4c75c..6d3b19e0f6116136e0d9153058db9c467b6ff1e6 100644 (file)
--- a/src/gromacs/utility/stringutil.cpp
+++ b/src/gromacs/utility/stringutil.cpp
@@ -463,25 +463,4 @@ TextLineWrapper::wrapToVector(const std::string &input) const
      return result;
  }
  
-std::vector<int> parseDigitsFromString(const std::string &input)
-{
-    std::vector<int>   digits;
-    std::istringstream ss(input);
-    std::string        token;
-    digits.reserve(input.length());
-    token.reserve(input.length());
-    while (std::getline(ss, token, ','))
-    {
-        for (const auto &c : token)
-        {
-            if (std::isdigit(c) == 0)
-            {
-                GMX_THROW(InvalidInputError(formatString("Invalid character in digit-only string: \"%c\"\n", c)));
-            }
-            digits.push_back(c - '0');
-        }
-    }
-    return digits;
-}
-
  } // namespace gmx
diff --git a/src/gromacs/utility/stringutil.h b/src/gromacs/utility/stringutil.h

index 525037883833fb164fa86e3093db81425055834c..2c0be63baa9c37d52b6a027d2fbbde9be65d0fa6 100644 (file)
--- a/src/gromacs/utility/stringutil.h
+++ b/src/gromacs/utility/stringutil.h
@@ -686,18 +686,6 @@ class TextLineWrapper
          TextLineWrapperSettings settings_;
  };
  
-/*! \brief Construct a vector of decimal digits parsed from an \c input string.
- *
- * \param[in]  input  String that must contain only decimal digits, or only
- *                    decimal digits separated by comma delimiters.
- *
- * \returns           Vector of any digits found in \c input.
- *
- * \throws  std::bad_alloc if out of memory
- *          InvalidInputError if an invalid digit character is found.
- */
-std::vector<int> parseDigitsFromString(const std::string &input);
-
  //! \}
  
  } // namespace gmx
diff --git a/src/gromacs/utility/tests/stringutil.cpp b/src/gromacs/utility/tests/stringutil.cpp

index 1608f1909aec0818c27836da14ecc1525b0448d0..621544f6427a61f090d4f04f81dc015913d6fee9 100644 (file)
--- a/src/gromacs/utility/tests/stringutil.cpp
+++ b/src/gromacs/utility/tests/stringutil.cpp
@@ -431,22 +431,6 @@ TEST_F(TextLineWrapperTest, WrapsCorrectlyWithExtraWhitespace)
                "WrappedAt14WithTrailingWhitespace");
  }
  
-TEST(StringUtilityTest, ParseDigitsFromString)
-{
-    using ::testing::ElementsAre;
-    using ::testing::IsEmpty;
-    EXPECT_THAT(parseDigitsFromString("01"), ElementsAre(0, 1));
-    EXPECT_THAT(parseDigitsFromString("0,1"), ElementsAre(0, 1));
-    EXPECT_THAT(parseDigitsFromString(",0,1"), ElementsAre(0, 1));
-    EXPECT_THAT(parseDigitsFromString("0,1,"), ElementsAre(0, 1));
-    EXPECT_THAT(parseDigitsFromString(",0,1,"), ElementsAre(0, 1));
-    EXPECT_THAT(parseDigitsFromString(","), IsEmpty());
-    EXPECT_THAT(parseDigitsFromString(",,"), IsEmpty());
-    EXPECT_THAT(parseDigitsFromString(""), IsEmpty());
-    EXPECT_THROW(parseDigitsFromString("a"), InvalidInputError);
-    EXPECT_THROW(parseDigitsFromString("0a"), InvalidInputError);
-}
-
  } // namespace
  } // namespace
  } // namespace
diff --git a/src/programs/mdrun/runner.cpp b/src/programs/mdrun/runner.cpp

index 93bdd871c8864b2d865300c85541bc836334549b..2e4b7237cc3a76e1062eab0a07595d6190943457 100644 (file)
--- a/src/programs/mdrun/runner.cpp
+++ b/src/programs/mdrun/runner.cpp
@@ -102,6 +102,7 @@
  #include "gromacs/pulling/pull_rotation.h"
  #include "gromacs/taskassignment/hardwareassign.h"
  #include "gromacs/taskassignment/resourcedivision.h"
+#include "gromacs/taskassignment/usergpuids.h"
  #include "gromacs/timing/wallcycle.h"
  #include "gromacs/topology/mtop_util.h"
  #include "gromacs/trajectory/trajectoryframe.h"
@@ -472,13 +473,20 @@ int Mdrunner::mdrunner()
       * count. */
      EmulateGpuNonbonded emulateGpuNonbonded = (getenv("GMX_EMULATE_GPU") != nullptr ?
                                                 EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No);
+    std::vector<int>    userGpuIds;
+    try
+    {
+        userGpuIds = parseUserGpuIds(hw_opt.gpuIdTaskAssignment);
+    }
+    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
+
      bool                forceUseCpu           = (strncmp(nbpu_opt, "cpu", 3) == 0);
-    if (!hw_opt.gpuIdTaskAssignment.empty() && forceUseCpu)
+    if (!userGpuIds.empty() && forceUseCpu)
      {
          gmx_fatal(FARGS, "GPU IDs were specified, and short-ranged interactions were assigned to the CPU. Make no more than one of these choices.");
      }
-    bool forceUsePhysicalGpu = (strncmp(nbpu_opt, "gpu", 3) == 0) || !hw_opt.gpuIdTaskAssignment.empty();
-    bool tryUsePhysicalGpu   = (strncmp(nbpu_opt, "auto", 4) == 0) && hw_opt.gpuIdTaskAssignment.empty() && (emulateGpuNonbonded == EmulateGpuNonbonded::No);
+    bool forceUsePhysicalGpu = (strncmp(nbpu_opt, "gpu", 3) == 0) || !userGpuIds.empty();
+    bool tryUsePhysicalGpu   = (strncmp(nbpu_opt, "auto", 4) == 0) && userGpuIds.empty() && (emulateGpuNonbonded == EmulateGpuNonbonded::No);
      GMX_RELEASE_ASSERT(!(forceUsePhysicalGpu && tryUsePhysicalGpu), "Must either force use of "
                         "GPUs for short-ranged interactions, or try to use them, not both.");
      const PmeRunMode pmeRunMode = PmeRunMode::CPU;
@@ -584,6 +592,7 @@ int Mdrunner::mdrunner()
           * correctly. */
          hw_opt.nthreads_tmpi = get_nthreads_mpi(hwinfo,
                                                  &hw_opt,
+                                                userGpuIds,
                                                  domdecOptions.numPmeRanks,
                                                  nonbondedOnGpu,
                                                  inputrec, mtop,
@@ -869,17 +878,31 @@ int Mdrunner::mdrunner()
      std::vector<int> gpuTaskAssignment;
      if (nonbondedOnGpu)
      {
-        /* Currently the DD code assigns duty to ranks that can
-         * include PP work that currently can be executed on a single
-         * GPU, if present and compatible.  This has to be coordinated
-         * across PP ranks on a node, with possible multiple devices
-         * or sharing devices on a node, either from the user
-         * selection, or automatically. */
+        // Currently the DD code assigns duty to ranks that can
+        // include PP work that currently can be executed on a single
+        // GPU, if present and compatible.  This has to be coordinated
+        // across PP ranks on a node, with possible multiple devices
+        // or sharing devices on a node, either from the user
+        // selection, or automatically.
+        //
+        // GPU ID assignment strings, if provided, cover all the ranks on
+        // a node. If nodes or the process placement on them are
+        // heterogeneous, then the GMX_GPU_ID environment variable must be
+        // set by a user who also wishes to direct GPU ID assignment.
+        // Thus the implementation of task assignment can assume it has a
+        // GPU ID assignment appropriate for the node upon which its
+        // process is running.
+        //
+        // Valid GPU ID assignments are an ordered set of digits that
+        // identify GPU device IDs (e.g. as understood by the GPU runtime,
+        // and subject to environment modification such as with
+        // CUDA_VISIBLE_DEVICES) that will be used for the GPU-suitable
+        // tasks on all of the ranks of that node.
          bool rankCanUseGpu = thisRankHasDuty(cr, DUTY_PP);
-        gpuTaskAssignment = mapPpRanksToGpus(rankCanUseGpu, cr, hwinfo->gpu_info, hwinfo->compatibleGpus, hw_opt);
+        gpuTaskAssignment = mapPpRanksToGpus(rankCanUseGpu, cr, hwinfo->gpu_info, hwinfo->compatibleGpus, userGpuIds);
      }
  
-    reportGpuUsage(mdlog, hwinfo->gpu_info, !hw_opt.gpuIdTaskAssignment.empty(),
+    reportGpuUsage(mdlog, hwinfo->gpu_info, !userGpuIds.empty(),
                     gpuTaskAssignment, cr->nrank_pp_intranode, cr->nnodes > 1);
  
      if (!gpuTaskAssignment.empty())
author	Mark Abraham <mark.j.abraham@gmail.com>
	Wed, 25 Oct 2017 10:08:01 +0000 (12:08 +0200)
committer	Mark Abraham <mark.j.abraham@gmail.com>
	Wed, 1 Nov 2017 11:11:17 +0000 (12:11 +0100)
src/gromacs/gmxana/gmx_tune_pme.cpp		patch \| blob \| history
src/gromacs/taskassignment/CMakeLists.txt		patch \| blob \| history
src/gromacs/taskassignment/hardwareassign.cpp		patch \| blob \| history
src/gromacs/taskassignment/hardwareassign.h		patch \| blob \| history
src/gromacs/taskassignment/resourcedivision.cpp		patch \| blob \| history
src/gromacs/taskassignment/resourcedivision.h		patch \| blob \| history
src/gromacs/taskassignment/tests/CMakeLists.txt	[new file with mode: 0644]	patch \| blob
src/gromacs/taskassignment/tests/usergpuids.cpp	[new file with mode: 0644]	patch \| blob
src/gromacs/taskassignment/usergpuids.cpp	[new file with mode: 0644]	patch \| blob
src/gromacs/taskassignment/usergpuids.h	[new file with mode: 0644]	patch \| blob
src/gromacs/utility/stringutil.cpp		patch \| blob \| history
src/gromacs/utility/stringutil.h		patch \| blob \| history
src/gromacs/utility/tests/stringutil.cpp		patch \| blob \| history
src/programs/mdrun/runner.cpp		patch \| blob \| history