Improved diagnostics when PME cannot run on GPUs
authorMark Abraham <mark.j.abraham@gmail.com>
Thu, 14 Feb 2019 16:01:25 +0000 (17:01 +0100)
committerPaul Bauer <paul.bauer.q@gmail.com>
Fri, 15 Feb 2019 11:00:57 +0000 (12:00 +0100)
Fixes #2789, #2823

Change-Id: Iff7609a3774c11329d36bd8557b741a4d29f7c5a

src/gromacs/ewald/pme.cpp
src/gromacs/ewald/pme.h
src/gromacs/ewald/tests/pmetestcommon.cpp
src/gromacs/ewald/tests/testhardwarecontexts.cpp
src/gromacs/mdrun/runner.cpp
src/gromacs/taskassignment/decidegpuusage.cpp
src/gromacs/taskassignment/decidegpuusage.h
src/gromacs/taskassignment/resourcedivision.cpp
src/programs/mdrun/tests/pmetest.cpp

index fab79704649cff8be4fedd79c54a1970b67a37d6..d687e855cbb229a5a51d1164b1f731ddbe835190 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -133,33 +133,48 @@ static bool
 addMessageIfNotSupported(const std::list<std::string> &errorReasons,
                          std::string                  *error)
 {
-    bool foundErrorReasons = errorReasons.empty();
-    if (!foundErrorReasons && error)
+    bool isSupported = errorReasons.empty();
+    if (!isSupported && error)
     {
         std::string regressionTestMarker = "PME GPU does not support";
         // this prefix is tested for in the regression tests script gmxtest.pl
-        *error = regressionTestMarker + ": " + gmx::joinStrings(errorReasons, "; ") + ".";
+        *error = regressionTestMarker;
+        if (errorReasons.size() == 1)
+        {
+            *error += " " + errorReasons.back();
+        }
+        else
+        {
+            *error += ": " + gmx::joinStrings(errorReasons, "; ");
+        }
+        *error += ".";
     }
-    return foundErrorReasons;
+    return isSupported;
 }
 
-bool pme_gpu_supports_build(const gmx_hw_info_t &hwinfo,
-                            std::string         *error)
+bool pme_gpu_supports_build(std::string *error)
 {
     std::list<std::string> errorReasons;
     if (GMX_DOUBLE)
     {
-        errorReasons.emplace_back("double precision");
+        errorReasons.emplace_back("a double-precision build");
     }
     if (GMX_GPU == GMX_GPU_NONE)
     {
-        errorReasons.emplace_back("non-GPU build of GROMACS");
+        errorReasons.emplace_back("a non-GPU build");
     }
+    return addMessageIfNotSupported(errorReasons, error);
+}
+
+bool pme_gpu_supports_hardware(const gmx_hw_info_t &hwinfo,
+                               std::string         *error)
+{
+    std::list<std::string> errorReasons;
     if (GMX_GPU == GMX_GPU_OPENCL)
     {
         if (!areAllGpuDevicesFromAmd(hwinfo.gpu_info))
         {
-            errorReasons.emplace_back("only AMD devices are supported");
+            errorReasons.emplace_back("non-AMD devices");
         }
     }
     return addMessageIfNotSupported(errorReasons, error);
index 5a4956a8b7c21a047feffa284a837a2e57228f5e..f4dffeed6ac6031d82b6efa94b4fb38bedd95ed8 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -251,14 +251,22 @@ void gmx_pme_reinit_atoms(const gmx_pme_t *pme, int nAtoms, const real *charges)
  * TODO: this partly duplicates an internal PME assert function
  * pme_gpu_check_restrictions(), except that works with a
  * formed gmx_pme_t structure. Should that one go away/work with inputrec?
+ *
+ * \param[out] error   If non-null, the error message when PME is not supported on GPU.
+ *
+ * \returns true if PME can run on GPU on this build, false otherwise.
+ */
+bool pme_gpu_supports_build(std::string *error);
+
+/*! \brief Checks whether the detected (GPU) hardware allows to run PME on GPU.
  *
  * \param[in]  hwinfo  Information about the detected hardware
  * \param[out] error   If non-null, the error message when PME is not supported on GPU.
  *
  * \returns true if PME can run on GPU on this build, false otherwise.
  */
-bool pme_gpu_supports_build(const gmx_hw_info_t &hwinfo,
-                            std::string         *error);
+bool pme_gpu_supports_hardware(const gmx_hw_info_t &hwinfo,
+                               std::string         *error);
 
 /*! \brief Checks whether the input system allows to run PME on GPU.
  * TODO: this partly duplicates an internal PME assert function
index 6fa0627777e4bdff414a3daf1f51aee713d37564..7475de602e6d2ea4d4373274321b00b353bb35ad 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -85,7 +85,8 @@ bool pmeSupportsInputForMode(const gmx_hw_info_t &hwinfo,
             break;
 
         case CodePath::GPU:
-            implemented = (pme_gpu_supports_build(hwinfo, nullptr) &&
+            implemented = (pme_gpu_supports_build(nullptr) &&
+                           pme_gpu_supports_hardware(hwinfo, nullptr) &&
                            pme_gpu_supports_input(*inputRec, mtop, nullptr));
             break;
 
index b4486fc7caa3d467153ce6f1ea6a170b3d8f7771..5cbbcc311951ab6a4620288092dbdabd36c00943 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -112,7 +112,8 @@ void PmeTestEnvironment::SetUp()
     hardwareContexts_.emplace_back(compat::make_unique<TestHardwareContext>(CodePath::CPU, "", nullptr));
 
     hardwareInfo_ = hardwareInit();
-    if (!pme_gpu_supports_build(*hardwareInfo_, nullptr))
+    if (!pme_gpu_supports_build(nullptr) ||
+        !pme_gpu_supports_hardware(*hardwareInfo_, nullptr))
     {
         // PME can only run on the CPU, so don't make any more test contexts.
         return;
index 110c9e3c3720b31a89c1e9317dd674f2383000cd..ae158a4984deefd1d61ca9e2a912143db1c75e05 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -621,10 +621,9 @@ int Mdrunner::mdrunner()
                     inputrec->cutoff_scheme == ecutsVERLET,
                     gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, GMX_THREAD_MPI),
                     hw_opt.nthreads_tmpi);
-            auto canUseGpuForPme   = pme_gpu_supports_build(*hwinfo, nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr);
             useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi
                     (useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment,
-                    canUseGpuForPme, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
+                    *hwinfo, *inputrec, mtop, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
 
         }
         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
@@ -692,9 +691,9 @@ int Mdrunner::mdrunner()
                                                                 usingVerletScheme,
                                                                 gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, !GMX_THREAD_MPI),
                                                                 gpusWereDetected);
-        auto canUseGpuForPme   = pme_gpu_supports_build(*hwinfo, nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr);
         useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment,
-                                                    canUseGpuForPme, cr->nnodes, domdecOptions.numPmeRanks,
+                                                    *hwinfo, *inputrec, mtop,
+                                                    cr->nnodes, domdecOptions.numPmeRanks,
                                                     gpusWereDetected);
         auto canUseGpuForBonded = buildSupportsGpuBondeds(nullptr) && inputSupportsGpuBondeds(*inputrec, mtop, nullptr);
         useGpuForBonded =
index 8e74f9b0872cebae8854770b08175478032226d3..b1f2a547df87634928d19257ffd7656bd39cc5a3 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2015,2016,2017,2018,2019, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -51,6 +51,7 @@
 #include <algorithm>
 #include <string>
 
+#include "gromacs/ewald/pme.h"
 #include "gromacs/hardware/cpuinfo.h"
 #include "gromacs/hardware/detecthardware.h"
 #include "gromacs/hardware/hardwaretopology.h"
@@ -153,14 +154,18 @@ decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuForNonbo
                                           const TaskTarget        pmeTarget,
                                           const std::vector<int> &gpuIdsToUse,
                                           const std::vector<int> &userGpuTaskAssignment,
-                                          const bool              canUseGpuForPme,
+                                          const gmx_hw_info_t    &hardwareInfo,
+                                          const t_inputrec       &inputrec,
+                                          const gmx_mtop_t       &mtop,
                                           const int               numRanksPerSimulation,
                                           const int               numPmeRanksPerSimulation)
 {
     // First, exclude all cases where we can't run PME on GPUs.
     if ((pmeTarget == TaskTarget::Cpu) ||
         !useGpuForNonbonded ||
-        !canUseGpuForPme)
+        !pme_gpu_supports_build(nullptr) ||
+        !pme_gpu_supports_hardware(hardwareInfo, nullptr) ||
+        !pme_gpu_supports_input(inputrec, mtop, nullptr))
     {
         // PME can't run on a GPU. If the user required that, we issue
         // an error later.
@@ -335,7 +340,9 @@ bool decideWhetherToUseGpusForNonbonded(const TaskTarget           nonbondedTarg
 bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
                                   const TaskTarget        pmeTarget,
                                   const std::vector<int> &userGpuTaskAssignment,
-                                  const bool              canUseGpuForPme,
+                                  const gmx_hw_info_t    &hardwareInfo,
+                                  const t_inputrec       &inputrec,
+                                  const gmx_mtop_t       &mtop,
                                   const int               numRanksPerSimulation,
                                   const int               numPmeRanksPerSimulation,
                                   const bool              gpusWereDetected)
@@ -350,18 +357,36 @@ bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
         if (pmeTarget == TaskTarget::Gpu)
         {
             GMX_THROW(NotImplementedError
-                          ("The PME on the GPU is only supported when nonbonded interactions run on GPUs also."));
+                          ("PME on GPUs is only supported when nonbonded interactions run on GPUs also."));
         }
         return false;
     }
 
-    if (!canUseGpuForPme)
+    std::string message;
+    if (!pme_gpu_supports_build(&message))
     {
         if (pmeTarget == TaskTarget::Gpu)
         {
-            // TODO Pass in the inputrec so we can give more help here?
             GMX_THROW(NotImplementedError
-                          ("The input simulation did not use PME in a way that is supported on the GPU."));
+                          ("Cannot compute PME interactions on a GPU, because " + message));
+        }
+        return false;
+    }
+    if (!pme_gpu_supports_hardware(hardwareInfo, &message))
+    {
+        if (pmeTarget == TaskTarget::Gpu)
+        {
+            GMX_THROW(NotImplementedError
+                          ("Cannot compute PME interactions on a GPU, because " + message));
+        }
+        return false;
+    }
+    if (!pme_gpu_supports_input(inputrec, mtop, &message))
+    {
+        if (pmeTarget == TaskTarget::Gpu)
+        {
+            GMX_THROW(NotImplementedError
+                          ("Cannot compute PME interactions on a GPU, because " + message));
         }
         return false;
     }
@@ -383,7 +408,7 @@ bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
         // Specifying -gputasks requires specifying everything.
         if (pmeTarget == TaskTarget::Auto)
         {
-            GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
+            GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
         }
 
         return true;
index 34e61ffa9284bafb42da9d761200548990e33906..02dcac5512571f55d3b46188d015d8406d483e8d 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -46,6 +46,8 @@
 #include <vector>
 
 struct gmx_hw_info_t;
+struct gmx_mtop_t;
+struct t_inputrec;
 
 enum class EmulateGpuNonbonded : bool;
 
@@ -102,7 +104,9 @@ bool decideWhetherToUseGpusForNonbondedWithThreadMpi(TaskTarget              non
  * \param[in]  pmeTarget                 The user's choice for mdrun -pme for where to assign long-ranged PME nonbonded interaction tasks.
  * \param[in]  gpuIdsToUse               The compatible GPUs that the user permitted us to use.
  * \param[in]  userGpuTaskAssignment     The user-specified assignment of GPU tasks to device IDs.
- * \param[in]  canUseGpuForPme           Whether the form of PME chosen can run on a GPU
+ * \param[in]  hardwareInfo              Hardware information
+ * \param[in]  inputrec                  The user input
+ * \param[in]  mtop                      Global system topology
  * \param[in]  numRanksPerSimulation     The number of ranks in each simulation.
  * \param[in]  numPmeRanksPerSimulation  The number of PME ranks in each simulation.
  *
@@ -114,7 +118,9 @@ bool decideWhetherToUseGpusForPmeWithThreadMpi(bool                    useGpuFor
                                                TaskTarget              pmeTarget,
                                                const std::vector<int> &gpuIdsToUse,
                                                const std::vector<int> &userGpuTaskAssignment,
-                                               bool                    canUseGpuForPme,
+                                               const gmx_hw_info_t    &hardwareInfo,
+                                               const t_inputrec       &inputrec,
+                                               const gmx_mtop_t       &mtop,
                                                int                     numRanksPerSimulation,
                                                int                     numPmeRanksPerSimulation);
 
@@ -173,7 +179,9 @@ bool decideWhetherToUseGpusForNonbonded(TaskTarget              nonbondedTarget,
  * \param[in]  useGpuForNonbonded        Whether GPUs will be used for nonbonded interactions.
  * \param[in]  pmeTarget                 The user's choice for mdrun -pme for where to assign long-ranged PME nonbonded interaction tasks.
  * \param[in]  userGpuTaskAssignment     The user-specified assignment of GPU tasks to device IDs.
- * \param[in]  canUseGpuForPme           Whether the form of PME chosen can run on a GPU
+ * \param[in]  hardwareInfo              Hardware information
+ * \param[in]  inputrec                  The user input
+ * \param[in]  mtop                      Global system topology
  * \param[in]  numRanksPerSimulation     The number of ranks in each simulation.
  * \param[in]  numPmeRanksPerSimulation  The number of PME ranks in each simulation.
  * \param[in]  gpusWereDetected          Whether compatible GPUs were detected on any node.
@@ -185,7 +193,9 @@ bool decideWhetherToUseGpusForNonbonded(TaskTarget              nonbondedTarget,
 bool decideWhetherToUseGpusForPme(bool                    useGpuForNonbonded,
                                   TaskTarget              pmeTarget,
                                   const std::vector<int> &userGpuTaskAssignment,
-                                  bool                    canUseGpuForPme,
+                                  const gmx_hw_info_t    &hardwareInfo,
+                                  const t_inputrec       &inputrec,
+                                  const gmx_mtop_t       &mtop,
                                   int                     numRanksPerSimulation,
                                   int                     numPmeRanksPerSimulation,
                                   bool                    gpusWereDetected);
index 5fa35b34eb1ede13742df5a94d322535ed7153b6..09d02464a6f4f45abf7ee9fe5ad1671e4b80fe0e 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2015,2016,2017,2018,2019, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -356,7 +356,9 @@ int get_nthreads_mpi(const gmx_hw_info_t    *hwinfo,
     if (pmeOnGpu)
     {
         GMX_RELEASE_ASSERT((EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)) &&
-                           pme_gpu_supports_build(*hwinfo, nullptr) && pme_gpu_supports_input(*inputrec, *mtop, nullptr),
+                           pme_gpu_supports_build(nullptr) &&
+                           pme_gpu_supports_hardware(*hwinfo, nullptr) &&
+                           pme_gpu_supports_input(*inputrec, *mtop, nullptr),
                            "PME can't be on GPUs unless we are using PME");
 
         // PME on GPUs supports a single PME rank with PP running on the same or few other ranks.
index c2c734c53d049c2f4be81f49cb83dca96739eeab..960ccbf276b7cae493b6d6f9cab372dac221f9e0 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -147,7 +147,9 @@ void PmeTest::runTest(const RunModesList &runModes)
             continue;
         }
         auto modeTargetsPmeOnGpus = (mode.first.find("PmeOnGpu") != std::string::npos);
-        if (modeTargetsPmeOnGpus && !pme_gpu_supports_build(*hardwareInfo_, nullptr))
+        if (modeTargetsPmeOnGpus &&
+            !(pme_gpu_supports_build(nullptr) &&
+              pme_gpu_supports_hardware(*hardwareInfo_, nullptr)))
         {
             // This run mode will cause a fatal error from mdrun when
             // it finds an unsuitable device, which is not something