Fix mdrun -nb auto -pme auto when GPUs are absent

author Mark Abraham <mark.j.abraham@gmail.com>

Tue, 5 Dec 2017 09:27:21 +0000 (20:27 +1100)

committer Kasson <kasson@gmail.com>

Tue, 5 Dec 2017 13:10:07 +0000 (14:10 +0100)
author Mark Abraham <mark.j.abraham@gmail.com>
Tue, 5 Dec 2017 09:27:21 +0000 (20:27 +1100)
committer Kasson <kasson@gmail.com>
Tue, 5 Dec 2017 13:10:07 +0000 (14:10 +0100)
diff --git a/src/gromacs/taskassignment/decidegpuusage.cpp b/src/gromacs/taskassignment/decidegpuusage.cpp

index 6002bb913bb0639fabe885db7fd735b29aec4f11..d5132add212e7c986de4c39332978081643d1468 100644 (file)
--- a/src/gromacs/taskassignment/decidegpuusage.cpp
+++ b/src/gromacs/taskassignment/decidegpuusage.cpp
@@ -233,11 +233,11 @@ decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuForNonbo
  }
  
  bool decideWhetherToUseGpusForNonbonded(const TaskTarget           nonbondedTarget,
-                                        const std::vector<int>    &gpuIdsToUse,
                                          const std::vector<int>    &userGpuTaskAssignment,
                                          const EmulateGpuNonbonded  emulateGpuNonbonded,
                                          const bool                 usingVerletScheme,
-                                        const bool                 nonbondedOnGpuIsUseful)
+                                        const bool                 nonbondedOnGpuIsUseful,
+                                        const bool                 gpusWereDetected)
  {
      if (nonbondedTarget == TaskTarget::Cpu)
      {
@@ -262,7 +262,7 @@ bool decideWhetherToUseGpusForNonbonded(const TaskTarget           nonbondedTarg
                            ("Nonbonded interactions on the GPU were required, which is inconsistent "
                            "with choosing emulation. Make no more than one of these choices."));
          }
-        if (!gpuIdsToUse.empty() || !userGpuTaskAssignment.empty())
+        if (!userGpuTaskAssignment.empty())
          {
              GMX_THROW(InconsistentInputError
                            ("GPU ID usage was specified, as was GPU emulation. Make no more than one of these choices."));
@@ -306,13 +306,18 @@ bool decideWhetherToUseGpusForNonbonded(const TaskTarget           nonbondedTarg
          return true;
      }
  
-    // We still don't know whether it is an error if no GPUs are found
-    // because we don't know the duty of this rank, yet. For example,
-    // a node with only PME ranks and -pme cpu is OK if there are not
-    // GPUs.
+    if (nonbondedTarget == TaskTarget::Gpu)
+    {
+        // We still don't know whether it is an error if no GPUs are found
+        // because we don't know the duty of this rank, yet. For example,
+        // a node with only PME ranks and -pme cpu is OK if there are no
+        // GPUs.
+        return true;
+    }
  
-    // If we get here, then the user permitted or required GPUs.
-    return true;
+    // If we get here, then the user permitted GPUs, which we should
+    // use for nonbonded interactions.
+    return gpusWereDetected;
  }
  
  bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
@@ -320,7 +325,8 @@ bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
                                    const std::vector<int> &userGpuTaskAssignment,
                                    const bool              canUseGpuForPme,
                                    const int               numRanksPerSimulation,
-                                  const int               numPmeRanksPerSimulation)
+                                  const int               numPmeRanksPerSimulation,
+                                  const bool              gpusWereDetected)
  {
      if (pmeTarget == TaskTarget::Cpu)
      {
@@ -389,11 +395,13 @@ bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
          return true;
      }
  
+    // If we get here, then the user permitted GPUs.
      if (numRanksPerSimulation == 1)
      {
-        // PME can run well on a single GPU shared with NB when
-        // there is one rank, so we permit mdrun to try that.
-        return true;
+        // PME can run well on a single GPU shared with NB when there
+        // is one rank, so we permit mdrun to try that if we have
+        // detected GPUs.
+        return gpusWereDetected;
      }
  
      // Not enough support for PME on GPUs for anything else
diff --git a/src/gromacs/taskassignment/decidegpuusage.h b/src/gromacs/taskassignment/decidegpuusage.h

index 438e00b8a6e48a23e6d95ade1a3ad95ed37db12c..7e01c99eabf3e9f01fb8dd62396125b4a27a08a3 100644 (file)
--- a/src/gromacs/taskassignment/decidegpuusage.h
+++ b/src/gromacs/taskassignment/decidegpuusage.h
@@ -133,22 +133,22 @@ bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuFor
   * consistency checks.
   *
   * \param[in]  nonbondedTarget           The user's choice for mdrun -nb for where to assign short-ranged nonbonded interaction tasks.
- * \param[in]  gpuIdsToUse               The compatible GPUs that the user permitted us to use.
   * \param[in]  userGpuTaskAssignment     The user-specified assignment of GPU tasks to device IDs.
   * \param[in]  emulateGpuNonbonded       Whether we will emulate GPU calculation of nonbonded interactions.
   * \param[in]  usingVerletScheme         Whether the nonbondeds are using the Verlet scheme.
   * \param[in]  nonbondedOnGpuIsUseful    Whether computing nonbonded interactions on a GPU is useful for this calculation.
+ * \param[in]  gpusWereDetected          Whether compatible GPUs were detected on any node.
   *
   * \returns    Whether the simulation will run nonbonded and PME tasks, respectively, on GPUs.
   *
   * \throws     std::bad_alloc          If out of memory
   *             InconsistentInputError  If the user requirements are inconsistent. */
  bool decideWhetherToUseGpusForNonbonded(const TaskTarget           nonbondedTarget,
-                                        const std::vector<int>    &gpuIdsToUse,
                                          const std::vector<int>    &userGpuTaskAssignment,
                                          const EmulateGpuNonbonded  emulateGpuNonbonded,
                                          const bool                 usingVerletScheme,
-                                        const bool                 nonbondedOnGpuIsUseful);
+                                        const bool                 nonbondedOnGpuIsUseful,
+                                        const bool                 gpusWereDetected);
  
  /*! \brief Decide whether the simulation will try to run tasks of
   * different types on GPUs.
@@ -172,6 +172,7 @@ bool decideWhetherToUseGpusForNonbonded(const TaskTarget           nonbondedTarg
   * \param[in]  canUseGpuForPme           Whether the form of PME chosen can run on a GPU
   * \param[in]  numRanksPerSimulation     The number of ranks in each simulation.
   * \param[in]  numPmeRanksPerSimulation  The number of PME ranks in each simulation.
+ * \param[in]  gpusWereDetected          Whether compatible GPUs were detected on any node.
   *
   * \returns    Whether the simulation will run nonbonded and PME tasks, respectively, on GPUs.
   *
@@ -182,7 +183,8 @@ bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
                                    const std::vector<int> &userGpuTaskAssignment,
                                    const bool              canUseGpuForPme,
                                    const int               numRanksPerSimulation,
-                                  const int               numPmeRanksPerSimulation);
+                                  const int               numPmeRanksPerSimulation,
+                                  const bool              gpusWereDetected);
  
  }
  
diff --git a/src/programs/mdrun/runner.cpp b/src/programs/mdrun/runner.cpp

index d0d7a0cb8b5668300740854d07a17a63b0502408..7dabeb32e73c1ecb7c4c22040af56282bcf3b7d0 100644 (file)
--- a/src/programs/mdrun/runner.cpp
+++ b/src/programs/mdrun/runner.cpp
@@ -647,12 +647,20 @@ int Mdrunner::mdrunner()
      bool useGpuForPme       = false;
      try
      {
-        useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(nonbondedTarget, gpuIdsToUse, userGpuTaskAssignment,
+        // It's possible that there are different numbers of GPUs on
+        // different nodes, which is the user's responsibility to
+        // handle. If unsuitable, we will notice that during task
+        // assignment.
+        bool gpusWereDetected = hwinfo->ngpu_compatible_tot > 0;
+        useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(nonbondedTarget, userGpuTaskAssignment,
                                                                  emulateGpuNonbonded, inputrec->cutoff_scheme == ecutsVERLET,
-                                                                gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, doRerun));
+                                                                gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, doRerun),
+                                                                gpusWereDetected);
          auto inputSystemHasPme = EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype);
          auto canUseGpuForPme   = inputSystemHasPme && pme_gpu_supports_input(inputrec, nullptr);
-        useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, canUseGpuForPme, cr->nnodes, domdecOptions.numPmeRanks);
+        useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment,
+                                                    canUseGpuForPme, cr->nnodes, domdecOptions.numPmeRanks,
+                                                    gpusWereDetected);
          pmeRunMode   = (useGpuForPme ? PmeRunMode::GPU : PmeRunMode::CPU);
          if ((pmeRunMode == PmeRunMode::GPU) && (pmeFftTarget == TaskTarget::Cpu))
          {
author	Mark Abraham <mark.j.abraham@gmail.com>
	Tue, 5 Dec 2017 09:27:21 +0000 (20:27 +1100)
committer	Kasson <kasson@gmail.com>
	Tue, 5 Dec 2017 13:10:07 +0000 (14:10 +0100)
src/gromacs/taskassignment/decidegpuusage.cpp		patch \| blob \| history
src/gromacs/taskassignment/decidegpuusage.h		patch \| blob \| history
src/programs/mdrun/runner.cpp		patch \| blob \| history