Task assignment for bonded interactions on CUDA GPUs
[alexxy/gromacs.git] / src / gromacs / mdrun / runner.cpp
index 5bc87e3cc8944e812b51fb25dbf072bd93c4f024..ce9b5b8eb3a1601bc7f1ddc4e95016450810f788 100644 (file)
@@ -75,6 +75,7 @@
 #include "gromacs/hardware/detecthardware.h"
 #include "gromacs/hardware/printhardware.h"
 #include "gromacs/listed-forces/disre.h"
+#include "gromacs/listed-forces/manage-threading.h"
 #include "gromacs/listed-forces/orires.h"
 #include "gromacs/math/functions.h"
 #include "gromacs/math/utilities.h"
@@ -185,6 +186,7 @@ Mdrunner Mdrunner::cloneOnSpawnedThread() const
     newRunner.nbpu_opt            = nbpu_opt;
     newRunner.pme_opt             = pme_opt;
     newRunner.pme_fft_opt         = pme_fft_opt;
+    newRunner.bonded_opt          = bonded_opt;
     newRunner.nstlist_cmdline     = nstlist_cmdline;
     newRunner.replExParams        = replExParams;
     newRunner.pforce              = pforce;
@@ -473,6 +475,7 @@ int Mdrunner::mdrunner()
     auto       nonbondedTarget = findTaskTarget(nbpu_opt);
     auto       pmeTarget       = findTaskTarget(pme_opt);
     auto       pmeFftTarget    = findTaskTarget(pme_fft_opt);
+    auto       bondedTarget    = findTaskTarget(bonded_opt);
     PmeRunMode pmeRunMode      = PmeRunMode::None;
 
     // Here we assume that SIMMASTER(cr) does not change even after the
@@ -665,23 +668,34 @@ int Mdrunner::mdrunner()
     // having an assertion?
     //
     // Note that these variables describe only their own node.
+    //
+    // Note that when bonded interactions run on a GPU they always run
+    // alongside a nonbonded task, so they do not influence task assignment
+    // even though they affect the force calculation schedule.
     bool useGpuForNonbonded = false;
     bool useGpuForPme       = false;
+    bool useGpuForBonded    = false;
     try
     {
         // It's possible that there are different numbers of GPUs on
         // different nodes, which is the user's responsibility to
         // handle. If unsuitable, we will notice that during task
         // assignment.
-        bool gpusWereDetected = hwinfo->ngpu_compatible_tot > 0;
+        bool gpusWereDetected  = hwinfo->ngpu_compatible_tot > 0;
+        bool usingVerletScheme = inputrec->cutoff_scheme == ecutsVERLET;
         useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(nonbondedTarget, userGpuTaskAssignment,
-                                                                emulateGpuNonbonded, inputrec->cutoff_scheme == ecutsVERLET,
+                                                                emulateGpuNonbonded, usingVerletScheme,
                                                                 gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, !GMX_THREAD_MPI),
                                                                 gpusWereDetected);
         auto canUseGpuForPme   = pme_gpu_supports_build(nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr);
         useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment,
                                                     canUseGpuForPme, cr->nnodes, domdecOptions.numPmeRanks,
                                                     gpusWereDetected);
+        auto canUseGpuForBonded = buildSupportsGpuBondeds(nullptr) && inputSupportsGpuBondeds(*inputrec, mtop, nullptr);
+        useGpuForBonded =
+            decideWhetherToUseGpusForBonded(useGpuForNonbonded, useGpuForPme, usingVerletScheme,
+                                            bondedTarget, canUseGpuForBonded, cr->nnodes,
+                                            domdecOptions.numPmeRanks, gpusWereDetected);
 
         pmeRunMode   = (useGpuForPme ? PmeRunMode::GPU : PmeRunMode::CPU);
         if (pmeRunMode == PmeRunMode::GPU)
@@ -984,7 +998,7 @@ int Mdrunner::mdrunner()
     // Note that in general useGpuForNonbonded, etc. can have a value
     // that is inconsistent with the presence of actual GPUs on any
     // rank, and that is not known to be a problem until the
-    // duty of the ranks on a node become node.
+    // duty of the ranks on a node becomes known.
     //
     // TODO Later we might need the concept of computeTasksOnThisRank,
     // from which we construct gpuTasksOnThisRank.
@@ -1001,6 +1015,8 @@ int Mdrunner::mdrunner()
     {
         if (useGpuForNonbonded)
         {
+            // Note that any bonded tasks on a GPU always accompany a
+            // non-bonded task.
             if (haveGpus)
             {
                 gpuTasksOnThisRank.push_back(GpuTask::Nonbonded);
@@ -1009,6 +1025,10 @@ int Mdrunner::mdrunner()
             {
                 gmx_fatal(FARGS, "Cannot run short-ranged nonbonded interactions on a GPU because there is none detected.");
             }
+            else if (bondedTarget == TaskTarget::Gpu)
+            {
+                gmx_fatal(FARGS, "Cannot run bonded interactions on a GPU because there is none detected.");
+            }
         }
     }
     // TODO cr->duty & DUTY_PME should imply that a PME algorithm is active, but currently does not.
@@ -1032,7 +1052,8 @@ int Mdrunner::mdrunner()
     {
         // Produce the task assignment for this rank.
         gpuTaskAssignment = runTaskAssignment(gpuIdsToUse, userGpuTaskAssignment, *hwinfo,
-                                              mdlog, cr, ms, physicalNodeComm, gpuTasksOnThisRank);
+                                              mdlog, cr, ms, physicalNodeComm, gpuTasksOnThisRank,
+                                              useGpuForBonded, pmeRunMode);
     }
     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 
@@ -1198,6 +1219,7 @@ int Mdrunner::mdrunner()
                       opt2fn("-tablep", filenames.size(), filenames.data()),
                       opt2fns("-tableb", filenames.size(), filenames.data()),
                       *hwinfo, nonbondedDeviceInfo,
+                      useGpuForBonded,
                       FALSE,
                       pforce);
 
@@ -1553,6 +1575,8 @@ class Mdrunner::BuilderImplementation
 
         void addPME(const char* pme_opt_, const char* pme_fft_opt_);
 
+        void addBondedTaskAssignment(const char* bonded_opt);
+
         void addHardwareOptions(const gmx_hw_opt_t &hardwareOptions);
 
         void addFilenames(ArrayRef <const t_filenm> filenames);
@@ -1572,6 +1596,7 @@ class Mdrunner::BuilderImplementation
         const char* nbpu_opt_    = nullptr;
         const char* pme_opt_     = nullptr;
         const char* pme_fft_opt_ = nullptr;
+        const char *bonded_opt_  = nullptr;
 
         MdrunOptions                          mdrunOptions_;
 
@@ -1729,6 +1754,15 @@ Mdrunner Mdrunner::BuilderImplementation::build()
         GMX_THROW(gmx::APIError("MdrunnerBuilder::addElectrostatics() is required before build()"));
     }
 
+    if (bonded_opt_)
+    {
+        newRunner.bonded_opt = bonded_opt_;
+    }
+    else
+    {
+        GMX_THROW(gmx::APIError("MdrunnerBuilder::addBondedTaskAssignment() is required before build()"));
+    }
+
     newRunner.restraintManager_ = compat::make_unique<gmx::RestraintManager>();
 
     if (stopHandlerBuilder_)
@@ -1755,6 +1789,11 @@ void Mdrunner::BuilderImplementation::addPME(const char* pme_opt,
     pme_fft_opt_ = pme_fft_opt;
 }
 
+void Mdrunner::BuilderImplementation::addBondedTaskAssignment(const char* bonded_opt) // Records the bonded task-assignment option for a later build().
+{
+    bonded_opt_ = bonded_opt; // Stores the pointer only (the string is not copied); build() throws gmx::APIError while this is still nullptr.
+}
+
 void Mdrunner::BuilderImplementation::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions)
 {
     hardwareOptions_ = hardwareOptions;
@@ -1841,6 +1880,12 @@ MdrunnerBuilder &MdrunnerBuilder::addElectrostatics(const char* pme_opt,
     return *this;
 }
 
+MdrunnerBuilder &MdrunnerBuilder::addBondedTaskAssignment(const char* bonded_opt) // Public builder entry point for the bonded task-assignment option.
+{
+    impl_->addBondedTaskAssignment(bonded_opt); // Forward unchanged to the implementation object, which holds the value.
+    return *this; // Return the builder itself so calls can be chained.
+}
+
 Mdrunner MdrunnerBuilder::build()
 {
     return impl_->build();