Task assignment for bonded interactions on CUDA GPUs
author Mark Abraham <mark.j.abraham@gmail.com>
Thu, 11 Oct 2018 19:53:38 +0000 (21:53 +0200)
committer Mark Abraham <mark.j.abraham@gmail.com>
Tue, 16 Oct 2018 07:36:50 +0000 (09:36 +0200)
Made a query function to find whether any interactions of supported
types exist in the global topology, so that we can make efficient
high-level decisions.

Added free for gpuBondedLists pointer.

Minor cleanup in manage-threading.h

Fixes #2679

Change-Id: I0ebbbd33c2cba5808561111b0ec6160bfd2f840d

16 files changed:
src/api/cpp/context.cpp
src/gromacs/ewald/pme.h
src/gromacs/listed-forces/manage-threading.cpp
src/gromacs/listed-forces/manage-threading.h
src/gromacs/mdlib/forcerec.cpp
src/gromacs/mdlib/forcerec.h
src/gromacs/mdrun/legacymdrunoptions.h
src/gromacs/mdrun/runner.cpp
src/gromacs/mdrun/runner.h
src/gromacs/taskassignment/decidegpuusage.cpp
src/gromacs/taskassignment/decidegpuusage.h
src/gromacs/taskassignment/reportgpuusage.cpp
src/gromacs/taskassignment/reportgpuusage.h
src/gromacs/taskassignment/taskassignment.cpp
src/gromacs/taskassignment/taskassignment.h
src/programs/mdrun/mdrun.cpp

index c5a8024d990baa3879004ae70929eb430480f9ed..9bfd993c282319555080f35b4dc040dac2593834 100644 (file)
@@ -187,6 +187,7 @@ std::shared_ptr<Session> ContextImpl::launch(const Workflow &work)
         builder.addNonBonded(options_.nbpu_opt_choices[0]);
         // \todo pass by value
         builder.addElectrostatics(options_.pme_opt_choices[0], options_.pme_fft_opt_choices[0]);
+        builder.addBondedTaskAssignment(options_.bonded_opt_choices[0]);
         builder.addNeighborList(options_.nstlist_cmdline);
         builder.addReplicaExchange(options_.replExParams);
         // \todo take ownership of multisim resources (ms)
index 81988e6bc9ce65beef8971891655cb9bc19991bd..ccf5e7227d929cd34b2f63733eeb4dd827abfbc9 100644 (file)
@@ -88,7 +88,7 @@ enum {
 /*! \brief Possible PME codepaths on a rank.
  * \todo: make this enum class with gmx_pme_t C++ refactoring
  */
-enum PmeRunMode
+enum class PmeRunMode
 {
     None,    //!< No PME task is done
     CPU,     //!< Whole PME computation is done on CPU
index 1541edd81a314f5ea6b0086030254e02968fa77d..7c50b774aa798615c277e984e954304829e33961 100644 (file)
 
 #include <algorithm>
 #include <array>
+#include <string>
 
 #include "gromacs/listed-forces/listed-forces.h"
 #include "gromacs/mdlib/gmx_omp_nthreads.h"
+#include "gromacs/mdtypes/inputrec.h"
 #include "gromacs/pbcutil/ishift.h"
 #include "gromacs/topology/ifunc.h"
+#include "gromacs/topology/topology.h"
+#include "gromacs/utility/arrayref.h"
 #include "gromacs/utility/exceptions.h"
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/gmxassert.h"
@@ -248,6 +252,8 @@ static void divide_bondeds_over_threads(bonded_threading_t *bt,
         int            nrToAssignToCpuThreads = il.nr;
 
         if (useGpuForBondeds &&
+                     // TODO remove the next line when we have GPU bonded kernels
+            false && // NOLINT readability-simplify-boolean-expr
             ftypeGpuIndex < ftypesOnGpu.size() &&
             ftypesOnGpu[ftypeGpuIndex] == ftype)
         {
@@ -370,6 +376,109 @@ static void convertIlistToNbnxnOrder(const t_ilist            &src,
     }
 }
 
+namespace gmx
+{
+
+//! Returns whether there are any interactions suitable for a GPU.
+static bool someInteractionsCanRunOnGpu(const InteractionLists &ilists)
+{
+    for (int ftype : ftypesOnGpu)
+    {
+        if (!ilists[ftype].iatoms.empty())
+        {
+            // Perturbation is not implemented in the GPU bonded
+            // kernels. If all the interactions were actually
+            // perturbed, then that will be detected later on each
+            // domain, and work will never run on the GPU. This is
+            // very unlikely to occur, and has little run-time cost,
+            // so we don't complicate the code by catering for it
+            // here.
+            return true;
+        }
+    }
+    return false;
+}
+
+//! Returns whether there are any interactions suitable for a GPU.
+static bool bondedInteractionsCanRunOnGpu(const gmx_mtop_t &mtop)
+{
+    // Check the regular molecule types
+    for (const auto &moltype : mtop.moltype)
+    {
+        if (someInteractionsCanRunOnGpu(moltype.ilist))
+        {
+            return true;
+        }
+    }
+    // Check the inter-molecular interactions.
+    if (mtop.intermolecular_ilist)
+    {
+        if (someInteractionsCanRunOnGpu(*mtop.intermolecular_ilist))
+        {
+            return true;
+        }
+    }
+    return false;
+}
+
+/*! \brief Help build a descriptive message in \c error if there are
+ * \c errorReasons why bondeds on a GPU are not supported.
+ *
+ * \returns Whether the lack of errorReasons indicate there is support. */
+static bool
+addMessageIfNotSupported(ArrayRef <const std::string> errorReasons,
+                         std::string                 *error)
+{
+    bool isSupported = errorReasons.empty();
+    if (!isSupported && error)
+    {
+        *error  = "Bonded interactions cannot run on GPUs: ";
+        *error += joinStrings(errorReasons, "; ") + ".";
+    }
+    return isSupported;
+}
+
+bool buildSupportsGpuBondeds(std::string *error)
+{
+    // At most one of the two GPU-flavour problems can apply to a build
+    const bool               isOpenclBuild  = (GMX_GPU == GMX_GPU_OPENCL);
+    const bool               isCpuOnlyBuild = !isOpenclBuild && (GMX_GPU == GMX_GPU_NONE);
+
+    // Gather every reason, so that the message reports them all at once
+    std::vector<std::string> reasons;
+    if (GMX_DOUBLE)
+    {
+        reasons.emplace_back("not supported with double precision");
+    }
+    if (isOpenclBuild)
+    {
+        reasons.emplace_back("not supported with OpenCL build of GROMACS");
+    }
+    if (isCpuOnlyBuild)
+    {
+        reasons.emplace_back("not supported with CPU-only build of GROMACS");
+    }
+    return addMessageIfNotSupported(reasons, error);
+}
+
+bool inputSupportsGpuBondeds(const t_inputrec &ir,
+                             const gmx_mtop_t &mtop,
+                             std::string      *error)
+{
+    std::vector<std::string> errorReasons;
+
+    if (!bondedInteractionsCanRunOnGpu(mtop))
+    {
+        errorReasons.emplace_back("No supported bonded interactions are present");
+    }
+    if (ir.cutoff_scheme == ecutsGROUP)
+    {
+        errorReasons.emplace_back("group cutoff scheme");
+    }
+    if (!EI_DYNAMICS(ir.eI))
+    {
+        errorReasons.emplace_back("not a dynamical integrator");
+    }
+    return addMessageIfNotSupported(errorReasons, error);
+}
+
+} // namespace gmx
+
 //! Divides bonded interactions over threads and GPU
 void assign_bondeds_to_gpu(GpuBondedLists           *gpuBondedLists,
                            gmx::ArrayRef<const int>  nbnxnAtomOrder,
index 47dd87435cf34b8d967a74e7a94a177fe34ac7bd..960bcace4ed11f17617ece0b4356346c6844d30f 100644 (file)
 
 #include <cstdio>
 
-#include "gromacs/mdtypes/forcerec.h"
+#include <string>
+
 #include "gromacs/topology/idef.h"
 #include "gromacs/utility/arrayref.h"
 
+struct bonded_threading_t;
+struct gmx_mtop_t;
+struct t_inputrec;
+
 /*! \internal \brief Struct for storing lists of bonded interaction for evaluation on a GPU */
 struct GpuBondedLists
 {
@@ -57,6 +62,34 @@ struct GpuBondedLists
     bool             haveInteractions; /**< Tells whether there are any interaction in iLists */
 };
 
+
+namespace gmx
+{
+
+/*! \brief Checks whether the GROMACS build allows computing bonded interactions on a GPU.
+ *
+ * \param[out] error  If non-null, the diagnostic message when bondeds cannot run on a GPU.
+ *
+ * \returns true when this build can run bonded interactions on a GPU, false otherwise.
+ *
+ * \throws std::bad_alloc when out of memory.
+ */
+bool buildSupportsGpuBondeds(std::string *error);
+
+/*! \brief Checks whether the input system allows computing bonded interactions on a GPU.
+ *
+ * \param[in]  ir     Input system.
+ * \param[in]  mtop   Complete system topology to search for supported interactions.
+ * \param[out] error  If non-null, the error message if the input is not supported on GPU.
+ *
+ * \returns true if bonded interactions can run on a GPU with this input, false otherwise.
+ */
+bool inputSupportsGpuBondeds(const t_inputrec &ir,
+                             const gmx_mtop_t &mtop,
+                             std::string      *error);
+
+}   // namespace gmx
+
 /*! \brief Copy bonded interactions assigned to the GPU to \p gpuBondedLists */
 void assign_bondeds_to_gpu(GpuBondedLists           *gpuBondedLists,
                            gmx::ArrayRef<const int>  nbnxnAtomOrder,
@@ -83,6 +116,6 @@ void tear_down_bonded_threading(bonded_threading_t *bt);
  * A pointer to this struct is returned as \p *bt_ptr.
  */
 void init_bonded_threading(FILE *fplog, int nenergrp,
-                           struct bonded_threading_t **bt_ptr);
+                           bonded_threading_t **bt_ptr);
 
 #endif
index f718d22cbfb2b5102b22aa796d1665b4b37ae780..2d931d28a4700a84bf01e86c59cb30768e7a28a8 100644 (file)
@@ -2315,6 +2315,7 @@ void init_forcerec(FILE                             *fp,
                    gmx::ArrayRef<const std::string>  tabbfnm,
                    const gmx_hw_info_t              &hardwareInfo,
                    const gmx_device_info_t          *deviceInfo,
+                   const bool                        useGpuForBonded,
                    gmx_bool                          bNoSolvOpt,
                    real                              print_force)
 {
@@ -3052,15 +3053,10 @@ void init_forcerec(FILE                             *fp,
     init_bonded_threading(fp, mtop->groups.grps[egcENER].nr,
                           &fr->bondedThreading);
 
-    // TODO: Replace this condition by the GPU bonded task boolean
-    if (fr->cutoff_scheme == ecutsVERLET && getenv("GMX_TEST_GPU_BONDEDS"))
+    if (useGpuForBonded)
     {
         fr->gpuBondedLists = new GpuBondedLists;
     }
-    else
-    {
-        fr->gpuBondedLists = nullptr;
-    }
 
     fr->nthread_ewc = gmx_omp_nthreads_get(emntBonded);
     snew(fr->ewc_t, fr->nthread_ewc);
@@ -3152,6 +3148,7 @@ void done_forcerec(t_forcerec *fr, int numMolBlocks, int numEnergyGroups)
     done_ns(fr->ns, numEnergyGroups);
     sfree(fr->ewc_t);
     tear_down_bonded_threading(fr->bondedThreading);
+    delete fr->gpuBondedLists;
     fr->bondedThreading = nullptr;
     sfree(fr);
 }
index 64e60c7332eadef06b624f466ea6a92cfc24cc97..1f0694699bf242262f79eb12bfcc55987dee8a06 100644 (file)
@@ -115,6 +115,7 @@ void init_interaction_const_tables(FILE                   *fp,
  * \param[in]  tabbfnm     Table potential files for bonded interactions
  * \param[in]  hardwareInfo  Information about hardware
  * \param[in]  deviceInfo  Info about GPU device to use for short-ranged work
+ * \param[in]  useGpuForBonded  Whether bonded interactions will run on a GPU
  * \param[in]  bNoSolvOpt  Do not use solvent optimization
  * \param[in]  print_force Print forces for atoms with force >= print_force
  */
@@ -131,6 +132,7 @@ void init_forcerec(FILE                             *fplog,
                    gmx::ArrayRef<const std::string>  tabbfnm,
                    const gmx_hw_info_t              &hardwareInfo,
                    const gmx_device_info_t          *deviceInfo,
+                   bool                              useGpuForBonded,
                    gmx_bool                          bNoSolvOpt,
                    real                              print_force);
 
index 7584f2da072bc9e2483b91bd97466fc8a8f7e944..637cd2a6b93dabe9fd632d92e30794922c3a8fec 100644 (file)
@@ -155,13 +155,15 @@ class LegacyMdrunOptions
         { nullptr, "auto", "cpu", "gpu", nullptr };
         const char       *pme_fft_opt_choices[5] =
         { nullptr, "auto", "cpu", "gpu", nullptr };
+        const char       *bonded_opt_choices[5] =
+        { nullptr, "auto", "cpu", "gpu", nullptr };
         gmx_bool          bTryToAppendFiles     = TRUE;
         const char       *gpuIdsAvailable       = "";
         const char       *userGpuTaskAssignment = "";
 
         ImdOptions       &imdOptions = mdrunOptions.imdOptions;
 
-        t_pargs           pa[47] = {
+        t_pargs           pa[48] = {
 
             { "-dd",      FALSE, etRVEC, {&realddxyz},
               "Domain decomposition grid, 0 is optimize" },
@@ -224,6 +226,8 @@ class LegacyMdrunOptions
               "Perform PME calculations on" },
             { "-pmefft", FALSE, etENUM, {pme_fft_opt_choices},
               "Perform PME FFT calculations on" },
+            { "-bonded",     FALSE, etENUM, {bonded_opt_choices},
+              "Perform bonded calculations on" },
             { "-v",       FALSE, etBOOL, {&mdrunOptions.verbose},
               "Be loud and noisy" },
             { "-pforce",  FALSE, etREAL, {&pforce},
index 5bc87e3cc8944e812b51fb25dbf072bd93c4f024..ce9b5b8eb3a1601bc7f1ddc4e95016450810f788 100644 (file)
@@ -75,6 +75,7 @@
 #include "gromacs/hardware/detecthardware.h"
 #include "gromacs/hardware/printhardware.h"
 #include "gromacs/listed-forces/disre.h"
+#include "gromacs/listed-forces/manage-threading.h"
 #include "gromacs/listed-forces/orires.h"
 #include "gromacs/math/functions.h"
 #include "gromacs/math/utilities.h"
@@ -185,6 +186,7 @@ Mdrunner Mdrunner::cloneOnSpawnedThread() const
     newRunner.nbpu_opt            = nbpu_opt;
     newRunner.pme_opt             = pme_opt;
     newRunner.pme_fft_opt         = pme_fft_opt;
+    newRunner.bonded_opt          = bonded_opt;
     newRunner.nstlist_cmdline     = nstlist_cmdline;
     newRunner.replExParams        = replExParams;
     newRunner.pforce              = pforce;
@@ -473,6 +475,7 @@ int Mdrunner::mdrunner()
     auto       nonbondedTarget = findTaskTarget(nbpu_opt);
     auto       pmeTarget       = findTaskTarget(pme_opt);
     auto       pmeFftTarget    = findTaskTarget(pme_fft_opt);
+    auto       bondedTarget    = findTaskTarget(bonded_opt);
     PmeRunMode pmeRunMode      = PmeRunMode::None;
 
     // Here we assume that SIMMASTER(cr) does not change even after the
@@ -665,23 +668,34 @@ int Mdrunner::mdrunner()
     // having an assertion?
     //
     // Note that these variables describe only their own node.
+    //
+    // Note that when bonded interactions run on a GPU they always run
+    // alongside a nonbonded task, so do not influence task assignment
+    // even though they affect the force calculation schedule.
     bool useGpuForNonbonded = false;
     bool useGpuForPme       = false;
+    bool useGpuForBonded    = false;
     try
     {
         // It's possible that there are different numbers of GPUs on
         // different nodes, which is the user's responsibility to
         // handle. If unsuitable, we will notice that during task
         // assignment.
-        bool gpusWereDetected = hwinfo->ngpu_compatible_tot > 0;
+        bool gpusWereDetected  = hwinfo->ngpu_compatible_tot > 0;
+        bool usingVerletScheme = inputrec->cutoff_scheme == ecutsVERLET;
         useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(nonbondedTarget, userGpuTaskAssignment,
-                                                                emulateGpuNonbonded, inputrec->cutoff_scheme == ecutsVERLET,
+                                                                emulateGpuNonbonded, usingVerletScheme,
                                                                 gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, !GMX_THREAD_MPI),
                                                                 gpusWereDetected);
         auto canUseGpuForPme   = pme_gpu_supports_build(nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr);
         useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment,
                                                     canUseGpuForPme, cr->nnodes, domdecOptions.numPmeRanks,
                                                     gpusWereDetected);
+        auto canUseGpuForBonded = buildSupportsGpuBondeds(nullptr) && inputSupportsGpuBondeds(*inputrec, mtop, nullptr);
+        useGpuForBonded =
+            decideWhetherToUseGpusForBonded(useGpuForNonbonded, useGpuForPme, usingVerletScheme,
+                                            bondedTarget, canUseGpuForBonded, cr->nnodes,
+                                            domdecOptions.numPmeRanks, gpusWereDetected);
 
         pmeRunMode   = (useGpuForPme ? PmeRunMode::GPU : PmeRunMode::CPU);
         if (pmeRunMode == PmeRunMode::GPU)
@@ -984,7 +998,7 @@ int Mdrunner::mdrunner()
     // Note that in general useGpuForNonbonded, etc. can have a value
     // that is inconsistent with the presence of actual GPUs on any
     // rank, and that is not known to be a problem until the
-    // duty of the ranks on a node become node.
+    // duty of the ranks on a node become known.
     //
     // TODO Later we might need the concept of computeTasksOnThisRank,
     // from which we construct gpuTasksOnThisRank.
@@ -1001,6 +1015,8 @@ int Mdrunner::mdrunner()
     {
         if (useGpuForNonbonded)
         {
+            // Note that any bonded tasks on a GPU always accompany a
+            // non-bonded task.
             if (haveGpus)
             {
                 gpuTasksOnThisRank.push_back(GpuTask::Nonbonded);
@@ -1009,6 +1025,10 @@ int Mdrunner::mdrunner()
             {
                 gmx_fatal(FARGS, "Cannot run short-ranged nonbonded interactions on a GPU because there is none detected.");
             }
+            else if (bondedTarget == TaskTarget::Gpu)
+            {
+                gmx_fatal(FARGS, "Cannot run bonded interactions on a GPU because there is none detected.");
+            }
         }
     }
     // TODO cr->duty & DUTY_PME should imply that a PME algorithm is active, but currently does not.
@@ -1032,7 +1052,8 @@ int Mdrunner::mdrunner()
     {
         // Produce the task assignment for this rank.
         gpuTaskAssignment = runTaskAssignment(gpuIdsToUse, userGpuTaskAssignment, *hwinfo,
-                                              mdlog, cr, ms, physicalNodeComm, gpuTasksOnThisRank);
+                                              mdlog, cr, ms, physicalNodeComm, gpuTasksOnThisRank,
+                                              useGpuForBonded, pmeRunMode);
     }
     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 
@@ -1198,6 +1219,7 @@ int Mdrunner::mdrunner()
                       opt2fn("-tablep", filenames.size(), filenames.data()),
                       opt2fns("-tableb", filenames.size(), filenames.data()),
                       *hwinfo, nonbondedDeviceInfo,
+                      useGpuForBonded,
                       FALSE,
                       pforce);
 
@@ -1553,6 +1575,8 @@ class Mdrunner::BuilderImplementation
 
         void addPME(const char* pme_opt_, const char* pme_fft_opt_);
 
+        void addBondedTaskAssignment(const char* bonded_opt);
+
         void addHardwareOptions(const gmx_hw_opt_t &hardwareOptions);
 
         void addFilenames(ArrayRef <const t_filenm> filenames);
@@ -1572,6 +1596,7 @@ class Mdrunner::BuilderImplementation
         const char* nbpu_opt_    = nullptr;
         const char* pme_opt_     = nullptr;
         const char* pme_fft_opt_ = nullptr;
+        const char *bonded_opt_  = nullptr;
 
         MdrunOptions                          mdrunOptions_;
 
@@ -1729,6 +1754,15 @@ Mdrunner Mdrunner::BuilderImplementation::build()
         GMX_THROW(gmx::APIError("MdrunnerBuilder::addElectrostatics() is required before build()"));
     }
 
+    if (bonded_opt_)
+    {
+        newRunner.bonded_opt = bonded_opt_;
+    }
+    else
+    {
+        GMX_THROW(gmx::APIError("MdrunnerBuilder::addBondedTaskAssignment() is required before build()"));
+    }
+
     newRunner.restraintManager_ = compat::make_unique<gmx::RestraintManager>();
 
     if (stopHandlerBuilder_)
@@ -1755,6 +1789,11 @@ void Mdrunner::BuilderImplementation::addPME(const char* pme_opt,
     pme_fft_opt_ = pme_fft_opt;
 }
 
+// Records the requested target for bonded interactions. Only the
+// pointer is stored, not a copy of the string it points to.
+void Mdrunner::BuilderImplementation::addBondedTaskAssignment(const char* bonded_opt)
+{
+    bonded_opt_ = bonded_opt;
+}
+
 void Mdrunner::BuilderImplementation::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions)
 {
     hardwareOptions_ = hardwareOptions;
@@ -1841,6 +1880,12 @@ MdrunnerBuilder &MdrunnerBuilder::addElectrostatics(const char* pme_opt,
     return *this;
 }
 
+// Forwards the bonded task target to the implementation object and
+// returns this builder, so that calls can be chained.
+MdrunnerBuilder &MdrunnerBuilder::addBondedTaskAssignment(const char* bonded_opt)
+{
+    impl_->addBondedTaskAssignment(bonded_opt);
+    return *this;
+}
+
 Mdrunner MdrunnerBuilder::build()
 {
     return impl_->build();
index 75ee32f51b7c3696c41579e99a71d685b57d77a3..465fdd27f159bed229f161b9222d320f8e300eb5 100644 (file)
@@ -233,6 +233,14 @@ class Mdrunner
          * \todo replace with string or enum class and initialize with sensible value.
          */
         const char                             *pme_fft_opt = nullptr;
+
+        /*! \brief Target bonded interactions for "cpu", "gpu", or "auto". Default is "auto".
+         *
+         * \internal
+         * \todo replace with string or enum class and initialize with sensible value.
+         */
+        const char                             *bonded_opt = nullptr;
+
         //! Command-line override for the duration of a neighbor list with the Verlet scheme.
         int                                     nstlist_cmdline = 0;
         //! Parameters for replica-exchange simulations.
@@ -403,6 +411,28 @@ class MdrunnerBuilder final
         MdrunnerBuilder &addElectrostatics(const char* pme_opt,
                                            const char* pme_fft_opt);
 
+        /*!
+         * \brief Assign responsibility for tasks for bonded interactions.
+         *
+         * Required. Director code should provide valid options for
+         * bonded interaction task assignment, whether or not such
+         * interactions are present. The builder does not apply any
+         * defaults, so client code should be prepared to provide
+         * (e.g.) "auto" in the event no user input or logic provides
+         * an alternative argument.
+         *
+         * \param bonded_opt Target bonded interactions for "cpu", "gpu", or "auto".
+         *
+         * The caller must guarantee that the pointed-to C string is valid through
+         * simulation launch.
+         *
+         * \internal
+         * The arguments are passed as references to elements of arrays of C strings.
+         * \todo Replace with modern strings or (better) enum classes.
+         * \todo Make optional and/or encapsulate into task assignment module.
+         */
+        MdrunnerBuilder &addBondedTaskAssignment(const char *bonded_opt);
+
         /*!
          * \brief Provide access to the multisim communicator to use.
          *
index 10bcbb8f500ca40a9bd0ccd97d7986310227ce46..dbd9dd67460e535aa28572c5f06a6d35c61d4fbd 100644 (file)
@@ -408,4 +408,70 @@ bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
     return false;
 }
 
+bool decideWhetherToUseGpusForBonded(const bool       useGpuForNonbonded,
+                                     const bool /*useGpuForPme*/,
+                                     const bool       usingVerletScheme,
+                                     const TaskTarget bondedTarget,
+                                     const bool       canUseGpuForBonded,
+                                     const int        /*numRanksPerSimulation*/,
+                                     const int        /*numPmeRanksPerSimulation*/,
+                                     const bool       gpusWereDetected)
+{
+    if (bondedTarget == TaskTarget::Cpu)
+    {
+        return false;
+    }
+
+    if (!usingVerletScheme)
+    {
+        if (bondedTarget == TaskTarget::Gpu)
+        {
+            GMX_THROW(InconsistentInputError
+                          ("Bonded interactions on the GPU were required, which requires using "
+                          "the Verlet scheme. Either use the Verlet scheme, or do not require using GPUs."));
+        }
+
+        return false;
+    }
+
+    if (!canUseGpuForBonded)
+    {
+        if (bondedTarget == TaskTarget::Gpu)
+        {
+            GMX_THROW(InconsistentInputError
+                          ("Bonded interactions on the GPU were required, but not supported for these "
+                          "simulation settings. Change your settings, or do not require using GPUs."));
+        }
+
+        return false;
+    }
+
+    if (!useGpuForNonbonded)
+    {
+        if (bondedTarget == TaskTarget::Gpu)
+        {
+            GMX_THROW(InconsistentInputError
+                          ("Bonded interactions on the GPU were required, but this requires that "
+                          "short-ranged non-bonded interactions are also run on the GPU. Change "
+                          "your settings, or do not require using GPUs."));
+        }
+
+        return false;
+    }
+
+    // TODO If the bonded kernels do not get fused, then performance
+    // overheads might suggest alternative choices here.
+
+    if (bondedTarget == TaskTarget::Gpu)
+    {
+        // We still don't know whether it is an error if no GPUs are
+        // found.
+        return true;
+    }
+
+    // If we get here, then the user permitted GPUs, which we should
+    // use for bonded interactions if any were detected.
+    return gpusWereDetected;
+}
+
 }  // namespace gmx
index 0730d52b9a0e13d586760055d88cb1b9b3454fee..9b8279cea775c32388623fe454c51bd0987639a8 100644 (file)
@@ -186,6 +186,30 @@ bool decideWhetherToUseGpusForPme(bool                    useGpuForNonbonded,
                                   int                     numPmeRanksPerSimulation,
                                   bool                    gpusWereDetected);
 
+/*! \brief Decide whether the simulation will try to run bonded tasks on GPUs.
+ *
+ * \param[in]  useGpuForNonbonded        Whether GPUs will be used for nonbonded interactions.
+ * \param[in]  useGpuForPme              Whether GPUs will be used for PME interactions (currently unused).
+ * \param[in]  usingVerletScheme         Whether the nonbondeds are using the Verlet scheme.
+ * \param[in]  bondedTarget              The user's choice for mdrun -bonded for where to assign tasks.
+ * \param[in]  canUseGpuForBonded        Whether the bonded interactions can run on a GPU
+ * \param[in]  numRanksPerSimulation     The number of ranks in each simulation (currently unused).
+ * \param[in]  numPmeRanksPerSimulation  The number of PME ranks in each simulation (currently unused).
+ * \param[in]  gpusWereDetected          Whether compatible GPUs were detected on any node.
+ *
+ * \returns    Whether the simulation will run bonded tasks on GPUs.
+ *
+ * \throws     std::bad_alloc          If out of memory
+ *             InconsistentInputError  If the user requirements are inconsistent. */
+bool decideWhetherToUseGpusForBonded(bool       useGpuForNonbonded,
+                                     bool       useGpuForPme,
+                                     bool       usingVerletScheme,
+                                     TaskTarget bondedTarget,
+                                     bool       canUseGpuForBonded,
+                                     int        numRanksPerSimulation,
+                                     int        numPmeRanksPerSimulation,
+                                     bool       gpusWereDetected);
+
 }  // namespace gmx
 
 #endif
index e1cb200375a28e241d46d1c0c7f2ffe1df85af80..70f50476c527f68599950238eb9fc5eb21cacf06 100644 (file)
@@ -45,6 +45,7 @@
 #include <set>
 #include <string>
 
+#include "gromacs/ewald/pme.h"
 #include "gromacs/gpu_utils/gpu_utils.h"
 #include "gromacs/utility/cstringutil.h"
 #include "gromacs/utility/logger.h"
@@ -84,7 +85,9 @@ reportGpuUsage(const MDLogger                &mdlog,
                const GpuTaskAssignments      &gpuTaskAssignmentOnRanksOfThisNode,
                size_t                         numGpuTasksOnThisNode,
                size_t                         numRanks,
-               bool                           bPrintHostName)
+               bool                           bPrintHostName,
+               bool                           useGpuForBonded,
+               PmeRunMode                     pmeRunMode)
 {
     size_t numGpusInUse = countUniqueGpuIdsUsed(gpuTaskAssignmentOnRanksOfThisNode);
     if (numGpusInUse == 0)
@@ -133,6 +136,17 @@ reportGpuUsage(const MDLogger                &mdlog,
                                     numRanks,
                                     (numRanks > 1) ? "s" : "",
                                     gpuIdsString.c_str());
+        // Because there is a GPU in use, there must be a PP task on a GPU.
+        output += gmx::formatString("PP tasks will do short-ranged%s interactions on the GPU\n",
+                                    useGpuForBonded ? "and most bonded" : "");
+        if (pmeRunMode == PmeRunMode::Mixed)
+        {
+            output += gmx::formatString("PME tasks will do only spread and gather on the GPU\n");
+        }
+        else if (pmeRunMode == PmeRunMode::GPU)
+        {
+            output += gmx::formatString("PME tasks will do all aspects on the GPU\n");
+        }
     }
 
     /* NOTE: this print is only for and on one physical node */
index 400cdeb165d34c7a268de0066bf788a6b9ba64fa..fa26ddddf1b0e38a8c00074b52028ba3d4d8b296 100644 (file)
@@ -50,6 +50,8 @@
 
 #include "gromacs/taskassignment/taskassignment.h"
 
+enum class PmeRunMode;
+
 namespace gmx
 {
 
@@ -67,6 +69,8 @@ class MDLogger;
  * \param[in]  numGpuTasksOnThisNode               The number of GPU tasks on this node.
  * \param[in]  numPpRanks                          Number of PP ranks on this node
  * \param[in]  bPrintHostName                      Print the hostname in the usage information
+ * \param[in]  useGpuForBonded                     Whether GPU PP tasks will do bonded work on the GPU
+ * \param[in]  pmeRunMode                          Describes the execution of PME tasks
  *
  * \throws     std::bad_alloc if out of memory */
 void
@@ -75,8 +79,9 @@ reportGpuUsage(const MDLogger                &mdlog,
                const GpuTaskAssignments      &gpuTaskAssignmentOnRanksOfThisNode,
                size_t                         numGpuTasksOnThisNode,
                size_t                         numPpRanks,
-               bool                           bPrintHostName);
-
+               bool                           bPrintHostName,
+               bool                           useGpuForBonded,
+               PmeRunMode                     pmeRunMode);
 
 }  // namespace gmx
 
index 136587e3cb92ae5613351f8dc62bf3bc40634ffc..785a93a8044b50b34380157a55abd8190136144d 100644 (file)
@@ -190,7 +190,9 @@ runTaskAssignment(const std::vector<int>         &gpuIdsToUse,
                   const t_commrec                *cr,
                   const gmx_multisim_t           *ms,
                   const PhysicalNodeCommunicator &physicalNodeComm,
-                  const std::vector<GpuTask>     &gpuTasksOnThisRank)
+                  const std::vector<GpuTask>     &gpuTasksOnThisRank,
+                  bool                            useGpuForBonded,
+                  PmeRunMode                      pmeRunMode)
 {
     /* Communicate among ranks on this node to find each task that can
      * be executed on a GPU, on each rank. */
@@ -307,7 +309,8 @@ runTaskAssignment(const std::vector<int>         &gpuIdsToUse,
     }
 
     reportGpuUsage(mdlog, !userGpuTaskAssignment.empty(), taskAssignmentOnRanksOfThisNode,
-                   numGpuTasksOnThisNode, physicalNodeComm.size_, cr->nnodes > 1);
+                   numGpuTasksOnThisNode, physicalNodeComm.size_, cr->nnodes > 1,
+                   useGpuForBonded, pmeRunMode);
 
     // If the user chose a task assignment, give them some hints where appropriate.
     if (!userGpuTaskAssignment.empty())
index 7c8250a5645691da8d6afdd278967e3ee172ff27..0c57951d0512bcd25bda2ed097b8dbd1f0b1c016 100644 (file)
@@ -56,6 +56,8 @@ struct gmx_hw_info_t;
 struct gmx_multisim_t;
 struct t_commrec;
 
+enum class PmeRunMode;
+
 namespace gmx
 {
 
@@ -108,6 +110,8 @@ using GpuTaskAssignments = std::vector<GpuTaskAssignment>;
  * \param[in]  physicalNodeComm           Communication object for this physical node.
  * \param[in]  gpuTasksOnThisRank         Information about what GPU tasks
  *                                        exist on this rank.
+ * \param[in]  useGpuForBonded            Whether GPU PP tasks will do bonded work on the GPU
+ * \param[in]  pmeRunMode                 Describes the execution of PME tasks
  *
  * \returns  A GPU task assignment for this rank.
  *
@@ -122,7 +126,9 @@ runTaskAssignment(const std::vector<int>         &gpuIdsToUse,
                   const t_commrec                *cr,
                   const gmx_multisim_t           *ms,
                   const PhysicalNodeCommunicator &physicalNodeComm,
-                  const std::vector<GpuTask>     &gpuTasksOnThisRank);
+                  const std::vector<GpuTask>     &gpuTasksOnThisRank,
+                  bool                            useGpuForBonded,
+                  PmeRunMode                      pmeRunMode);
 
 //! Function for whether the task of \c mapping has value \c TaskType.
 template<GpuTask TaskType>
index 6e568a4a57f2c81b53fc1520bc1d5ad7b9bb1e6f..b7ee9fc40ba21ba58b9ff638a96755628b4b60ac 100644 (file)
@@ -271,6 +271,7 @@ int gmx_mdrun(int argc, char *argv[])
     builder.addNonBonded(options.nbpu_opt_choices[0]);
     // \todo pass by value
     builder.addElectrostatics(options.pme_opt_choices[0], options.pme_fft_opt_choices[0]);
+    builder.addBondedTaskAssignment(options.bonded_opt_choices[0]);
     builder.addNeighborList(options.nstlist_cmdline);
     builder.addReplicaExchange(options.replExParams);
     // \todo take ownership of multisim resources (ms)