Task assignment for bonded interactions on CUDA GPUs

author Mark Abraham <mark.j.abraham@gmail.com>

Thu, 11 Oct 2018 19:53:38 +0000 (21:53 +0200)

committer Mark Abraham <mark.j.abraham@gmail.com>

Tue, 16 Oct 2018 07:36:50 +0000 (09:36 +0200)
author Mark Abraham <mark.j.abraham@gmail.com>
Thu, 11 Oct 2018 19:53:38 +0000 (21:53 +0200)
committer Mark Abraham <mark.j.abraham@gmail.com>
Tue, 16 Oct 2018 07:36:50 +0000 (09:36 +0200)
diff --git a/src/api/cpp/context.cpp b/src/api/cpp/context.cpp

index c5a8024d990baa3879004ae70929eb430480f9ed..9bfd993c282319555080f35b4dc040dac2593834 100644 (file)
--- a/src/api/cpp/context.cpp
+++ b/src/api/cpp/context.cpp
@@ -187,6 +187,7 @@ std::shared_ptr<Session> ContextImpl::launch(const Workflow &work)
          builder.addNonBonded(options_.nbpu_opt_choices[0]);
          // \todo pass by value
          builder.addElectrostatics(options_.pme_opt_choices[0], options_.pme_fft_opt_choices[0]);
+        builder.addBondedTaskAssignment(options_.bonded_opt_choices[0]);
          builder.addNeighborList(options_.nstlist_cmdline);
          builder.addReplicaExchange(options_.replExParams);
          // \todo take ownership of multisim resources (ms)
diff --git a/src/gromacs/ewald/pme.h b/src/gromacs/ewald/pme.h

index 81988e6bc9ce65beef8971891655cb9bc19991bd..ccf5e7227d929cd34b2f63733eeb4dd827abfbc9 100644 (file)
--- a/src/gromacs/ewald/pme.h
+++ b/src/gromacs/ewald/pme.h
@@ -88,7 +88,7 @@ enum {
  /*! \brief Possible PME codepaths on a rank.
   * \todo: make this enum class with gmx_pme_t C++ refactoring
   */
-enum PmeRunMode
+enum class PmeRunMode
  {
      None,    //!< No PME task is done
      CPU,     //!< Whole PME computation is done on CPU
diff --git a/src/gromacs/listed-forces/manage-threading.cpp b/src/gromacs/listed-forces/manage-threading.cpp

index 1541edd81a314f5ea6b0086030254e02968fa77d..7c50b774aa798615c277e984e954304829e33961 100644 (file)
--- a/src/gromacs/listed-forces/manage-threading.cpp
+++ b/src/gromacs/listed-forces/manage-threading.cpp
@@ -54,11 +54,15 @@
  
  #include <algorithm>
  #include <array>
+#include <string>
  
  #include "gromacs/listed-forces/listed-forces.h"
  #include "gromacs/mdlib/gmx_omp_nthreads.h"
+#include "gromacs/mdtypes/inputrec.h"
  #include "gromacs/pbcutil/ishift.h"
  #include "gromacs/topology/ifunc.h"
+#include "gromacs/topology/topology.h"
+#include "gromacs/utility/arrayref.h"
  #include "gromacs/utility/exceptions.h"
  #include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/gmxassert.h"
@@ -248,6 +252,8 @@ static void divide_bondeds_over_threads(bonded_threading_t *bt,
          int            nrToAssignToCpuThreads = il.nr;
  
          if (useGpuForBondeds &&
+                     // TODO remove the next line when we have GPU bonded kernels
+            false && // NOLINT readability-simplify-boolean-expr
              ftypeGpuIndex < ftypesOnGpu.size() &&
              ftypesOnGpu[ftypeGpuIndex] == ftype)
          {
@@ -370,6 +376,109 @@ static void convertIlistToNbnxnOrder(const t_ilist            &src,
      }
  }
  
+namespace gmx
+{
+
+/*! \brief Tells whether \c ilists contains at least one interaction
+ * of a type listed in \c ftypesOnGpu.
+ *
+ * Perturbation is not implemented in the GPU bonded kernels. If all
+ * the interactions were actually perturbed, then that will be
+ * detected later on each domain, and work will never run on the
+ * GPU. This is very unlikely to occur, and has little run-time cost,
+ * so the code is not complicated by catering for it here. */
+static bool someInteractionsCanRunOnGpu(const InteractionLists &ilists)
+{
+    return std::any_of(std::begin(ftypesOnGpu), std::end(ftypesOnGpu),
+                       [&ilists](const int ftype)
+                       {
+                           // A non-empty atom list means this type is in use.
+                           return !ilists[ftype].iatoms.empty();
+                       });
+}
+
+/*! \brief Tells whether the topology \c mtop has interactions suitable for a GPU.
+ *
+ * Both the interaction lists of every molecule type and, when
+ * present, the inter-molecular interaction lists are inspected. */
+static bool bondedInteractionsCanRunOnGpu(const gmx_mtop_t &mtop)
+{
+    // A single suitable interaction in any regular molecule type suffices.
+    for (const auto &moleculeType : mtop.moltype)
+    {
+        if (someInteractionsCanRunOnGpu(moleculeType.ilist))
+        {
+            return true;
+        }
+    }
+    // Otherwise the answer depends on the inter-molecular
+    // interactions, which only exist for some topologies.
+    return mtop.intermolecular_ilist &&
+           someInteractionsCanRunOnGpu(*mtop.intermolecular_ilist);
+}
+
+/*! \brief Composes a descriptive message in \c error from the
+ * \c errorReasons why bondeds on a GPU are not supported.
+ *
+ * \c error is only written when there is at least one reason and the
+ * pointer is non-null.
+ *
+ * \returns Whether the absence of errorReasons indicates there is support. */
+static bool
+addMessageIfNotSupported(ArrayRef <const std::string> errorReasons,
+                         std::string                 *error)
+{
+    if (errorReasons.empty())
+    {
+        // Nothing prevents support, so there is nothing to report.
+        return true;
+    }
+    if (error != nullptr)
+    {
+        const std::string joinedReasons = joinStrings(errorReasons, "; ");
+        *error = "Bonded interactions cannot run on GPUs: " + joinedReasons + ".";
+    }
+    return false;
+}
+
+// Collects the build-configuration reasons that prevent bonded
+// interactions from running on a GPU, and reports them via \c error.
+bool buildSupportsGpuBondeds(std::string *error)
+{
+    std::vector<std::string> buildObstacles;
+
+    // Precision of the build
+    if (GMX_DOUBLE)
+    {
+        buildObstacles.emplace_back("not supported with double precision");
+    }
+
+    // Kind of GPU support compiled into the build
+    if (GMX_GPU == GMX_GPU_OPENCL)
+    {
+        buildObstacles.emplace_back("not supported with OpenCL build of GROMACS");
+    }
+    else if (GMX_GPU == GMX_GPU_NONE)
+    {
+        buildObstacles.emplace_back("not supported with CPU-only build of GROMACS");
+    }
+
+    // Supported exactly when no obstacle was recorded.
+    return addMessageIfNotSupported(buildObstacles, error);
+}
+
+// Collects the reasons why the simulation input prevents bonded
+// interactions from running on a GPU, and reports them via \c error.
+bool inputSupportsGpuBondeds(const t_inputrec &ir,
+                             const gmx_mtop_t &mtop,
+                             std::string      *error)
+{
+    // Every obstacle is gathered, rather than stopping at the first,
+    // so that the message can mention all of them together.
+    std::vector<std::string> inputObstacles;
+
+    const bool haveSuitableInteractions = bondedInteractionsCanRunOnGpu(mtop);
+    if (!haveSuitableInteractions)
+    {
+        inputObstacles.emplace_back("No supported bonded interactions are present");
+    }
+
+    const bool usingGroupScheme = (ir.cutoff_scheme == ecutsGROUP);
+    if (usingGroupScheme)
+    {
+        inputObstacles.emplace_back("group cutoff scheme");
+    }
+
+    const bool usingDynamicalIntegrator = EI_DYNAMICS(ir.eI);
+    if (!usingDynamicalIntegrator)
+    {
+        inputObstacles.emplace_back("not a dynamical integrator");
+    }
+
+    // Supported exactly when no obstacle was recorded.
+    return addMessageIfNotSupported(inputObstacles, error);
+}
+
+} // namespace gmx
+
  //! Divides bonded interactions over threads and GPU
  void assign_bondeds_to_gpu(GpuBondedLists           *gpuBondedLists,
                             gmx::ArrayRef<const int>  nbnxnAtomOrder,
diff --git a/src/gromacs/listed-forces/manage-threading.h b/src/gromacs/listed-forces/manage-threading.h

index 47dd87435cf34b8d967a74e7a94a177fe34ac7bd..960bcace4ed11f17617ece0b4356346c6844d30f 100644 (file)
--- a/src/gromacs/listed-forces/manage-threading.h
+++ b/src/gromacs/listed-forces/manage-threading.h
@@ -46,10 +46,15 @@
  
  #include <cstdio>
  
-#include "gromacs/mdtypes/forcerec.h"
+#include <string>
+
  #include "gromacs/topology/idef.h"
  #include "gromacs/utility/arrayref.h"
  
+struct bonded_threading_t;
+struct gmx_mtop_t;
+struct t_inputrec;
+
  /*! \internal \brief Struct for storing lists of bonded interaction for evaluation on a GPU */
  struct GpuBondedLists
  {
@@ -57,6 +62,34 @@ struct GpuBondedLists
      bool             haveInteractions; /**< Tells whether there are any interaction in iLists */
  };
  
+
+namespace gmx
+{
+
+/*! \brief Checks whether the GROMACS build allows to compute bonded interactions on a GPU.
+ *
+ * \param[out] error  If non-null, the diagnostic message when bondeds cannot run on a GPU.
+ *
+ * \returns true when this build can run bonded interactions on a GPU, false otherwise.
+ *
+ * \throws std::bad_alloc when out of memory.
+ */
+bool buildSupportsGpuBondeds(std::string *error);
+
+/*! \brief Checks whether the input system allows to compute bonded interactions on a GPU.
+ *
+ * \param[in]  ir     Input system.
+ * \param[in]  mtop   Complete system topology to search for supported interactions.
+ * \param[out] error  If non-null, the error message if the input is not supported on GPU.
+ *
+ * \returns true if bonded interactions can run on a GPU with this input, false otherwise.
+ */
+bool inputSupportsGpuBondeds(const t_inputrec &ir,
+                             const gmx_mtop_t &mtop,
+                             std::string      *error);
+
+}   // namespace gmx
+
  /*! \brief Copy bonded interactions assigned to the GPU to \p gpuBondedLists */
  void assign_bondeds_to_gpu(GpuBondedLists           *gpuBondedLists,
                             gmx::ArrayRef<const int>  nbnxnAtomOrder,
@@ -83,6 +116,6 @@ void tear_down_bonded_threading(bonded_threading_t *bt);
   * A pointer to this struct is returned as \p *bb_ptr.
   */
  void init_bonded_threading(FILE *fplog, int nenergrp,
-                           struct bonded_threading_t **bt_ptr);
+                           bonded_threading_t **bt_ptr);
  
  #endif
diff --git a/src/gromacs/mdlib/forcerec.cpp b/src/gromacs/mdlib/forcerec.cpp

index f718d22cbfb2b5102b22aa796d1665b4b37ae780..2d931d28a4700a84bf01e86c59cb30768e7a28a8 100644 (file)
--- a/src/gromacs/mdlib/forcerec.cpp
+++ b/src/gromacs/mdlib/forcerec.cpp
@@ -2315,6 +2315,7 @@ void init_forcerec(FILE                             *fp,
                     gmx::ArrayRef<const std::string>  tabbfnm,
                     const gmx_hw_info_t              &hardwareInfo,
                     const gmx_device_info_t          *deviceInfo,
+                   const bool                        useGpuForBonded,
                     gmx_bool                          bNoSolvOpt,
                     real                              print_force)
  {
@@ -3052,15 +3053,10 @@ void init_forcerec(FILE                             *fp,
      init_bonded_threading(fp, mtop->groups.grps[egcENER].nr,
                            &fr->bondedThreading);
  
-    // TODO: Replace this condition by the GPU bonded task boolean
-    if (fr->cutoff_scheme == ecutsVERLET && getenv("GMX_TEST_GPU_BONDEDS"))
+    if (useGpuForBonded)
      {
          fr->gpuBondedLists = new GpuBondedLists;
      }
-    else
-    {
-        fr->gpuBondedLists = nullptr;
-    }
  
      fr->nthread_ewc = gmx_omp_nthreads_get(emntBonded);
      snew(fr->ewc_t, fr->nthread_ewc);
@@ -3152,6 +3148,7 @@ void done_forcerec(t_forcerec *fr, int numMolBlocks, int numEnergyGroups)
      done_ns(fr->ns, numEnergyGroups);
      sfree(fr->ewc_t);
      tear_down_bonded_threading(fr->bondedThreading);
+    delete fr->gpuBondedLists;
      fr->bondedThreading = nullptr;
      sfree(fr);
  }
diff --git a/src/gromacs/mdlib/forcerec.h b/src/gromacs/mdlib/forcerec.h

index 64e60c7332eadef06b624f466ea6a92cfc24cc97..1f0694699bf242262f79eb12bfcc55987dee8a06 100644 (file)
--- a/src/gromacs/mdlib/forcerec.h
+++ b/src/gromacs/mdlib/forcerec.h
@@ -115,6 +115,7 @@ void init_interaction_const_tables(FILE                   *fp,
   * \param[in]  tabbfnm     Table potential files for bonded interactions
   * \param[in]  hardwareInfo  Information about hardware
   * \param[in]  deviceInfo  Info about GPU device to use for short-ranged work
+ * \param[in]  useGpuForBonded  Whether bonded interactions will run on a GPU
   * \param[in]  bNoSolvOpt  Do not use solvent optimization
   * \param[in]  print_force Print forces for atoms with force >= print_force
   */
@@ -131,6 +132,7 @@ void init_forcerec(FILE                             *fplog,
                     gmx::ArrayRef<const std::string>  tabbfnm,
                     const gmx_hw_info_t              &hardwareInfo,
                     const gmx_device_info_t          *deviceInfo,
+                   bool                              useGpuForBonded,
                     gmx_bool                          bNoSolvOpt,
                     real                              print_force);
  
diff --git a/src/gromacs/mdrun/legacymdrunoptions.h b/src/gromacs/mdrun/legacymdrunoptions.h

index 7584f2da072bc9e2483b91bd97466fc8a8f7e944..637cd2a6b93dabe9fd632d92e30794922c3a8fec 100644 (file)
--- a/src/gromacs/mdrun/legacymdrunoptions.h
+++ b/src/gromacs/mdrun/legacymdrunoptions.h
@@ -155,13 +155,15 @@ class LegacyMdrunOptions
          { nullptr, "auto", "cpu", "gpu", nullptr };
          const char       *pme_fft_opt_choices[5] =
          { nullptr, "auto", "cpu", "gpu", nullptr };
+        const char       *bonded_opt_choices[5] =
+        { nullptr, "auto", "cpu", "gpu", nullptr };
          gmx_bool          bTryToAppendFiles     = TRUE;
          const char       *gpuIdsAvailable       = "";
          const char       *userGpuTaskAssignment = "";
  
          ImdOptions       &imdOptions = mdrunOptions.imdOptions;
  
-        t_pargs           pa[47] = {
+        t_pargs           pa[48] = {
  
              { "-dd",      FALSE, etRVEC, {&realddxyz},
                "Domain decomposition grid, 0 is optimize" },
@@ -224,6 +226,8 @@ class LegacyMdrunOptions
                "Perform PME calculations on" },
              { "-pmefft", FALSE, etENUM, {pme_fft_opt_choices},
                "Perform PME FFT calculations on" },
+            { "-bonded",     FALSE, etENUM, {bonded_opt_choices},
+              "Perform bonded calculations on" },
              { "-v",       FALSE, etBOOL, {&mdrunOptions.verbose},
                "Be loud and noisy" },
              { "-pforce",  FALSE, etREAL, {&pforce},
diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp

index 5bc87e3cc8944e812b51fb25dbf072bd93c4f024..ce9b5b8eb3a1601bc7f1ddc4e95016450810f788 100644 (file)
--- a/src/gromacs/mdrun/runner.cpp
+++ b/src/gromacs/mdrun/runner.cpp
@@ -75,6 +75,7 @@
  #include "gromacs/hardware/detecthardware.h"
  #include "gromacs/hardware/printhardware.h"
  #include "gromacs/listed-forces/disre.h"
+#include "gromacs/listed-forces/manage-threading.h"
  #include "gromacs/listed-forces/orires.h"
  #include "gromacs/math/functions.h"
  #include "gromacs/math/utilities.h"
@@ -185,6 +186,7 @@ Mdrunner Mdrunner::cloneOnSpawnedThread() const
      newRunner.nbpu_opt            = nbpu_opt;
      newRunner.pme_opt             = pme_opt;
      newRunner.pme_fft_opt         = pme_fft_opt;
+    newRunner.bonded_opt          = bonded_opt;
      newRunner.nstlist_cmdline     = nstlist_cmdline;
      newRunner.replExParams        = replExParams;
      newRunner.pforce              = pforce;
@@ -473,6 +475,7 @@ int Mdrunner::mdrunner()
      auto       nonbondedTarget = findTaskTarget(nbpu_opt);
      auto       pmeTarget       = findTaskTarget(pme_opt);
      auto       pmeFftTarget    = findTaskTarget(pme_fft_opt);
+    auto       bondedTarget    = findTaskTarget(bonded_opt);
      PmeRunMode pmeRunMode      = PmeRunMode::None;
  
      // Here we assume that SIMMASTER(cr) does not change even after the
@@ -665,23 +668,34 @@ int Mdrunner::mdrunner()
      // having an assertion?
      //
      // Note that these variables describe only their own node.
+    //
+    // Note that when bonded interactions run on a GPU they always run
+    // alongside a nonbonded task, so do not influence task assignment
+    // even though they affect the force calculation schedule.
      bool useGpuForNonbonded = false;
      bool useGpuForPme       = false;
+    bool useGpuForBonded    = false;
      try
      {
          // It's possible that there are different numbers of GPUs on
          // different nodes, which is the user's responsibilty to
          // handle. If unsuitable, we will notice that during task
          // assignment.
-        bool gpusWereDetected = hwinfo->ngpu_compatible_tot > 0;
+        bool gpusWereDetected  = hwinfo->ngpu_compatible_tot > 0;
+        bool usingVerletScheme = inputrec->cutoff_scheme == ecutsVERLET;
          useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(nonbondedTarget, userGpuTaskAssignment,
-                                                                emulateGpuNonbonded, inputrec->cutoff_scheme == ecutsVERLET,
+                                                                emulateGpuNonbonded, usingVerletScheme,
                                                                  gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, !GMX_THREAD_MPI),
                                                                  gpusWereDetected);
          auto canUseGpuForPme   = pme_gpu_supports_build(nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr);
          useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment,
                                                      canUseGpuForPme, cr->nnodes, domdecOptions.numPmeRanks,
                                                      gpusWereDetected);
+        auto canUseGpuForBonded = buildSupportsGpuBondeds(nullptr) && inputSupportsGpuBondeds(*inputrec, mtop, nullptr);
+        useGpuForBonded =
+            decideWhetherToUseGpusForBonded(useGpuForNonbonded, useGpuForPme, usingVerletScheme,
+                                            bondedTarget, canUseGpuForBonded, cr->nnodes,
+                                            domdecOptions.numPmeRanks, gpusWereDetected);
  
          pmeRunMode   = (useGpuForPme ? PmeRunMode::GPU : PmeRunMode::CPU);
          if (pmeRunMode == PmeRunMode::GPU)
@@ -984,7 +998,7 @@ int Mdrunner::mdrunner()
      // Note that in general useGpuForNonbonded, etc. can have a value
      // that is inconsistent with the presence of actual GPUs on any
      // rank, and that is not known to be a problem until the
-    // duty of the ranks on a node become node.
+    // duty of the ranks on a node becomes known.
      //
      // TODO Later we might need the concept of computeTasksOnThisRank,
      // from which we construct gpuTasksOnThisRank.
@@ -1001,6 +1015,8 @@ int Mdrunner::mdrunner()
      {
          if (useGpuForNonbonded)
          {
+            // Note that any bonded tasks on a GPU always accompany a
+            // non-bonded task.
              if (haveGpus)
              {
                  gpuTasksOnThisRank.push_back(GpuTask::Nonbonded);
@@ -1009,6 +1025,10 @@ int Mdrunner::mdrunner()
              {
                  gmx_fatal(FARGS, "Cannot run short-ranged nonbonded interactions on a GPU because there is none detected.");
              }
+            else if (bondedTarget == TaskTarget::Gpu)
+            {
+                gmx_fatal(FARGS, "Cannot run bonded interactions on a GPU because there is none detected.");
+            }
          }
      }
      // TODO cr->duty & DUTY_PME should imply that a PME algorithm is active, but currently does not.
@@ -1032,7 +1052,8 @@ int Mdrunner::mdrunner()
      {
          // Produce the task assignment for this rank.
          gpuTaskAssignment = runTaskAssignment(gpuIdsToUse, userGpuTaskAssignment, *hwinfo,
-                                              mdlog, cr, ms, physicalNodeComm, gpuTasksOnThisRank);
+                                              mdlog, cr, ms, physicalNodeComm, gpuTasksOnThisRank,
+                                              useGpuForBonded, pmeRunMode);
      }
      GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
  
@@ -1198,6 +1219,7 @@ int Mdrunner::mdrunner()
                        opt2fn("-tablep", filenames.size(), filenames.data()),
                        opt2fns("-tableb", filenames.size(), filenames.data()),
                        *hwinfo, nonbondedDeviceInfo,
+                      useGpuForBonded,
                        FALSE,
                        pforce);
  
@@ -1553,6 +1575,8 @@ class Mdrunner::BuilderImplementation
  
          void addPME(const char* pme_opt_, const char* pme_fft_opt_);
  
+        void addBondedTaskAssignment(const char* bonded_opt);
+
          void addHardwareOptions(const gmx_hw_opt_t &hardwareOptions);
  
          void addFilenames(ArrayRef <const t_filenm> filenames);
@@ -1572,6 +1596,7 @@ class Mdrunner::BuilderImplementation
          const char* nbpu_opt_    = nullptr;
          const char* pme_opt_     = nullptr;
          const char* pme_fft_opt_ = nullptr;
+        const char *bonded_opt_  = nullptr;
  
          MdrunOptions                          mdrunOptions_;
  
@@ -1729,6 +1754,15 @@ Mdrunner Mdrunner::BuilderImplementation::build()
          GMX_THROW(gmx::APIError("MdrunnerBuilder::addElectrostatics() is required before build()"));
      }
  
+    if (bonded_opt_)
+    {
+        newRunner.bonded_opt = bonded_opt_;
+    }
+    else
+    {
+        GMX_THROW(gmx::APIError("MdrunnerBuilder::addBondedTaskAssignment() is required before build()"));
+    }
+
      newRunner.restraintManager_ = compat::make_unique<gmx::RestraintManager>();
  
      if (stopHandlerBuilder_)
@@ -1755,6 +1789,11 @@ void Mdrunner::BuilderImplementation::addPME(const char* pme_opt,
      pme_fft_opt_ = pme_fft_opt;
  }
  
+// Records the bonded task-assignment choice for later use by build().
+// Only the pointer is stored; the C string itself is not copied.
+void Mdrunner::BuilderImplementation::addBondedTaskAssignment(const char* bonded_opt)
+{
+    bonded_opt_ = bonded_opt;
+}
+
  void Mdrunner::BuilderImplementation::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions)
  {
      hardwareOptions_ = hardwareOptions;
@@ -1841,6 +1880,12 @@ MdrunnerBuilder &MdrunnerBuilder::addElectrostatics(const char* pme_opt,
      return *this;
  }
  
+// Forwards the bonded task-assignment choice to the implementation
+// object and returns this builder so that calls can be chained.
+MdrunnerBuilder &MdrunnerBuilder::addBondedTaskAssignment(const char* bonded_opt)
+{
+    impl_->addBondedTaskAssignment(bonded_opt);
+    return *this;
+}
+
  Mdrunner MdrunnerBuilder::build()
  {
      return impl_->build();
diff --git a/src/gromacs/mdrun/runner.h b/src/gromacs/mdrun/runner.h

index 75ee32f51b7c3696c41579e99a71d685b57d77a3..465fdd27f159bed229f161b9222d320f8e300eb5 100644 (file)
--- a/src/gromacs/mdrun/runner.h
+++ b/src/gromacs/mdrun/runner.h
@@ -233,6 +233,14 @@ class Mdrunner
           * \todo replace with string or enum class and initialize with sensible value.
           */
          const char                             *pme_fft_opt = nullptr;
+
+        /*! \brief Target bonded interactions for "cpu", "gpu", or "auto". Default is "auto".
+         *
+         * \internal
+         * \todo replace with string or enum class and initialize with sensible value.
+         */
+        const char                             *bonded_opt = nullptr;
+
          //! Command-line override for the duration of a neighbor list with the Verlet scheme.
          int                                     nstlist_cmdline = 0;
          //! Parameters for replica-exchange simulations.
@@ -403,6 +411,28 @@ class MdrunnerBuilder final
          MdrunnerBuilder &addElectrostatics(const char* pme_opt,
                                             const char* pme_fft_opt);
  
+        /*!
+         * \brief Assign responsibility for tasks for bonded interactions.
+         *
+         * Required. Director code should provide valid options for
+         * bonded interaction task assignment, whether or not such
+         * interactions are present. The builder does not apply any
+         * defaults, so client code should be prepared to provide
+         * (e.g.) "auto" in the event no user input or logic provides
+         * an alternative argument.
+         *
+         * \param bonded_opt Target bonded interactions for "cpu", "gpu", or "auto".
+         *
+         * Calling code must guarantee that the pointed-to C string is valid through
+         * simulation launch.
+         *
+         * \internal
+         * The arguments are passed as references to elements of arrays of C strings.
+         * \todo Replace with modern strings or (better) enum classes.
+         * \todo Make optional and/or encapsulate into task assignment module.
+         */
+        MdrunnerBuilder &addBondedTaskAssignment(const char *bonded_opt);
+
          /*!
           * \brief Provide access to the multisim communicator to use.
           *
diff --git a/src/gromacs/taskassignment/decidegpuusage.cpp b/src/gromacs/taskassignment/decidegpuusage.cpp

index 10bcbb8f500ca40a9bd0ccd97d7986310227ce46..dbd9dd67460e535aa28572c5f06a6d35c61d4fbd 100644 (file)
--- a/src/gromacs/taskassignment/decidegpuusage.cpp
+++ b/src/gromacs/taskassignment/decidegpuusage.cpp
@@ -408,4 +408,70 @@ bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
      return false;
  }
  
+// Decides whether bonded interactions will be run on a GPU, given the
+// user's choice in \c bondedTarget and what the run otherwise permits.
+// Throws InconsistentInputError when a GPU was required but an
+// obstacle prevents its use.
+bool decideWhetherToUseGpusForBonded(const bool       useGpuForNonbonded,
+                                     const bool /*useGpuForPme*/,
+                                     const bool       usingVerletScheme,
+                                     const TaskTarget bondedTarget,
+                                     const bool       canUseGpuForBonded,
+                                     const int        /*numRanksPerSimulation*/,
+                                     const int        /*numPmeRanksPerSimulation*/,
+                                     const bool       gpusWereDetected)
+{
+    // An explicit request for the CPU settles the matter immediately.
+    if (bondedTarget == TaskTarget::Cpu)
+    {
+        return false;
+    }
+
+    const bool gpuWasRequired = (bondedTarget == TaskTarget::Gpu);
+
+    // Find the first obstacle, if any, to running bondeds on a GPU.
+    // The order of the checks determines which message is reported.
+    const char *obstacleMessage = nullptr;
+    if (!usingVerletScheme)
+    {
+        obstacleMessage =
+            "Bonded interactions on the GPU were required, which requires using "
+            "the Verlet scheme. Either use the Verlet scheme, or do not require using GPUs.";
+    }
+    else if (!canUseGpuForBonded)
+    {
+        obstacleMessage =
+            "Bonded interactions on the GPU were required, but not supported for these "
+            "simulation settings. Change your settings, or do not require using GPUs.";
+    }
+    else if (!useGpuForNonbonded)
+    {
+        obstacleMessage =
+            "Bonded interactions on the GPU were required, but this requires that "
+            "short-ranged non-bonded interactions are also run on the GPU. Change "
+            "your settings, or do not require using GPUs.";
+    }
+
+    if (obstacleMessage != nullptr)
+    {
+        // An obstacle is only an error when the user insisted on a
+        // GPU; with automatic assignment the CPU is used instead.
+        if (gpuWasRequired)
+        {
+            GMX_THROW(InconsistentInputError(obstacleMessage));
+        }
+        return false;
+    }
+
+    // TODO If the bonded kernels do not get fused, then performance
+    // overheads might suggest alternative choices here.
+
+    // When a GPU was required, it is not yet known whether it is an
+    // error if no GPUs are found, so the answer is yes regardless.
+    // Otherwise the user permitted GPUs, which should be used for
+    // bonded interactions if any were detected.
+    return gpuWasRequired || gpusWereDetected;
+}
+
  }  // namespace gmx
diff --git a/src/gromacs/taskassignment/decidegpuusage.h b/src/gromacs/taskassignment/decidegpuusage.h

index 0730d52b9a0e13d586760055d88cb1b9b3454fee..9b8279cea775c32388623fe454c51bd0987639a8 100644 (file)
--- a/src/gromacs/taskassignment/decidegpuusage.h
+++ b/src/gromacs/taskassignment/decidegpuusage.h
@@ -186,6 +186,30 @@ bool decideWhetherToUseGpusForPme(bool                    useGpuForNonbonded,
                                    int                     numPmeRanksPerSimulation,
                                    bool                    gpusWereDetected);
  
+/*! \brief Decide whether the simulation will try to run bonded tasks on GPUs.
+ *
+ * \param[in]  useGpuForNonbonded        Whether GPUs will be used for nonbonded interactions.
+ * \param[in]  useGpuForPme              Whether GPUs will be used for PME interactions.
+ * \param[in]  usingVerletScheme         Whether the nonbondeds are using the Verlet scheme.
+ * \param[in]  bondedTarget              The user's choice for mdrun -bonded for where to assign tasks.
+ * \param[in]  canUseGpuForBonded        Whether the bonded interactions can run on a GPU
+ * \param[in]  numRanksPerSimulation     The number of ranks in each simulation.
+ * \param[in]  numPmeRanksPerSimulation  The number of PME ranks in each simulation.
+ * \param[in]  gpusWereDetected          Whether compatible GPUs were detected on any node.
+ *
+ * \returns    Whether the simulation will run bonded tasks on GPUs.
+ *
+ * \throws     std::bad_alloc          If out of memory
+ *             InconsistentInputError  If the user requirements are inconsistent. */
+bool decideWhetherToUseGpusForBonded(bool       useGpuForNonbonded,
+                                     bool       useGpuForPme,
+                                     bool       usingVerletScheme,
+                                     TaskTarget bondedTarget,
+                                     bool       canUseGpuForBonded,
+                                     int        numRanksPerSimulation,
+                                     int        numPmeRanksPerSimulation,
+                                     bool       gpusWereDetected);
+
  }  // namespace gmx
  
  #endif
diff --git a/src/gromacs/taskassignment/reportgpuusage.cpp b/src/gromacs/taskassignment/reportgpuusage.cpp

index e1cb200375a28e241d46d1c0c7f2ffe1df85af80..70f50476c527f68599950238eb9fc5eb21cacf06 100644 (file)
--- a/src/gromacs/taskassignment/reportgpuusage.cpp
+++ b/src/gromacs/taskassignment/reportgpuusage.cpp
@@ -45,6 +45,7 @@
  #include <set>
  #include <string>
  
+#include "gromacs/ewald/pme.h"
  #include "gromacs/gpu_utils/gpu_utils.h"
  #include "gromacs/utility/cstringutil.h"
  #include "gromacs/utility/logger.h"
@@ -84,7 +85,9 @@ reportGpuUsage(const MDLogger                &mdlog,
                 const GpuTaskAssignments      &gpuTaskAssignmentOnRanksOfThisNode,
                 size_t                         numGpuTasksOnThisNode,
                 size_t                         numRanks,
-               bool                           bPrintHostName)
+               bool                           bPrintHostName,
+               bool                           useGpuForBonded,
+               PmeRunMode                     pmeRunMode)
  {
      size_t numGpusInUse = countUniqueGpuIdsUsed(gpuTaskAssignmentOnRanksOfThisNode);
      if (numGpusInUse == 0)
@@ -133,6 +136,17 @@ reportGpuUsage(const MDLogger                &mdlog,
                                      numRanks,
                                      (numRanks > 1) ? "s" : "",
                                      gpuIdsString.c_str());
+        // Because there is a GPU in use, there must be a PP task on a GPU.
+        // The optional bonded text starts with a space so that it is
+        // separated from "short-ranged" in the formatted message.
+        output += gmx::formatString("PP tasks will do short-ranged%s interactions on the GPU\n",
+                                    useGpuForBonded ? " and most bonded" : "");
+        // Describe how much of the PME work runs on the GPU, if any.
+        if (pmeRunMode == PmeRunMode::Mixed)
+        {
+            output += gmx::formatString("PME tasks will do only spread and gather on the GPU\n");
+        }
+        else if (pmeRunMode == PmeRunMode::GPU)
+        {
+            output += gmx::formatString("PME tasks will do all aspects on the GPU\n");
+        }
      }
  
      /* NOTE: this print is only for and on one physical node */
diff --git a/src/gromacs/taskassignment/reportgpuusage.h b/src/gromacs/taskassignment/reportgpuusage.h

index 400cdeb165d34c7a268de0066bf788a6b9ba64fa..fa26ddddf1b0e38a8c00074b52028ba3d4d8b296 100644 (file)
--- a/src/gromacs/taskassignment/reportgpuusage.h
+++ b/src/gromacs/taskassignment/reportgpuusage.h
@@ -50,6 +50,8 @@
  
  #include "gromacs/taskassignment/taskassignment.h"
  
+enum class PmeRunMode;
+
  namespace gmx
  {
  
@@ -67,6 +69,8 @@ class MDLogger;
   * \param[in]  numGpuTasksOnThisNode               The number of GPU tasks on this node.
   * \param[in]  numPpRanks                          Number of PP ranks on this node
   * \param[in]  bPrintHostName                      Print the hostname in the usage information
+ * \param[in]  useGpuForBonded                     Whether GPU PP tasks will do bonded work on the GPU
+ * \param[in]  pmeRunMode                          Describes the execution of PME tasks
   *
   * \throws     std::bad_alloc if out of memory */
  void
@@ -75,8 +79,9 @@ reportGpuUsage(const MDLogger                &mdlog,
                 const GpuTaskAssignments      &gpuTaskAssignmentOnRanksOfThisNode,
                 size_t                         numGpuTasksOnThisNode,
                 size_t                         numPpRanks,
-               bool                           bPrintHostName);
-
+               bool                           bPrintHostName,
+               bool                           useGpuForBonded,
+               PmeRunMode                     pmeRunMode);
  
  }  // namespace gmx
  
diff --git a/src/gromacs/taskassignment/taskassignment.cpp b/src/gromacs/taskassignment/taskassignment.cpp

index 136587e3cb92ae5613351f8dc62bf3bc40634ffc..785a93a8044b50b34380157a55abd8190136144d 100644 (file)
--- a/src/gromacs/taskassignment/taskassignment.cpp
+++ b/src/gromacs/taskassignment/taskassignment.cpp
@@ -190,7 +190,9 @@ runTaskAssignment(const std::vector<int>         &gpuIdsToUse,
                    const t_commrec                *cr,
                    const gmx_multisim_t           *ms,
                    const PhysicalNodeCommunicator &physicalNodeComm,
-                  const std::vector<GpuTask>     &gpuTasksOnThisRank)
+                  const std::vector<GpuTask>     &gpuTasksOnThisRank,
+                  bool                            useGpuForBonded,
+                  PmeRunMode                      pmeRunMode)
  {
      /* Communicate among ranks on this node to find each task that can
       * be executed on a GPU, on each rank. */
@@ -307,7 +309,8 @@ runTaskAssignment(const std::vector<int>         &gpuIdsToUse,
      }
  
      reportGpuUsage(mdlog, !userGpuTaskAssignment.empty(), taskAssignmentOnRanksOfThisNode,
-                   numGpuTasksOnThisNode, physicalNodeComm.size_, cr->nnodes > 1);
+                   numGpuTasksOnThisNode, physicalNodeComm.size_, cr->nnodes > 1,
+                   useGpuForBonded, pmeRunMode);
  
      // If the user chose a task assignment, give them some hints where appropriate.
      if (!userGpuTaskAssignment.empty())
diff --git a/src/gromacs/taskassignment/taskassignment.h b/src/gromacs/taskassignment/taskassignment.h

index 7c8250a5645691da8d6afdd278967e3ee172ff27..0c57951d0512bcd25bda2ed097b8dbd1f0b1c016 100644 (file)
--- a/src/gromacs/taskassignment/taskassignment.h
+++ b/src/gromacs/taskassignment/taskassignment.h
@@ -56,6 +56,8 @@ struct gmx_hw_info_t;
  struct gmx_multisim_t;
  struct t_commrec;
  
+enum class PmeRunMode;
+
  namespace gmx
  {
  
@@ -108,6 +110,8 @@ using GpuTaskAssignments = std::vector<GpuTaskAssignment>;
   * \param[in]  physicalNodeComm           Communication object for this physical node.
   * \param[in]  gpuTasksOnThisRank         Information about what GPU tasks
   *                                        exist on this rank.
+ * \param[in]  useGpuForBonded            Whether GPU PP tasks will do bonded work on the GPU
+ * \param[in]  pmeRunMode                 Describes the execution of PME tasks
   *
   * \returns  A GPU task assignment for this rank.
   *
@@ -122,7 +126,9 @@ runTaskAssignment(const std::vector<int>         &gpuIdsToUse,
                    const t_commrec                *cr,
                    const gmx_multisim_t           *ms,
                    const PhysicalNodeCommunicator &physicalNodeComm,
-                  const std::vector<GpuTask>     &gpuTasksOnThisRank);
+                  const std::vector<GpuTask>     &gpuTasksOnThisRank,
+                  bool                            useGpuForBonded,
+                  PmeRunMode                      pmeRunMode);
  
  //! Function for whether the task of \c mapping has value \c TaskType.
  template<GpuTask TaskType>
diff --git a/src/programs/mdrun/mdrun.cpp b/src/programs/mdrun/mdrun.cpp

index 6e568a4a57f2c81b53fc1520bc1d5ad7b9bb1e6f..b7ee9fc40ba21ba58b9ff638a96755628b4b60ac 100644 (file)
--- a/src/programs/mdrun/mdrun.cpp
+++ b/src/programs/mdrun/mdrun.cpp
@@ -271,6 +271,7 @@ int gmx_mdrun(int argc, char *argv[])
      builder.addNonBonded(options.nbpu_opt_choices[0]);
      // \todo pass by value
      builder.addElectrostatics(options.pme_opt_choices[0], options.pme_fft_opt_choices[0]);
+    builder.addBondedTaskAssignment(options.bonded_opt_choices[0]);
      builder.addNeighborList(options.nstlist_cmdline);
      builder.addReplicaExchange(options.replExParams);
      // \todo take ownership of multisim resources (ms)
author	Mark Abraham <mark.j.abraham@gmail.com>
	Thu, 11 Oct 2018 19:53:38 +0000 (21:53 +0200)
committer	Mark Abraham <mark.j.abraham@gmail.com>
	Tue, 16 Oct 2018 07:36:50 +0000 (09:36 +0200)
src/api/cpp/context.cpp		patch \| blob \| history
src/gromacs/ewald/pme.h		patch \| blob \| history
src/gromacs/listed-forces/manage-threading.cpp		patch \| blob \| history
src/gromacs/listed-forces/manage-threading.h		patch \| blob \| history
src/gromacs/mdlib/forcerec.cpp		patch \| blob \| history
src/gromacs/mdlib/forcerec.h		patch \| blob \| history
src/gromacs/mdrun/legacymdrunoptions.h		patch \| blob \| history
src/gromacs/mdrun/runner.cpp		patch \| blob \| history
src/gromacs/mdrun/runner.h		patch \| blob \| history
src/gromacs/taskassignment/decidegpuusage.cpp		patch \| blob \| history
src/gromacs/taskassignment/decidegpuusage.h		patch \| blob \| history
src/gromacs/taskassignment/reportgpuusage.cpp		patch \| blob \| history
src/gromacs/taskassignment/reportgpuusage.h		patch \| blob \| history
src/gromacs/taskassignment/taskassignment.cpp		patch \| blob \| history
src/gromacs/taskassignment/taskassignment.h		patch \| blob \| history
src/programs/mdrun/mdrun.cpp		patch \| blob \| history