Add basic interface to run update on GPU
authorPaul Bauer <paul.bauer.q@gmail.com>
Thu, 5 Sep 2019 14:01:47 +0000 (16:01 +0200)
committerMark Abraham <mark.j.abraham@gmail.com>
Thu, 19 Sep 2019 16:19:26 +0000 (18:19 +0200)
Added a basic command-line option to direct the update to a GPU,
along with the necessary checks.

Because the default code path now moves to the GPU, an overly
tight tolerance in the simulator tests caused one of the tests
to fail. The cause was a slightly too low absolute value of the
kinetic energy in the definition of the tolerances, which this
patch also corrects.

Change-Id: Ieca885d935aa1e0d592735bc7027f8261288b635

src/api/cpp/context.cpp
src/gromacs/mdrun/isimulator.h
src/gromacs/mdrun/legacymdrunoptions.h
src/gromacs/mdrun/md.cpp
src/gromacs/mdrun/runner.cpp
src/gromacs/mdrun/runner.h
src/gromacs/taskassignment/decidegpuusage.cpp
src/gromacs/taskassignment/decidegpuusage.h
src/programs/mdrun/mdrun.cpp
src/programs/mdrun/tests/refdata/MdrunTest_WritesHelp.xml
src/programs/mdrun/tests/simulator.cpp

index 589ae10b8adb77c9f07e803fba0f478f2ccd9d10..a2fccfc99ebb1439902df226f67ae18041eefe37 100644 (file)
@@ -186,6 +186,7 @@ std::shared_ptr<Session> ContextImpl::launch(const Workflow &work)
         // \todo pass by value
         builder.addElectrostatics(options_.pme_opt_choices[0], options_.pme_fft_opt_choices[0]);
         builder.addBondedTaskAssignment(options_.bonded_opt_choices[0]);
+        builder.addUpdateTaskAssignment(options_.update_opt_choices[0]);
         builder.addNeighborList(options_.nstlist_cmdline);
         builder.addReplicaExchange(options_.replExParams);
         // \todo take ownership of multisim resources (ms)
index 9ae093de7070b9558f40e0c1441eb2156c8ac0aa..e7f3c66249219a4851241eb651d5740c8422395f 100644 (file)
@@ -134,7 +134,8 @@ class ISimulator
             gmx_membed_t                       *membed,
             gmx_walltime_accounting            *walltime_accounting,
             std::unique_ptr<StopHandlerBuilder> stopHandlerBuilder,
-            bool                                doRerun) :
+            bool                                doRerun,
+            bool                                useGpuForUpdate) :
             fplog(fplog),
             cr(cr),
             ms(ms),
@@ -169,7 +170,8 @@ class ISimulator
             membed(membed),
             walltime_accounting(walltime_accounting),
             stopHandlerBuilder(std::move(stopHandlerBuilder)),
-            doRerun(doRerun)
+            doRerun(doRerun),
+            useGpuForUpdate(useGpuForUpdate)
         {}
 
     protected:
@@ -243,6 +245,8 @@ class ISimulator
         std::unique_ptr<StopHandlerBuilder> stopHandlerBuilder;
         //! Whether we're doing a rerun.
         bool                                doRerun;
+        //! Whether we will use the GPU for calculating the update.
+        bool                                useGpuForUpdate;
 
 };
 
index 706830d1c97646aecef714a96fb0e47ad699e898..354fb30cbcd832f6b3ebf0f27a8683c5639c8e97 100644 (file)
@@ -156,12 +156,15 @@ class LegacyMdrunOptions
         { nullptr, "auto", "cpu", "gpu", nullptr };
         const char       *bonded_opt_choices[5] =
         { nullptr, "auto", "cpu", "gpu", nullptr };
+        const char       *update_opt_choices[5] =
+        { nullptr, "auto", "cpu", "gpu", nullptr };
         const char       *gpuIdsAvailable       = "";
         const char       *userGpuTaskAssignment = "";
 
+
         ImdOptions       &imdOptions = mdrunOptions.imdOptions;
 
-        t_pargs           pa[47] = {
+        t_pargs           pa[48] = {
 
             { "-dd",      FALSE, etRVEC, {&realddxyz},
               "Domain decomposition grid, 0 is optimize" },
@@ -224,6 +227,8 @@ class LegacyMdrunOptions
               "Perform PME FFT calculations on" },
             { "-bonded",     FALSE, etENUM, {bonded_opt_choices},
               "Perform bonded calculations on" },
+            { "-update", FALSE, etENUM, {update_opt_choices},
+              "Perform update and constraints on"},
             { "-v",       FALSE, etBOOL, {&mdrunOptions.verbose},
               "Be loud and noisy" },
             { "-pforce",  FALSE, etREAL, {&pforce},
index 30580ccfb15b84b9a6217737c879a2b9c84dad48..1a4f8a9347da6126095abebec24e1baa411a50a9 100644 (file)
 
 using gmx::SimulationSignaller;
 
-//! Whether the GPU versions of Leap-Frog integrator and LINCS and SETTLE constraints
-static const bool c_useGpuUpdateConstrain = (getenv("GMX_UPDATE_CONSTRAIN_GPU") != nullptr);
-
 void gmx::LegacySimulator::do_md()
 {
     // TODO Historically, the EM and MD "integrators" used different
@@ -318,7 +315,7 @@ void gmx::LegacySimulator::do_md()
         upd.setNumAtoms(state->natoms);
     }
 
-    if (c_useGpuUpdateConstrain)
+    if (useGpuForUpdate)
     {
         GMX_RELEASE_ASSERT(ir->eI == eiMD, "Only md integrator is supported on the GPU.");
         GMX_RELEASE_ASSERT(ir->etc != etcNOSEHOOVER, "Nose Hoover temperature coupling is not supported on the GPU.");
@@ -326,7 +323,7 @@ void gmx::LegacySimulator::do_md()
         GMX_RELEASE_ASSERT(!mdatoms->haveVsites, "Virtual sites are not supported on the GPU");
         GMX_RELEASE_ASSERT(ed == nullptr, "Essential dynamics is not supported with GPU-based update constraints.");
         GMX_LOG(mdlog.info).asParagraph().
-            appendText("Using CUDA GPU-based update and constraints module.");
+            appendText("Updating coordinates on the GPU.");
         integrator = std::make_unique<UpdateConstrainCuda>(*ir, *top_global);
         integrator->set(top.idef, *mdatoms, ekind->ngtc);
         t_pbc pbc;
@@ -1186,7 +1183,7 @@ void gmx::LegacySimulator::do_md()
             std::copy(state->x.begin(), state->x.end(), cbuf.begin());
         }
 
-        if (c_useGpuUpdateConstrain)
+        if (useGpuForUpdate)
         {
             if (bNS)
             {
index f6659972c151bc482192243043391e0138eff6c1..ad8630e25f4748fdc9aad1641aec7065b7af66e3 100644 (file)
@@ -244,6 +244,7 @@ Mdrunner Mdrunner::cloneOnSpawnedThread() const
     newRunner.pme_opt             = pme_opt;
     newRunner.pme_fft_opt         = pme_fft_opt;
     newRunner.bonded_opt          = bonded_opt;
+    newRunner.update_opt          = update_opt;
     newRunner.nstlist_cmdline     = nstlist_cmdline;
     newRunner.replExParams        = replExParams;
     newRunner.pforce              = pforce;
@@ -655,6 +656,7 @@ int Mdrunner::mdrunner()
     auto       pmeTarget       = findTaskTarget(pme_opt);
     auto       pmeFftTarget    = findTaskTarget(pme_fft_opt);
     auto       bondedTarget    = findTaskTarget(bonded_opt);
+    auto       updateTarget    = findTaskTarget(update_opt);
     PmeRunMode pmeRunMode      = PmeRunMode::None;
 
     // Here we assume that SIMMASTER(cr) does not change even after the
@@ -781,16 +783,17 @@ int Mdrunner::mdrunner()
     // Note that when bonded interactions run on a GPU they always run
     // alongside a nonbonded task, so do not influence task assignment
     // even though they affect the force calculation workload.
-    bool useGpuForNonbonded = false;
-    bool useGpuForPme       = false;
-    bool useGpuForBonded    = false;
+    bool useGpuForNonbonded     = false;
+    bool useGpuForPme           = false;
+    bool useGpuForBonded        = false;
+    bool useGpuForUpdate        = false;
+    bool gpusWereDetected       = hwinfo->ngpu_compatible_tot > 0;
     try
     {
         // It's possible that there are different numbers of GPUs on
         // different nodes, which is the user's responsibility to
         // handle. If unsuitable, we will notice that during task
         // assignment.
-        bool gpusWereDetected      = hwinfo->ngpu_compatible_tot > 0;
         auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
         useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(nonbondedTarget, userGpuTaskAssignment,
                                                                 emulateGpuNonbonded,
@@ -1543,6 +1546,26 @@ int Mdrunner::mdrunner()
                             fr->cginfo_mb);
         }
 
+        if (updateTarget == TaskTarget::Gpu)
+        {
+            if (SIMMASTER(cr))
+            {
+                gmx_fatal(FARGS, "It is currently not possible to redirect the calculation "
+                          "of update and constraints to the GPU!");
+            }
+        }
+
+        // Before we start the actual simulator, check whether the update task can run on the GPU.
+        useGpuForUpdate = decideWhetherToUseGpuForUpdate(DOMAINDECOMP(cr),
+                                                         useGpuForNonbonded,
+                                                         updateTarget,
+                                                         gpusWereDetected,
+                                                         *inputrec,
+                                                         *mdAtoms,
+                                                         doEssentialDynamics,
+                                                         fcd->orires.nr != 0,
+                                                         fcd->disres.nsystems != 0);
+
         // TODO This is not the right place to manage the lifetime of
         // this data structure, but currently it's the easiest way to
         // make it work.
@@ -1580,7 +1603,8 @@ int Mdrunner::mdrunner()
                     membed,
                     walltime_accounting,
                     std::move(stopHandlerBuilder_),
-                    doRerun);
+                    doRerun,
+                    useGpuForUpdate);
         simulator->run();
 
         if (inputrec->bPull)
@@ -1734,6 +1758,8 @@ class Mdrunner::BuilderImplementation
 
         void addBondedTaskAssignment(const char* bonded_opt);
 
+        void addUpdateTaskAssignment(const char* update_opt);
+
         void addHardwareOptions(const gmx_hw_opt_t &hardwareOptions);
 
         void addFilenames(ArrayRef <const t_filenm> filenames);
@@ -1751,10 +1777,11 @@ class Mdrunner::BuilderImplementation
         // Default parameters copied from runner.h
         // \todo Clarify source(s) of default parameters.
 
-        const char* nbpu_opt_    = nullptr;
-        const char* pme_opt_     = nullptr;
-        const char* pme_fft_opt_ = nullptr;
-        const char *bonded_opt_  = nullptr;
+        const char* nbpu_opt_          = nullptr;
+        const char* pme_opt_           = nullptr;
+        const char* pme_fft_opt_       = nullptr;
+        const char *bonded_opt_        = nullptr;
+        const char *update_opt_        = nullptr;
 
         MdrunOptions                          mdrunOptions_;
 
@@ -1925,6 +1952,16 @@ Mdrunner Mdrunner::BuilderImplementation::build()
         GMX_THROW(gmx::APIError("MdrunnerBuilder::addBondedTaskAssignment() is required before build()"));
     }
 
+    if (update_opt_)
+    {
+        newRunner.update_opt = update_opt_;
+    }
+    else
+    {
+        GMX_THROW(gmx::APIError("MdrunnerBuilder::addUpdateTaskAssignment() is required before build()  "));
+    }
+
+
     newRunner.restraintManager_ = std::make_unique<gmx::RestraintManager>();
 
     if (stopHandlerBuilder_)
@@ -1956,6 +1993,11 @@ void Mdrunner::BuilderImplementation::addBondedTaskAssignment(const char* bonded
     bonded_opt_ = bonded_opt;
 }
 
+void Mdrunner::BuilderImplementation::addUpdateTaskAssignment(const char* update_opt)
+{
+    update_opt_ = update_opt; // Only the pointer is stored; the string itself is not copied.
+}
+
 void Mdrunner::BuilderImplementation::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions)
 {
     hardwareOptions_ = hardwareOptions;
@@ -2055,6 +2097,12 @@ MdrunnerBuilder &MdrunnerBuilder::addBondedTaskAssignment(const char* bonded_opt
     return *this;
 }
 
+MdrunnerBuilder &MdrunnerBuilder::addUpdateTaskAssignment(const char* update_opt)
+{
+    impl_->addUpdateTaskAssignment(update_opt); // Forward the choice to the implementation object.
+    return *this; // Return the builder itself so that calls can be chained.
+}
+
 Mdrunner MdrunnerBuilder::build()
 {
     return impl_->build();
index 6a168087f45e17d25dd7bd0b6559430eedca5d6c..5c7a9a1d38d6324bf3b3c1965a96fbc917a1b42a 100644 (file)
@@ -237,6 +237,13 @@ class Mdrunner
          */
         const char                             *bonded_opt = nullptr;
 
+        /*! \brief Target update calculation for "cpu", "gpu", or "auto". Default is "auto".
+         *
+         * \internal
+         * \todo replace with string or enum class and initialize with sensible value.
+         */
+        const char                             *update_opt = nullptr;
+
         //! Command-line override for the duration of a neighbor list with the Verlet scheme.
         int                                     nstlist_cmdline = 0;
         //! Parameters for replica-exchange simulations.
@@ -430,6 +437,27 @@ class MdrunnerBuilder final
          */
         MdrunnerBuilder &addBondedTaskAssignment(const char *bonded_opt);
 
+        /*! \brief
+         * Assign responsibility for the update and constraint calculation tasks.
+         *
+         * Required. Director code should provide valid options for
+         * update and constraint task assignment. The builder does not apply any
+         * defaults, so client code should be prepared to provide
+         * (e.g.) "auto" in the event no user input or logic provides
+         * an alternative argument.
+         *
+         * \param[in] update_opt Target update calculation for "cpu", "gpu", or "auto".
+         *
+         * Calling code must guarantee that the pointed-to C string is valid through
+         * simulation launch.
+         *
+         * \internal
+         * The argument is passed as an element of an array of C strings.
+         * \todo Replace with modern strings or (better) enum classes.
+         * \todo Make optional and/or encapsulate into task assignment module.
+         */
+        MdrunnerBuilder &addUpdateTaskAssignment(const char *update_opt);
+
         /*!
          * \brief Provide access to the multisim communicator to use.
          *
index d29cdaadc5857759767c0a02598e8c5f73a55fba..b9f0d7aff848a974cec2851da4700f1269c5d17c 100644 (file)
 #include "gromacs/hardware/hardwaretopology.h"
 #include "gromacs/hardware/hw_info.h"
 #include "gromacs/mdlib/gmx_omp_nthreads.h"
+#include "gromacs/mdlib/mdatoms.h"
 #include "gromacs/mdtypes/commrec.h"
 #include "gromacs/mdtypes/inputrec.h"
 #include "gromacs/mdtypes/md_enums.h"
+#include "gromacs/mdtypes/mdrunoptions.h"
 #include "gromacs/taskassignment/taskassignment.h"
 #include "gromacs/topology/topology.h"
 #include "gromacs/utility/baseversion.h"
@@ -489,4 +491,87 @@ bool decideWhetherToUseGpusForBonded(const bool       useGpuForNonbonded,
     return gpusWereDetected && usingOurCpuForPmeOrEwald;
 }
 
+bool decideWhetherToUseGpuForUpdate(bool              isDomainDecomposition,
+                                    bool              useGpuForNonbonded,
+                                    TaskTarget        updateTarget,
+                                    bool              gpusWereDetected,
+                                    const t_inputrec &inputrec,
+                                    const MDAtoms    &mdatoms,
+                                    bool              useEssentialDynamics,
+                                    bool              doOrientationRestraints,
+                                    bool              doDistanceRestraints)
+{
+    if (updateTarget == TaskTarget::Cpu)
+    {
+        return false;
+    }
+
+    std::string errorMessage;
+
+    if (isDomainDecomposition)
+    {
+        errorMessage += "Domain decomposition is not supported.\n";
+    }
+    if (!useGpuForNonbonded)
+    {
+        errorMessage += "Short-ranged non-bonded interaction tasks must run on the GPU.\n";
+    }
+    if (!gpusWereDetected)
+    {
+        errorMessage += "Compatible GPUs must have been found.\n";
+    }
+    if (GMX_GPU != GMX_GPU_CUDA)
+    {
+        errorMessage += "Only a CUDA build is supported.\n";
+    }
+    if (inputrec.eI != eiMD)
+    {
+        errorMessage += "Only the md integrator is supported.\n";
+    }
+    if (inputrec.etc == etcNOSEHOOVER)
+    {
+        errorMessage += "Nose-Hoover temperature coupling is not supported.\n";
+    }
+    if (inputrec.epc != epcNO && inputrec.epc != epcPARRINELLORAHMAN)
+    {
+        errorMessage += "Only Parrinello-Rahman pressure control is supported.\n";
+    }
+    if (mdatoms.mdatoms()->haveVsites)
+    {
+        errorMessage += "Virtual sites are not supported.\n";
+    }
+    if (useEssentialDynamics)
+    {
+        errorMessage += "Essential dynamics is not supported.\n";
+    }
+    if (inputrec.bPull || inputrec.pull)
+    {
+        errorMessage += "Pulling is not supported.\n";
+    }
+    if (doOrientationRestraints)
+    {
+        errorMessage += "Orientation restraints are not supported.\n";
+    }
+    if (doDistanceRestraints)
+    {
+        errorMessage += "Distance restraints are not supported.\n";
+    }
+    if (inputrec.efep != efepNO)
+    {
+        errorMessage += "Free energy perturbations are not supported.\n";
+    }
+    if (!errorMessage.empty())
+    {
+        if (updateTarget == TaskTarget::Gpu)
+        {
+            std::string prefix = gmx::formatString("Update task on the GPU was required,\n"
+                                                   "but the following condition(s) were not satisfied:\n");
+            GMX_THROW(InconsistentInputError((prefix + errorMessage).c_str()));
+        }
+        return false;
+    }
+
+    return true;
+}
+
 }  // namespace gmx
index 818841b722fc82b5777276bd651c4fa1c776c2fe..707b1e49b3c1de7634da72a2d74202ced7a62b64 100644 (file)
@@ -69,6 +69,8 @@ enum class EmulateGpuNonbonded : bool
     Yes
 };
 
+class MDAtoms;
+
 /*! \brief Decide whether this thread-MPI simulation will run
  * nonbonded tasks on GPUs.
  *
@@ -227,6 +229,33 @@ bool decideWhetherToUseGpusForBonded(bool       useGpuForNonbonded,
                                      int        numPmeRanksPerSimulation,
                                      bool       gpusWereDetected);
 
+/*! \brief Decide whether to use GPU for update.
+ *
+ * \param[in]  isDomainDecomposition     Whether there is more than one domain.
+ * \param[in]  useGpuForNonbonded        Whether GPUs will be used for nonbonded interactions.
+ * \param[in]  updateTarget              User choice for where to run the update task.
+ * \param[in]  gpusWereDetected          Whether compatible GPUs were detected on any node.
+ * \param[in]  inputrec                  The user input.
+ * \param[in]  mdatoms                   Information about simulation atoms.
+ * \param[in]  useEssentialDynamics      If essential dynamics is active.
+ * \param[in]  doOrientationRestraints   If orientation restraints are enabled.
+ * \param[in]  doDistanceRestraints      If distance restraints are enabled.
+ *
+ * \returns    Whether the update task will be run on the GPU.
+ * \throws     std::bad_alloc            If out of memory
+ *             InconsistentInputError    If the user requirements are inconsistent.
+ */
+bool decideWhetherToUseGpuForUpdate(bool              isDomainDecomposition,
+                                    bool              useGpuForNonbonded,
+                                    TaskTarget        updateTarget,
+                                    bool              gpusWereDetected,
+                                    const t_inputrec &inputrec,
+                                    const MDAtoms    &mdatoms,
+                                    bool              useEssentialDynamics,
+                                    bool              doOrientationRestraints,
+                                    bool              doDistanceRestraints);
+
+
 }  // namespace gmx
 
 #endif
index 73e324a80719b9c6526f04437a1afaf0e1665ea0..ce690eb9cdaa749e7d0b3f2c01ab662d8671c99e 100644 (file)
@@ -248,6 +248,7 @@ int gmx_mdrun(int argc, char *argv[])
     // \todo pass by value
     builder.addElectrostatics(options.pme_opt_choices[0], options.pme_fft_opt_choices[0]);
     builder.addBondedTaskAssignment(options.bonded_opt_choices[0]);
+    builder.addUpdateTaskAssignment(options.update_opt_choices[0]);
     builder.addNeighborList(options.nstlist_cmdline);
     builder.addReplicaExchange(options.replExParams);
     // \todo take ownership of multisim resources (ms)
index 02c48c1eb5ffa1e5224c021bb022af61af1aff11..c2973bb1af56e3561b083f27d82d261ec04638ad 100644 (file)
@@ -17,9 +17,9 @@ gmx [-s [&lt;.tpr&gt;]] [-cpi [&lt;.cpt&gt;]] [-table [&lt;.xvg&gt;]] [-tablep [
     [-pinstride &lt;int&gt;] [-gpu_id &lt;string&gt;] [-gputasks &lt;string&gt;] [-[no]ddcheck]
     [-rdd &lt;real&gt;] [-rcon &lt;real&gt;] [-dlb &lt;enum&gt;] [-dds &lt;real&gt;] [-nb &lt;enum&gt;]
     [-nstlist &lt;int&gt;] [-[no]tunepme] [-pme &lt;enum&gt;] [-pmefft &lt;enum&gt;]
-    [-bonded &lt;enum&gt;] [-[no]v] [-pforce &lt;real&gt;] [-[no]reprod] [-cpt &lt;real&gt;]
-    [-[no]cpnum] [-[no]append] [-nsteps &lt;int&gt;] [-maxh &lt;real&gt;] [-replex &lt;int&gt;]
-    [-nex &lt;int&gt;] [-reseed &lt;int&gt;]
+    [-bonded &lt;enum&gt;] [-update &lt;enum&gt;] [-[no]v] [-pforce &lt;real&gt;] [-[no]reprod]
+    [-cpt &lt;real&gt;] [-[no]cpnum] [-[no]append] [-nsteps &lt;int&gt;] [-maxh &lt;real&gt;]
+    [-replex &lt;int&gt;] [-nex &lt;int&gt;] [-reseed &lt;int&gt;]
 
 DESCRIPTION
 
@@ -265,6 +265,8 @@ Other options:
            Perform PME FFT calculations on: auto, cpu, gpu
  -bonded &lt;enum&gt;             (auto)
            Perform bonded calculations on: auto, cpu, gpu
+ -update &lt;enum&gt;             (auto)
+           Perform update and constraints on: auto, cpu, gpu
  -[no]v                     (no)
            Be loud and noisy
  -pforce &lt;real&gt;             (-1)
index 602ede8a981c471a711ec7d0f1c2aafa1e55565b..a6aa375f14811f0cfaa249676382e087451eef58 100644 (file)
@@ -103,7 +103,7 @@ TEST_P(SimulatorComparisonTest, WithinTolerances)
          },
          {
              interaction_function[F_EKIN].longname,
-             relativeToleranceAsPrecisionDependentUlp(10.0, 100, 40)
+             relativeToleranceAsPrecisionDependentUlp(60.0, 100, 40)
          },
          {
              interaction_function[F_PRES].longname,