Switch the GPU buffer ops on when update is on GPU

author Artem Zhmurov <zhmurov@gmail.com>

Thu, 24 Oct 2019 17:15:40 +0000 (19:15 +0200)

committer Szilárd Páll <pall.szilard@gmail.com>

Tue, 29 Oct 2019 09:54:49 +0000 (10:54 +0100)
author Artem Zhmurov <zhmurov@gmail.com>
Thu, 24 Oct 2019 17:15:40 +0000 (19:15 +0200)
committer Szilárd Páll <pall.szilard@gmail.com>
Tue, 29 Oct 2019 09:54:49 +0000 (10:54 +0100)
diff --git a/admin/builds/gromacs.py b/admin/builds/gromacs.py

index 4fa518f1864865c2b4d442d5e6b9d8d9d9e87590..49007c5a63f125a098c719b7e73bf4b1b005d2e2 100644 (file)
--- a/admin/builds/gromacs.py
+++ b/admin/builds/gromacs.py
@@ -185,7 +185,6 @@ def do_build(context):
  
      # GPU update flag enables GPU update+constraints as well as buffer ops (dependency)
      if context.opts.gpuupdate:
-        context.env.set_env_var('GMX_USE_GPU_BUFFER_OPS', "1")
          context.env.set_env_var('GMX_FORCE_UPDATE_DEFAULT_GPU', "1")
  
      regressiontests_path = context.workspace.get_project_dir(Project.REGRESSIONTESTS)
diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp

index bf6532b3e01d38baba2934422e9f4de34bebc198..245d6f9e50e838de17edd51e33221e399b5caf93 100644 (file)
--- a/src/gromacs/mdrun/runner.cpp
+++ b/src/gromacs/mdrun/runner.cpp
@@ -885,6 +885,27 @@ int Mdrunner::mdrunner()
      // and report those features that are enabled.
      const DevelopmentFeatureFlags devFlags = manageDevelopmentFeatures(mdlog, useGpuForNonbonded, useGpuForPme);
  
+    // NOTE: The devFlags need decideWhetherToUseGpusForNonbonded(...) and decideWhetherToUseGpusForPme(...) for overrides,
+    //       decideWhetherToUseGpuForUpdate() needs devFlags for the '-update auto' override, hence the interleaving.
+    // NOTE: When the simulationWork is constructed, the useGpuForUpdate overrides the devFlags.enableGpuBufferOps.
+    try
+    {
+        useGpuForUpdate = decideWhetherToUseGpuForUpdate(devFlags.forceGpuUpdateDefaultOn,
+                                                         useDomainDecomposition,
+                                                         useGpuForPme,
+                                                         useGpuForNonbonded,
+                                                         updateTarget,
+                                                         gpusWereDetected,
+                                                         *inputrec,
+                                                         gmx_mtop_interaction_count(mtop, IF_VSITE) > 0,
+                                                         doEssentialDynamics,
+                                                         gmx_mtop_ftype_count(mtop, F_ORIRES) > 0,
+                                                         gmx_mtop_ftype_count(mtop, F_DISRES) > 0,
+                                                         replExParams.exchangeInterval > 0);
+    }
+    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
+
+
      // Build restraints.
      // TODO: hide restraint implementation details from Mdrunner.
      // There is nothing unique about restraints at this point as far as the
@@ -1333,7 +1354,7 @@ int Mdrunner::mdrunner()
          // TODO remove need to pass local stream into GPU halo exchange - Redmine #3093
          if (havePPDomainDecomposition(cr) && prefer1DAnd1PulseDD && is1DAnd1PulseDD(*cr->dd))
          {
-            GMX_RELEASE_ASSERT(devFlags.enableGpuBufferOps, "Must use GMX_GPU_BUFFER_OPS=1 to use GMX_GPU_DD_COMMS=1");
+            GMX_RELEASE_ASSERT(devFlags.enableGpuBufferOps, "Must use GMX_USE_GPU_BUFFER_OPS=1 to use GMX_GPU_DD_COMMS=1");
              void *streamLocal              = Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::Local);
              void *streamNonLocal           = Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::NonLocal);
              void *coordinatesOnDeviceEvent = fr->nbv->get_x_on_device_event();
@@ -1545,21 +1566,6 @@ int Mdrunner::mdrunner()
                              fr->cginfo_mb);
          }
  
-        // Before we start the actual simulator, try if we can run the update task on the GPU.
-        useGpuForUpdate = decideWhetherToUseGpuForUpdate(devFlags.forceGpuUpdateDefaultOn,
-                                                         DOMAINDECOMP(cr),
-                                                         useGpuForPme,
-                                                         useGpuForNonbonded,
-                                                         devFlags.enableGpuBufferOps,
-                                                         updateTarget,
-                                                         gpusWereDetected,
-                                                         *inputrec,
-                                                         mdAtoms->mdatoms()->haveVsites,
-                                                         doEssentialDynamics,
-                                                         gmx_mtop_ftype_count(mtop, F_ORIRES) > 0,
-                                                         gmx_mtop_ftype_count(mtop, F_DISRES) > 0,
-                                                         replExParams.exchangeInterval > 0);
-
          const bool inputIsCompatibleWithModularSimulator = ModularSimulator::isInputCompatible(
                      false,
                      inputrec, doRerun, vsite.get(), ms, replExParams,
@@ -1568,8 +1574,21 @@ int Mdrunner::mdrunner()
  
          const bool useModularSimulator = inputIsCompatibleWithModularSimulator && !(getenv("GMX_DISABLE_MODULAR_SIMULATOR") != nullptr);
  
+        // TODO This is not the right place to manage the lifetime of
+        // this data structure, but currently it's the easiest way to
+        // make it work.
+        MdrunScheduleWorkload runScheduleWork;
+        // Also populates the simulation constant workload description.
+        runScheduleWork.simulationWork = createSimulationWorkload(useGpuForNonbonded,
+                                                                  pmeRunMode,
+                                                                  useGpuForBonded,
+                                                                  useGpuForUpdate,
+                                                                  devFlags.enableGpuBufferOps,
+                                                                  devFlags.enableGpuHaloExchange,
+                                                                  devFlags.enableGpuPmePPComm);
+
          std::unique_ptr<gmx::StatePropagatorDataGpu> stateGpu;
-        if (gpusWereDetected && ((useGpuForPme && thisRankHasDuty(cr, DUTY_PME)) || devFlags.enableGpuBufferOps))
+        if (gpusWereDetected && ((useGpuForPme && thisRankHasDuty(cr, DUTY_PME)) || runScheduleWork.simulationWork.useGpuBufferOps))
          {
              const void         *pmeStream      = pme_gpu_get_device_stream(fr->pmedata);
              const void         *localStream    = fr->nbv->gpu_nbv != nullptr ? Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::Local) : nullptr;
@@ -1587,20 +1606,6 @@ int Mdrunner::mdrunner()
              fr->stateGpu = stateGpu.get();
          }
  
-        // TODO This is not the right place to manage the lifetime of
-        // this data structure, but currently it's the easiest way to
-        // make it work.
-        MdrunScheduleWorkload runScheduleWork;
-        // Also populates the simulation constant workload description.
-        runScheduleWork.simulationWork = createSimulationWorkload(useGpuForNonbonded,
-                                                                  pmeRunMode,
-                                                                  useGpuForBonded,
-                                                                  useGpuForUpdate,
-                                                                  devFlags.enableGpuBufferOps,
-                                                                  devFlags.enableGpuHaloExchange,
-                                                                  devFlags.enableGpuPmePPComm);
-
-
          GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to simulator.");
          SimulatorBuilder simulatorBuilder;
  
diff --git a/src/gromacs/taskassignment/decidegpuusage.cpp b/src/gromacs/taskassignment/decidegpuusage.cpp

index 71ca58d1584f1f5647bc5b584a0fcf0190337b81..f40814e23a2085be8e551d5283ca6008a3ab5862 100644 (file)
--- a/src/gromacs/taskassignment/decidegpuusage.cpp
+++ b/src/gromacs/taskassignment/decidegpuusage.cpp
@@ -494,7 +494,6 @@ bool decideWhetherToUseGpuForUpdate(const bool        forceGpuUpdateDefaultOn,
                                      const bool        isDomainDecomposition,
                                      const bool        useGpuForPme,
                                      const bool        useGpuForNonbonded,
-                                    const bool        useGpuForBufferOps,
                                      const TaskTarget  updateTarget,
                                      const bool        gpusWereDetected,
                                      const t_inputrec &inputrec,
@@ -516,10 +515,10 @@ bool decideWhetherToUseGpuForUpdate(const bool        forceGpuUpdateDefaultOn,
      {
          errorMessage += "Domain decomposition is not supported.\n";
      }
-    // Using the GPU-version of update makes sense if forces are already on the GPU, i.e. if at least:
-    // 1. PME is on the GPU (there should be a copy of coordinates on a GPU in rvec format for PME spread).
-    // 2. Non-bonded interactions and buffer ops are on the GPU.
-    if (!(useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps)))
+    // Using the GPU-version of update if:
+    // 1. PME is on the GPU (there should be a copy of coordinates on GPU for PME spread), or
+    // 2. Non-bonded interactions are on the GPU.
+    if (!(useGpuForPme || useGpuForNonbonded))
      {
          errorMessage += "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
      }
diff --git a/src/gromacs/taskassignment/decidegpuusage.h b/src/gromacs/taskassignment/decidegpuusage.h

index c74ca0a197ce96071b0a9001a8b7152e0fa4f39c..6b8685fa3e1f886e029915a959e6143d798840c7 100644 (file)
--- a/src/gromacs/taskassignment/decidegpuusage.h
+++ b/src/gromacs/taskassignment/decidegpuusage.h
@@ -235,7 +235,6 @@ bool decideWhetherToUseGpusForBonded(bool       useGpuForNonbonded,
   * \param[in]  isDomainDecomposition     Whether there is more than one domain.
   * \param[in]  useGpuForPme              Whether GPUs will be used for PME interactions.
   * \param[in]  useGpuForNonbonded        Whether GPUs will be used for nonbonded interactions.
- * \param[in]  useGpuForBufferOps        Whether GPUs will be used for buffer operations.
   * \param[in]  updateTarget              User choice for running simulation on GPU.
   * \param[in]  gpusWereDetected          Whether compatible GPUs were detected on any node.
   * \param[in]  inputrec                  The user input.
@@ -253,7 +252,6 @@ bool decideWhetherToUseGpuForUpdate(bool              forceGpuUpdateDefaultOn,
                                      bool              isDomainDecomposition,
                                      bool              useGpuForPme,
                                      bool              useGpuForNonbonded,
-                                    bool              useGpuForBufferOps,
                                      TaskTarget        updateTarget,
                                      bool              gpusWereDetected,
                                      const t_inputrec &inputrec,
diff --git a/src/gromacs/taskassignment/decidesimulationworkload.cpp b/src/gromacs/taskassignment/decidesimulationworkload.cpp

index 597cf1a81580119c6a48907574c9aa9f43661953..ea82f95f42bccd1eef0cef622a3d343a0204ec28 100644 (file)
--- a/src/gromacs/taskassignment/decidesimulationworkload.cpp
+++ b/src/gromacs/taskassignment/decidesimulationworkload.cpp
@@ -53,7 +53,7 @@ namespace gmx
  SimulationWorkload createSimulationWorkload(bool       useGpuForNonbonded,
                                              PmeRunMode pmeRunMode,
                                              bool       useGpuForBonded,
-                                            bool       useGpuForUpdateConstraints,
+                                            bool       useGpuForUpdate,
                                              bool       useGpuForBufferOps,
                                              bool       useGpuHaloExchange,
                                              bool       useGpuPmePpComm)
@@ -65,8 +65,8 @@ SimulationWorkload createSimulationWorkload(bool       useGpuForNonbonded,
      simulationWorkload.useGpuPme                 = (pmeRunMode == PmeRunMode::GPU || pmeRunMode == PmeRunMode::Mixed);
      simulationWorkload.useGpuPmeFft              = (pmeRunMode == PmeRunMode::Mixed);
      simulationWorkload.useGpuBonded              = useGpuForBonded;
-    simulationWorkload.useGpuUpdate              = useGpuForUpdateConstraints;
-    simulationWorkload.useGpuBufferOps           = useGpuForBufferOps;
+    simulationWorkload.useGpuUpdate              = useGpuForUpdate;
+    simulationWorkload.useGpuBufferOps           = useGpuForBufferOps || useGpuForUpdate;
      simulationWorkload.useGpuHaloExchange        = useGpuHaloExchange;
      simulationWorkload.useGpuPmePpCommunication  = useGpuPmePpComm;
      simulationWorkload.useGpuDirectCommunication = useGpuHaloExchange || useGpuPmePpComm;
diff --git a/src/gromacs/taskassignment/decidesimulationworkload.h b/src/gromacs/taskassignment/decidesimulationworkload.h

index 0e87da68c599e5520b71618064f8b6f55c39d029..b389da53c168a36d98396f004d532f196f800a17 100644 (file)
--- a/src/gromacs/taskassignment/decidesimulationworkload.h
+++ b/src/gromacs/taskassignment/decidesimulationworkload.h
@@ -59,8 +59,8 @@ namespace gmx
   *                               calculations on GPU(s).
   * \param[in] pmeRunMode         Run mode indicating what resource is PME executed on.
   * \param[in] useGpuForBonded    If bonded interactions are calculated on GPU(s).
- * \param[in] useGpuForUpdateConstraints If coordinate update and constraint solving is performed on
- *                                       GPU(s).
+ * \param[in] useGpuForUpdate    If coordinate update and constraint solving is performed on
+ *                               GPU(s).
   * \param[in] useGpuForBufferOps If buffer ops / reduction are calculated on GPU(s).
   * \param[in] useGpuHaloExchange If GPU direct communication is used in halo exchange.
   * \param[in] useGpuPmePpComm    If GPU direct communication is used in PME-PP communication.
@@ -69,7 +69,7 @@ namespace gmx
  SimulationWorkload createSimulationWorkload(bool       useGpuForNonbonded,
                                              PmeRunMode pmeRunMode,
                                              bool       useGpuForBonded,
-                                            bool       useGpuForUpdateConstraints,
+                                            bool       useGpuForUpdate,
                                              bool       useGpuForBufferOps,
                                              bool       useGpuHaloExchange,
                                              bool       useGpuPmePpComm);
author	Artem Zhmurov <zhmurov@gmail.com>
	Thu, 24 Oct 2019 17:15:40 +0000 (19:15 +0200)
committer	Szilárd Páll <pall.szilard@gmail.com>
	Tue, 29 Oct 2019 09:54:49 +0000 (10:54 +0100)
admin/builds/gromacs.py		patch \| blob \| history
src/gromacs/mdrun/runner.cpp		patch \| blob \| history
src/gromacs/taskassignment/decidegpuusage.cpp		patch \| blob \| history
src/gromacs/taskassignment/decidegpuusage.h		patch \| blob \| history
src/gromacs/taskassignment/decidesimulationworkload.cpp		patch \| blob \| history
src/gromacs/taskassignment/decidesimulationworkload.h		patch \| blob \| history