Switch the GPU buffer ops on when update is on GPU
author Artem Zhmurov <zhmurov@gmail.com>
Thu, 24 Oct 2019 17:15:40 +0000 (19:15 +0200)
committer Szilárd Páll <pall.szilard@gmail.com>
Tue, 29 Oct 2019 09:54:49 +0000 (10:54 +0100)
The update is supported on the GPU only when buffer ops are also
offloaded. Previously GMX_USE_GPU_BUFFER_OPS had to be enabled
explicitly; now that setting is overridden (buffer ops are switched
on) whenever the update runs on the GPU.

Change-Id: Icdc154daa053f135b0df503697273016a830fb18

admin/builds/gromacs.py
src/gromacs/mdrun/runner.cpp
src/gromacs/taskassignment/decidegpuusage.cpp
src/gromacs/taskassignment/decidegpuusage.h
src/gromacs/taskassignment/decidesimulationworkload.cpp
src/gromacs/taskassignment/decidesimulationworkload.h

index 4fa518f1864865c2b4d442d5e6b9d8d9d9e87590..49007c5a63f125a098c719b7e73bf4b1b005d2e2 100644 (file)
@@ -185,7 +185,6 @@ def do_build(context):
 
     # GPU update flag enables GPU update+constraints as well as buffer ops (dependency)
     if context.opts.gpuupdate:
-        context.env.set_env_var('GMX_USE_GPU_BUFFER_OPS', "1")
         context.env.set_env_var('GMX_FORCE_UPDATE_DEFAULT_GPU', "1")
 
     regressiontests_path = context.workspace.get_project_dir(Project.REGRESSIONTESTS)
index bf6532b3e01d38baba2934422e9f4de34bebc198..245d6f9e50e838de17edd51e33221e399b5caf93 100644 (file)
@@ -885,6 +885,27 @@ int Mdrunner::mdrunner()
     // and report those features that are enabled.
     const DevelopmentFeatureFlags devFlags = manageDevelopmentFeatures(mdlog, useGpuForNonbonded, useGpuForPme);
 
+    // NOTE: The devFlags need decideWhetherToUseGpusForNonbonded(...) and decideWhetherToUseGpusForPme(...) for overrides,
+    //       decideWhetherToUseGpuForUpdate() needs devFlags for the '-update auto' override, hence the interleaving.
+    // NOTE: When the simulationWork is constructed, the useGpuForUpdate overrides the devFlags.enableGpuBufferOps.
+    try
+    {
+        useGpuForUpdate = decideWhetherToUseGpuForUpdate(devFlags.forceGpuUpdateDefaultOn,
+                                                         useDomainDecomposition,
+                                                         useGpuForPme,
+                                                         useGpuForNonbonded,
+                                                         updateTarget,
+                                                         gpusWereDetected,
+                                                         *inputrec,
+                                                         gmx_mtop_interaction_count(mtop, IF_VSITE) > 0,
+                                                         doEssentialDynamics,
+                                                         gmx_mtop_ftype_count(mtop, F_ORIRES) > 0,
+                                                         gmx_mtop_ftype_count(mtop, F_DISRES) > 0,
+                                                         replExParams.exchangeInterval > 0);
+    }
+    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
+
+
     // Build restraints.
     // TODO: hide restraint implementation details from Mdrunner.
     // There is nothing unique about restraints at this point as far as the
@@ -1333,7 +1354,7 @@ int Mdrunner::mdrunner()
         // TODO remove need to pass local stream into GPU halo exchange - Redmine #3093
         if (havePPDomainDecomposition(cr) && prefer1DAnd1PulseDD && is1DAnd1PulseDD(*cr->dd))
         {
-            GMX_RELEASE_ASSERT(devFlags.enableGpuBufferOps, "Must use GMX_GPU_BUFFER_OPS=1 to use GMX_GPU_DD_COMMS=1");
+            GMX_RELEASE_ASSERT(devFlags.enableGpuBufferOps, "Must use GMX_USE_GPU_BUFFER_OPS=1 to use GMX_GPU_DD_COMMS=1");
             void *streamLocal              = Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::Local);
             void *streamNonLocal           = Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::NonLocal);
             void *coordinatesOnDeviceEvent = fr->nbv->get_x_on_device_event();
@@ -1545,21 +1566,6 @@ int Mdrunner::mdrunner()
                             fr->cginfo_mb);
         }
 
-        // Before we start the actual simulator, try if we can run the update task on the GPU.
-        useGpuForUpdate = decideWhetherToUseGpuForUpdate(devFlags.forceGpuUpdateDefaultOn,
-                                                         DOMAINDECOMP(cr),
-                                                         useGpuForPme,
-                                                         useGpuForNonbonded,
-                                                         devFlags.enableGpuBufferOps,
-                                                         updateTarget,
-                                                         gpusWereDetected,
-                                                         *inputrec,
-                                                         mdAtoms->mdatoms()->haveVsites,
-                                                         doEssentialDynamics,
-                                                         gmx_mtop_ftype_count(mtop, F_ORIRES) > 0,
-                                                         gmx_mtop_ftype_count(mtop, F_DISRES) > 0,
-                                                         replExParams.exchangeInterval > 0);
-
         const bool inputIsCompatibleWithModularSimulator = ModularSimulator::isInputCompatible(
                     false,
                     inputrec, doRerun, vsite.get(), ms, replExParams,
@@ -1568,8 +1574,21 @@ int Mdrunner::mdrunner()
 
         const bool useModularSimulator = inputIsCompatibleWithModularSimulator && !(getenv("GMX_DISABLE_MODULAR_SIMULATOR") != nullptr);
 
+        // TODO This is not the right place to manage the lifetime of
+        // this data structure, but currently it's the easiest way to
+        // make it work.
+        MdrunScheduleWorkload runScheduleWork;
+        // Also populates the simulation constant workload description.
+        runScheduleWork.simulationWork = createSimulationWorkload(useGpuForNonbonded,
+                                                                  pmeRunMode,
+                                                                  useGpuForBonded,
+                                                                  useGpuForUpdate,
+                                                                  devFlags.enableGpuBufferOps,
+                                                                  devFlags.enableGpuHaloExchange,
+                                                                  devFlags.enableGpuPmePPComm);
+
         std::unique_ptr<gmx::StatePropagatorDataGpu> stateGpu;
-        if (gpusWereDetected && ((useGpuForPme && thisRankHasDuty(cr, DUTY_PME)) || devFlags.enableGpuBufferOps))
+        if (gpusWereDetected && ((useGpuForPme && thisRankHasDuty(cr, DUTY_PME)) || runScheduleWork.simulationWork.useGpuBufferOps))
         {
             const void         *pmeStream      = pme_gpu_get_device_stream(fr->pmedata);
             const void         *localStream    = fr->nbv->gpu_nbv != nullptr ? Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::Local) : nullptr;
@@ -1587,20 +1606,6 @@ int Mdrunner::mdrunner()
             fr->stateGpu = stateGpu.get();
         }
 
-        // TODO This is not the right place to manage the lifetime of
-        // this data structure, but currently it's the easiest way to
-        // make it work.
-        MdrunScheduleWorkload runScheduleWork;
-        // Also populates the simulation constant workload description.
-        runScheduleWork.simulationWork = createSimulationWorkload(useGpuForNonbonded,
-                                                                  pmeRunMode,
-                                                                  useGpuForBonded,
-                                                                  useGpuForUpdate,
-                                                                  devFlags.enableGpuBufferOps,
-                                                                  devFlags.enableGpuHaloExchange,
-                                                                  devFlags.enableGpuPmePPComm);
-
-
         GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to simulator.");
         SimulatorBuilder simulatorBuilder;
 
index 71ca58d1584f1f5647bc5b584a0fcf0190337b81..f40814e23a2085be8e551d5283ca6008a3ab5862 100644 (file)
@@ -494,7 +494,6 @@ bool decideWhetherToUseGpuForUpdate(const bool        forceGpuUpdateDefaultOn,
                                     const bool        isDomainDecomposition,
                                     const bool        useGpuForPme,
                                     const bool        useGpuForNonbonded,
-                                    const bool        useGpuForBufferOps,
                                     const TaskTarget  updateTarget,
                                     const bool        gpusWereDetected,
                                     const t_inputrec &inputrec,
@@ -516,10 +515,10 @@ bool decideWhetherToUseGpuForUpdate(const bool        forceGpuUpdateDefaultOn,
     {
         errorMessage += "Domain decomposition is not supported.\n";
     }
-    // Using the GPU-version of update makes sense if forces are already on the GPU, i.e. if at least:
-    // 1. PME is on the GPU (there should be a copy of coordinates on a GPU in rvec format for PME spread).
-    // 2. Non-bonded interactions and buffer ops are on the GPU.
-    if (!(useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps)))
+    // Using the GPU-version of update if:
+    // 1. PME is on the GPU (there should be a copy of coordinates on GPU for PME spread), or
+    // 2. Non-bonded interactions are on the GPU.
+    if (!(useGpuForPme || useGpuForNonbonded))
     {
         errorMessage += "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
     }
index c74ca0a197ce96071b0a9001a8b7152e0fa4f39c..6b8685fa3e1f886e029915a959e6143d798840c7 100644 (file)
@@ -235,7 +235,6 @@ bool decideWhetherToUseGpusForBonded(bool       useGpuForNonbonded,
  * \param[in]  isDomainDecomposition     Whether there more than one domain.
  * \param[in]  useGpuForPme              Whether GPUs will be used for PME interactions.
  * \param[in]  useGpuForNonbonded        Whether GPUs will be used for nonbonded interactions.
- * \param[in]  useGpuForBufferOps        Whether GPUs will be used for buffer operations.
  * \param[in]  updateTarget              User choice for running simulation on GPU.
  * \param[in]  gpusWereDetected          Whether compatible GPUs were detected on any node.
  * \param[in]  inputrec                  The user input.
@@ -253,7 +252,6 @@ bool decideWhetherToUseGpuForUpdate(bool              forceGpuUpdateDefaultOn,
                                     bool              isDomainDecomposition,
                                     bool              useGpuForPme,
                                     bool              useGpuForNonbonded,
-                                    bool              useGpuForBufferOps,
                                     TaskTarget        updateTarget,
                                     bool              gpusWereDetected,
                                     const t_inputrec &inputrec,
index 597cf1a81580119c6a48907574c9aa9f43661953..ea82f95f42bccd1eef0cef622a3d343a0204ec28 100644 (file)
@@ -53,7 +53,7 @@ namespace gmx
 SimulationWorkload createSimulationWorkload(bool       useGpuForNonbonded,
                                             PmeRunMode pmeRunMode,
                                             bool       useGpuForBonded,
-                                            bool       useGpuForUpdateConstraints,
+                                            bool       useGpuForUpdate,
                                             bool       useGpuForBufferOps,
                                             bool       useGpuHaloExchange,
                                             bool       useGpuPmePpComm)
@@ -65,8 +65,8 @@ SimulationWorkload createSimulationWorkload(bool       useGpuForNonbonded,
     simulationWorkload.useGpuPme                 = (pmeRunMode == PmeRunMode::GPU || pmeRunMode == PmeRunMode::Mixed);
     simulationWorkload.useGpuPmeFft              = (pmeRunMode == PmeRunMode::Mixed);
     simulationWorkload.useGpuBonded              = useGpuForBonded;
-    simulationWorkload.useGpuUpdate              = useGpuForUpdateConstraints;
-    simulationWorkload.useGpuBufferOps           = useGpuForBufferOps;
+    simulationWorkload.useGpuUpdate              = useGpuForUpdate;
+    simulationWorkload.useGpuBufferOps           = useGpuForBufferOps || useGpuForUpdate;
     simulationWorkload.useGpuHaloExchange        = useGpuHaloExchange;
     simulationWorkload.useGpuPmePpCommunication  = useGpuPmePpComm;
     simulationWorkload.useGpuDirectCommunication = useGpuHaloExchange || useGpuPmePpComm;
index 0e87da68c599e5520b71618064f8b6f55c39d029..b389da53c168a36d98396f004d532f196f800a17 100644 (file)
@@ -59,8 +59,8 @@ namespace gmx
  *                               calculations on GPU(s).
  * \param[in] pmeRunMode         Run mode indicating what resource is PME execured on.
  * \param[in] useGpuForBonded    If bonded interactions are calculated on GPU(s).
- * \param[in] useGpuForUpdateConstraints If coordinate update and constraint solving is performed on
- *                                       GPU(s).
+ * \param[in] useGpuForUpdate    If coordinate update and constraint solving is performed on
+ *                               GPU(s).
  * \param[in] useGpuForBufferOps If buffer ops / reduction are calculated on GPU(s).
  * \param[in] useGpuHaloExchange If GPU direct communication is used in halo exchange.
  * \param[in] useGpuPmePpComm    If GPu direct communication is used in PME-PP communication.
@@ -69,7 +69,7 @@ namespace gmx
 SimulationWorkload createSimulationWorkload(bool       useGpuForNonbonded,
                                             PmeRunMode pmeRunMode,
                                             bool       useGpuForBonded,
-                                            bool       useGpuForUpdateConstraints,
+                                            bool       useGpuForUpdate,
                                             bool       useGpuForBufferOps,
                                             bool       useGpuHaloExchange,
                                             bool       useGpuPmePpComm);