Eliminate redundant GPU force reduction event dependency

author Szilárd Páll <pall.szilard@gmail.com>

Wed, 26 May 2021 14:06:24 +0000 (14:06 +0000)

committer Andrey Alekseenko <al42and@gmail.com>

Wed, 26 May 2021 14:06:24 +0000 (14:06 +0000)
author Szilárd Páll <pall.szilard@gmail.com>
Wed, 26 May 2021 14:06:24 +0000 (14:06 +0000)
committer Andrey Alekseenko <al42and@gmail.com>
Wed, 26 May 2021 14:06:24 +0000 (14:06 +0000)
diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp

index 67cc9ee160b8b3b061ced626e0dfa602da8a9882..ea4d010d7a187193fd7102b31f5e5987a1e33099 100644 (file)
--- a/src/gromacs/mdlib/sim_util.cpp
+++ b/src/gromacs/mdlib/sim_util.cpp
@@ -1155,13 +1155,15 @@ static void setupGpuForceReductions(gmx::MdrunScheduleWorkload* runScheduleWork,
          }
      }
  
-    if ((runScheduleWork->domainWork.haveCpuLocalForceWork || havePPDomainDecomposition(cr))
-        && !runScheduleWork->simulationWork.useGpuHaloExchange)
+    if (runScheduleWork->domainWork.haveCpuLocalForceWork && !runScheduleWork->simulationWork.useGpuHaloExchange)
      {
-        auto forcesReadyLocality = havePPDomainDecomposition(cr) ? AtomLocality::Local : AtomLocality::All;
-        const bool useGpuForceBufferOps = true;
-        fr->gpuForceReduction[gmx::AtomLocality::Local]->addDependency(
-                stateGpu->getForcesReadyOnDeviceEvent(forcesReadyLocality, useGpuForceBufferOps));
+        // In the DD case H2D and reduction use the same stream, so no explicit dependency is needed
+        if (!havePPDomainDecomposition(cr))
+        {
+            const bool useGpuForceBufferOps = true;
+            fr->gpuForceReduction[gmx::AtomLocality::Local]->addDependency(
+                    stateGpu->getForcesReadyOnDeviceEvent(AtomLocality::All, useGpuForceBufferOps));
+        }
      }
  
      if (runScheduleWork->simulationWork.useGpuHaloExchange)
@@ -1183,13 +1185,9 @@ static void setupGpuForceReductions(gmx::MdrunScheduleWorkload* runScheduleWork,
                                                                     accumulate);
  
          // register forces and add dependencies
+        // In the DD case H2D and reduction use the same stream, so no explicit dependency is needed
          fr->gpuForceReduction[gmx::AtomLocality::NonLocal]->registerNbnxmForce(
                  Nbnxm::gpu_get_f(nbv->gpu_nbv));
-        if (runScheduleWork->domainWork.haveCpuBondedWork || runScheduleWork->domainWork.haveFreeEnergyWork)
-        {
-            fr->gpuForceReduction[gmx::AtomLocality::NonLocal]->addDependency(
-                    stateGpu->getForcesReadyOnDeviceEvent(AtomLocality::NonLocal, true));
-        }
      }
  }
author	Szilárd Páll <pall.szilard@gmail.com>
	Wed, 26 May 2021 14:06:24 +0000 (14:06 +0000)
committer	Andrey Alekseenko <al42and@gmail.com>
	Wed, 26 May 2021 14:06:24 +0000 (14:06 +0000)