Eliminate redundant GPU force reduction event dependency
author Szilárd Páll <pall.szilard@gmail.com>
Wed, 26 May 2021 14:06:24 +0000 (14:06 +0000)
committer Andrey Alekseenko <al42and@gmail.com>
Wed, 26 May 2021 14:06:24 +0000 (14:06 +0000)
CPU force transfers happen on the same stream as the GPU reduction, so no explicit dependency is needed.

Additionally, due to a StatePropagatorDataGpu bug, whenever the force-readiness event is queried for a locality other than AtomLocality::All, an incorrect event is returned, leading to a circular dependency on force reduction, as described in #4032.

This change does not, however, fix the StatePropagatorDataGpu bug, but it should help avoid workarounds for the new SYCL backend (#3932).

Refs #4032 #3988

src/gromacs/mdlib/sim_util.cpp

index 67cc9ee160b8b3b061ced626e0dfa602da8a9882..ea4d010d7a187193fd7102b31f5e5987a1e33099 100644 (file)
@@ -1155,13 +1155,15 @@ static void setupGpuForceReductions(gmx::MdrunScheduleWorkload* runScheduleWork,
         }
     }
 
-    if ((runScheduleWork->domainWork.haveCpuLocalForceWork || havePPDomainDecomposition(cr))
-        && !runScheduleWork->simulationWork.useGpuHaloExchange)
+    if (runScheduleWork->domainWork.haveCpuLocalForceWork && !runScheduleWork->simulationWork.useGpuHaloExchange)
     {
-        auto forcesReadyLocality = havePPDomainDecomposition(cr) ? AtomLocality::Local : AtomLocality::All;
-        const bool useGpuForceBufferOps = true;
-        fr->gpuForceReduction[gmx::AtomLocality::Local]->addDependency(
-                stateGpu->getForcesReadyOnDeviceEvent(forcesReadyLocality, useGpuForceBufferOps));
+        // in the DD case we use the same stream for H2D and reduction, hence no explicit dependency needed
+        if (!havePPDomainDecomposition(cr))
+        {
+            const bool useGpuForceBufferOps = true;
+            fr->gpuForceReduction[gmx::AtomLocality::Local]->addDependency(
+                    stateGpu->getForcesReadyOnDeviceEvent(AtomLocality::All, useGpuForceBufferOps));
+        }
     }
 
     if (runScheduleWork->simulationWork.useGpuHaloExchange)
@@ -1183,13 +1185,9 @@ static void setupGpuForceReductions(gmx::MdrunScheduleWorkload* runScheduleWork,
                                                                    accumulate);
 
         // register forces and add dependencies
+        // in the DD case we use the same stream for H2D and reduction, hence no explicit dependency needed
         fr->gpuForceReduction[gmx::AtomLocality::NonLocal]->registerNbnxmForce(
                 Nbnxm::gpu_get_f(nbv->gpu_nbv));
-        if (runScheduleWork->domainWork.haveCpuBondedWork || runScheduleWork->domainWork.haveFreeEnergyWork)
-        {
-            fr->gpuForceReduction[gmx::AtomLocality::NonLocal]->addDependency(
-                    stateGpu->getForcesReadyOnDeviceEvent(AtomLocality::NonLocal, true));
-        }
     }
 }