Fix force reduction dependencies with process-MPI and PP-PME on same rank
[alexxy/gromacs.git] / src / gromacs / mdlib / sim_util.cpp
index 1d703a72458e68081e9883c6e8e57222aa1da602..ee3e8cdd46fb086e0db83a7fe9ce9ee3dd2a49ba 100644 (file)
@@ -1152,12 +1152,23 @@ static void setupGpuForceReductions(gmx::MdrunScheduleWorkload* runScheduleWork,
                         : pme_gpu_get_device_f(fr->pmedata);        // PME force buffer on same GPU
         fr->gpuForceReduction[gmx::AtomLocality::Local]->registerRvecForce(forcePtr);
 
-        GpuEventSynchronizer* const pmeSynchronizer =
-                (runScheduleWork->simulationWork.haveSeparatePmeRank
-                         ? fr->pmePpCommGpu->getForcesReadySynchronizer() // buffer received from other GPU
-                         : pme_gpu_get_f_ready_synchronizer(fr->pmedata)); // PME force buffer on same GPU
-        if (GMX_THREAD_MPI)
+        if (runScheduleWork->simulationWork.haveSeparatePmeRank)
         {
+            // PME force buffer resides on a remote GPU -
+            // an event synchronizer is received from the other GPU only when using thread-MPI
+            if (GMX_THREAD_MPI)
+            {
+                GpuEventSynchronizer* const pmeSynchronizer =
+                        fr->pmePpCommGpu->getForcesReadySynchronizer();
+                GMX_ASSERT(pmeSynchronizer != nullptr,
+                           "PME force ready cuda event should not be NULL");
+                fr->gpuForceReduction[gmx::AtomLocality::Local]->addDependency(pmeSynchronizer);
+            }
+        }
+        else
+        {
+            // PME force buffer on same GPU - add dependency on PME force computation
+            GpuEventSynchronizer* const pmeSynchronizer = pme_gpu_get_f_ready_synchronizer(fr->pmedata);
             GMX_ASSERT(pmeSynchronizer != nullptr, "PME force ready cuda event should not be NULL");
             fr->gpuForceReduction[gmx::AtomLocality::Local]->addDependency(pmeSynchronizer);
         }