Link GPU coordinate producer and consumer tasks
[alexxy/gromacs.git] / src / gromacs / ewald / pme_only.cpp
index 10ceb2ce3b8be2e2890c67d1af8681885374285d..8e5ac1de84ce4faf8bcd223b3db43738ccb3d994 100644 (file)
@@ -644,8 +644,10 @@ int gmx_pmeonly(struct gmx_pme_t *pme,
             // or maybe use inputrecDynamicBox(ir), at the very least - change this when this codepath is tested!
             pme_gpu_prepare_computation(pme, boxChanged, box, wcycle, pmeFlags, useGpuPmeForceReduction);
             stateGpu->copyCoordinatesToGpu(gmx::ArrayRef<gmx::RVec>(pme_pp->x), gmx::StatePropagatorDataGpu::AtomLocality::All);
+            // On the separate PME rank we do not need a synchronizer as we schedule everything in a single stream
+            auto xReadyOnDevice = nullptr;
 
-            pme_gpu_launch_spread(pme, wcycle);
+            pme_gpu_launch_spread(pme, xReadyOnDevice, wcycle);
             pme_gpu_launch_complex_transforms(pme, wcycle);
             pme_gpu_launch_gather(pme, wcycle, PmeForceOutputHandling::Set);
             output = pme_gpu_wait_finish_task(pme, pmeFlags, wcycle);