// or maybe use inputrecDynamicBox(ir), at the very least - change this when this codepath is tested!
pme_gpu_prepare_computation(pme, boxChanged, box, wcycle, pmeFlags, useGpuPmeForceReduction);
stateGpu->copyCoordinatesToGpu(gmx::ArrayRef<gmx::RVec>(pme_pp->x), gmx::StatePropagatorDataGpu::AtomLocality::All);
+ // On the separate PME rank we do not need a synchronizer, as everything is scheduled in a single stream.
+ auto xReadyOnDevice = nullptr;
- pme_gpu_launch_spread(pme, wcycle);
+ pme_gpu_launch_spread(pme, xReadyOnDevice, wcycle);
pme_gpu_launch_complex_transforms(pme, wcycle);
pme_gpu_launch_gather(pme, wcycle, PmeForceOutputHandling::Set);
output = pme_gpu_wait_finish_task(pme, pmeFlags, wcycle);