Balance event consumption for GPU update code path
[alexxy/gromacs.git] / src / gromacs / mdtypes / state_propagator_data_gpu_impl_gpu.cpp
index 30478ef046c20b76599195554696c9f363e9808e..e6db89f1a99512b6cf0592f68d7c5f95987c4a9e 100644 (file)
@@ -372,6 +372,16 @@ GpuEventSynchronizer* StatePropagatorDataGpu::Impl::getCoordinatesReadyOnDeviceE
     }
     else
     {
+        if (stepWork.doNeighborSearch && xUpdatedOnDeviceEvent_)
+        {
+            /* On search steps, we do not consume the result of the GPU update
+             * but rather that of a H2D transfer. So, we reset the event triggered after
+             * update to avoid leaving it unconsumed.
+             * Unfortunately, we don't always have the event marked either (e.g., on the
+             * first step) so we just reset it here.
+             * See Issue #3988. */
+            xUpdatedOnDeviceEvent_->reset();
+        }
         return &xReadyOnDevice_[atomLocality];
     }
 }
@@ -384,6 +394,18 @@ void StatePropagatorDataGpu::Impl::waitCoordinatesCopiedToDevice(AtomLocality at
     wallcycle_stop(wcycle_, WallCycleCounter::WaitGpuStatePropagatorData);
 }
 
+/* Consume the coordinates-ready-on-device event of the given locality.
+ *
+ * Looks up the entry of xReadyOnDevice_ selected by atomLocality and calls
+ * consume() on it; no other state of this object is touched here.
+ *
+ * atomLocality  Locality selecting the event; asserted to be below AtomLocality::Count.
+ */
+void StatePropagatorDataGpu::Impl::consumeCoordinatesCopiedToDeviceEvent(AtomLocality atomLocality)
+{
+    GMX_ASSERT(atomLocality < AtomLocality::Count, "Wrong atom locality.");
+    auto& coordinatesReadyEvent = xReadyOnDevice_[atomLocality];
+    coordinatesReadyEvent.consume();
+}
+
+/* Reset the coordinates-ready-on-device event of the given locality.
+ *
+ * Looks up the entry of xReadyOnDevice_ selected by atomLocality and calls
+ * reset() on it; no other state of this object is touched here.
+ *
+ * atomLocality  Locality selecting the event; asserted to be below AtomLocality::Count.
+ */
+void StatePropagatorDataGpu::Impl::resetCoordinatesCopiedToDeviceEvent(AtomLocality atomLocality)
+{
+    GMX_ASSERT(atomLocality < AtomLocality::Count, "Wrong atom locality.");
+    auto& coordinatesReadyEvent = xReadyOnDevice_[atomLocality];
+    coordinatesReadyEvent.reset();
+}
+
 void StatePropagatorDataGpu::Impl::setXUpdatedOnDeviceEvent(GpuEventSynchronizer* xUpdatedOnDeviceEvent)
 {
     GMX_ASSERT(xUpdatedOnDeviceEvent != nullptr, "The event synchronizer can not be nullptr.");
@@ -551,6 +573,11 @@ GpuEventSynchronizer* StatePropagatorDataGpu::Impl::fReducedOnDevice(AtomLocalit
     return &fReducedOnDevice_[atomLocality];
 }
 
+/* Consume the forces-reduced-on-device event of the given locality.
+ *
+ * Calls consume() on the entry of fReducedOnDevice_ selected by atomLocality.
+ *
+ * atomLocality  Locality selecting the event; asserted to be below AtomLocality::Count.
+ */
+void StatePropagatorDataGpu::Impl::consumeForcesReducedOnDeviceEvent(AtomLocality atomLocality)
+{
+    // Apply the same locality check as the coordinate-event consume/reset methods,
+    // so an invalid locality is reported instead of silently indexing fReducedOnDevice_.
+    GMX_ASSERT(atomLocality < AtomLocality::Count, "Wrong atom locality.");
+    fReducedOnDevice_[atomLocality].consume();
+}
+
 /* Return a pointer to the entry of fReadyOnDevice_ selected by atomLocality.
  *
  * The pointer refers to a member of this object, not to a copy.
  * NOTE(review): atomLocality is not range-checked here -- confirm callers only pass valid localities.
  */
 GpuEventSynchronizer* StatePropagatorDataGpu::Impl::fReadyOnDevice(AtomLocality atomLocality)
 {
     return &fReadyOnDevice_[atomLocality];
     return impl_->waitCoordinatesCopiedToDevice(atomLocality);
 }
 
+/* Public entry point: forwards to Impl::consumeCoordinatesCopiedToDeviceEvent()
+ * with the same locality. Nothing is returned.
+ */
+void StatePropagatorDataGpu::consumeCoordinatesCopiedToDeviceEvent(AtomLocality atomLocality)
+{
+    impl_->consumeCoordinatesCopiedToDeviceEvent(atomLocality);
+}
+
+/* Public entry point: forwards to Impl::resetCoordinatesCopiedToDeviceEvent()
+ * with the same locality. Nothing is returned.
+ */
+void StatePropagatorDataGpu::resetCoordinatesCopiedToDeviceEvent(AtomLocality atomLocality)
+{
+    impl_->resetCoordinatesCopiedToDeviceEvent(atomLocality);
+}
+
 void StatePropagatorDataGpu::setXUpdatedOnDeviceEvent(GpuEventSynchronizer* xUpdatedOnDeviceEvent)
 {
     impl_->setXUpdatedOnDeviceEvent(xUpdatedOnDeviceEvent);
@@ -723,6 +760,11 @@ GpuEventSynchronizer* StatePropagatorDataGpu::fReducedOnDevice(AtomLocality atom
     return impl_->fReducedOnDevice(atomLocality);
 }
 
+/* Public entry point: forwards to Impl::consumeForcesReducedOnDeviceEvent()
+ * with the same locality. Nothing is returned.
+ */
+void StatePropagatorDataGpu::consumeForcesReducedOnDeviceEvent(AtomLocality atomLocality)
+{
+    impl_->consumeForcesReducedOnDeviceEvent(atomLocality);
+}
+
 GpuEventSynchronizer* StatePropagatorDataGpu::fReadyOnDevice(AtomLocality atomLocality)
 {
     return impl_->fReadyOnDevice(atomLocality);