* \param[in] atomLocality Locality of the particles to wait for.
* \param[in] simulationWork The simulation lifetime flags.
* \param[in] stepWork The step lifetime flags.
+ * \param[in] gpuCoordinateHaloLaunched Event recorded when GPU coordinate halo has been launched.
*
* \returns The event to synchronize the stream that consumes coordinates on device.
*/
GpuEventSynchronizer* getCoordinatesReadyOnDeviceEvent(AtomLocality atomLocality,
const SimulationWorkload& simulationWork,
- const StepWorkload& stepWork);
+ const StepWorkload& stepWork,
+ GpuEventSynchronizer* gpuCoordinateHaloLaunched = nullptr);
/*! \brief Blocking wait until coordinates are copied to the device.
*
*/
void setXUpdatedOnDeviceEvent(GpuEventSynchronizer* xUpdatedOnDeviceEvent);
- /*! \brief Copy positions from the GPU memory.
+ /*! \brief Copy positions from the GPU memory, with an optional explicit dependency.
*
* \param[in] h_x Positions buffer in the host memory.
* \param[in] atomLocality Locality of the particles to copy.
+ * \param[in] dependency Optional dependency event for this operation (defaults to nullptr).
*/
- void copyCoordinatesFromGpu(gmx::ArrayRef<gmx::RVec> h_x, AtomLocality atomLocality);
+ void copyCoordinatesFromGpu(gmx::ArrayRef<gmx::RVec> h_x,
+ AtomLocality atomLocality,
+ GpuEventSynchronizer* dependency = nullptr);
/*! \brief Wait until coordinates are available on the host.
*
/*! \brief Clear forces in the GPU memory.
*
* \param[in] atomLocality Locality of the particles to clear.
+ * \param[in] dependency Dependency event for this operation (no default; must be passed explicitly).
*/
- void clearForcesOnGpu(AtomLocality atomLocality);
+ void clearForcesOnGpu(AtomLocality atomLocality, GpuEventSynchronizer* dependency);
/*! \brief Get the event synchronizer for the forces ready on device.
*
* 1. The forces are copied to the device (when GPU buffer ops are off)
* 2. The forces are reduced on the device (GPU buffer ops are on)
*
- * \todo Pass step workload instead of the useGpuFBufferOps boolean.
- *
- * \param[in] atomLocality Locality of the particles to wait for.
- * \param[in] useGpuFBufferOps If the force buffer ops are offloaded to the GPU.
+ * \param[in] stepWork Step workload flags.
+ * \param[in] simulationWork Simulation workload flags.
*
* \returns The event to synchronize the stream that consumes forces on device.
*/
- GpuEventSynchronizer* getForcesReadyOnDeviceEvent(AtomLocality atomLocality, bool useGpuFBufferOps);
+ GpuEventSynchronizer* getLocalForcesReadyOnDeviceEvent(StepWorkload stepWork,
+ SimulationWorkload simulationWork);
- /*! \brief Getter for the event synchronizer for the forces are reduced on the GPU.
+ /*! \brief Getter for the event synchronizer for when forces are reduced on the GPU.
*
- * \returns The event to mark when forces are reduced on the GPU.
+ * \param[in] atomLocality Locality of the particles to wait for.
+ * \returns The event to mark when forces are reduced on the GPU.
+ */
+ GpuEventSynchronizer* fReducedOnDevice(AtomLocality atomLocality);
+
+ /*! \brief Getter for the event synchronizer for when forces are ready for GPU update.
+ *
+ * \param[in] atomLocality Locality of the particles to wait for.
+ * \returns The event to mark when forces are ready for GPU update.
*/
- GpuEventSynchronizer* fReducedOnDevice();
+ GpuEventSynchronizer* fReadyOnDevice(AtomLocality atomLocality);
/*! \brief Copy forces from the GPU memory.
*
EnumerationArray<AtomLocality, const DeviceStream*> vCopyStreams_ = { { nullptr } };
// Streams to use for forces H2D and D2H copies (one stream for each atom locality)
EnumerationArray<AtomLocality, const DeviceStream*> fCopyStreams_ = { { nullptr } };
+ // Streams internal to this module
+ std::unique_ptr<DeviceStream> copyInStream_;
+ std::unique_ptr<DeviceStream> memsetStream_;
/*! \brief An array of events that indicate H2D copy is complete (one event for each atom locality)
*
//! An array of events that indicate H2D copy of forces is complete (one event for each atom locality)
EnumerationArray<AtomLocality, GpuEventSynchronizer> fReadyOnDevice_;
- //! An event that the forces were reduced on the GPU
- GpuEventSynchronizer fReducedOnDevice_;
+ //! An array of events that indicate the forces were reduced on the GPU (one event for each atom locality)
+ EnumerationArray<AtomLocality, GpuEventSynchronizer> fReducedOnDevice_;
//! An array of events that indicate D2H copy of forces is complete (one event for each atom locality)
EnumerationArray<AtomLocality, GpuEventSynchronizer> fReadyOnHost_;