#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/math/vectypes.h"
+#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/basedefinitions.h"
+#include "gromacs/utility/fixedcapacityvector.h"
#include "gromacs/utility/gmxmpi.h"
struct gmx_domdec_t;
public:
/*! \brief Creates GPU Halo Exchange object.
*
- * Coordinate Halo exchange will be performed in \c
- * StreamNonLocal, and the \c communicateHaloCoordinates
- * method must be called before any subsequent operations that
- * access non-local parts of the coordinate buffer (such as
- * the non-local non-bonded kernels). It also must be called
- * after the local coordinates buffer operations (where the
- * coordinates are copied to the device and hence the \c
- * coordinatesReadyOnDeviceEvent is recorded). Force Halo exchange
- * will be performed in \c streamNonLocal and the \c
- * communicateHaloForces method must be called after the
- * non-local buffer operations, after the local force buffer
- * has been copied to the GPU (if CPU forces are present), and
- * before the local buffer operations. The force halo exchange
- * does not yet support virial steps.
+ * Coordinate Halo exchange will be performed in its own stream
+ * with appropriate event-based synchronization, and the \c
+ * communicateHaloCoordinates method must be called before any
+ * subsequent operations that access non-local parts of the
+ * coordinate buffer (such as the non-local non-bonded
+ * kernels). It also must be called after the local coordinates
+ * buffer operations (where the coordinates are copied to the
+ * device and hence the \c coordinatesReadyOnDeviceEvent is
+ * recorded). Force Halo exchange will also be performed in its
+ * own stream with appropriate event-based synchronization, and
+ * the \c communicateHaloForces method must be called after the
+ * non-local buffer operations, after the local force buffer has
+ * been copied to the GPU (if CPU forces are present), and before
+ * the local buffer operations. The force halo exchange does not
+ * yet support virial steps.
*
* \param [inout] dd domdec structure
* \param [in] dimIndex the dimension index for this instance
* \param [in] mpi_comm_mysim communicator used for simulation
* \param [in] deviceContext GPU device context
- * \param [in] streamLocal local NB CUDA stream.
- * \param [in] streamNonLocal non-local NB CUDA stream.
* \param [in] pulse the communication pulse for this instance
* \param [in] wcycle The wallclock counter
*/
int dimIndex,
MPI_Comm mpi_comm_mysim,
const DeviceContext& deviceContext,
- const DeviceStream& streamLocal,
- const DeviceStream& streamNonLocal,
int pulse,
gmx_wallcycle* wcycle);
~GpuHaloExchange();
* Must be called after local setCoordinates (which records an
* event when the coordinate data has been copied to the
* device).
- * \param [in] box Coordinate box (from which shifts will be constructed)
- * \param [in] coordinatesReadyOnDeviceEvent event recorded when coordinates have been copied to device
+ * \param [in] box Coordinate box (from which shifts will be constructed)
+ * \param [in] dependencyEvent Dependency event for this operation
+ * \returns Event recorded when this operation has been launched
*/
- void communicateHaloCoordinates(const matrix box, GpuEventSynchronizer* coordinatesReadyOnDeviceEvent);
+ GpuEventSynchronizer* communicateHaloCoordinates(const matrix box, GpuEventSynchronizer* dependencyEvent);
/*! \brief GPU halo exchange of force buffer.
* \param[in] accumulateForces True if forces should accumulate, otherwise they are set
+ * \param[in] dependencyEvents Dependency events for this operation
*/
- void communicateHaloForces(bool accumulateForces);
+ void communicateHaloForces(bool accumulateForces,
+ FixedCapacityVector<GpuEventSynchronizer*, 2>* dependencyEvents);
/*! \brief Get the event synchronizer for the forces ready on device.
* \returns The event to synchronize the stream that consumes forces on device.