#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/classhelpers.h"
+class GpuEventSynchronizer;
+
struct gmx_mtop_t;
struct t_idef;
struct t_inputrec;
public:
/*! \brief Create Update-Constrain object.
*
- * \param[in] ir Input record data: LINCS takes number of iterations and order of
- * projection from it.
- * \param[in] mtop Topology of the system: SETTLE gets the masses for O and H atoms
- * and target O-H and H-H distances from this object.
- * \param[in] commandStream GPU stream to use. Can be nullptr.
+ * The constructor is given a \p commandStream (which may be a nullptr), in which all the update and
+ * constrain routines are executed. \p xUpdatedOnDevice should mark the completion of all kernels that
+ * modify coordinates. The event is maintained outside this class and is also passed to all (if any)
+ * consumers of the updated coordinates. The \p xUpdatedOnDevice cannot be a nullptr because the
+ * markEvent(...) method is called unconditionally.
+ *
+ * \param[in] ir Input record data: LINCS takes number of iterations and order of
+ * projection from it.
+ * \param[in] mtop Topology of the system: SETTLE gets the masses for O and H atoms
+ * and target O-H and H-H distances from this object.
+ * \param[in] commandStream GPU stream to use. Can be nullptr.
+ * \param[in] xUpdatedOnDevice The event synchronizer to use to mark that update is done on the GPU.
*/
- UpdateConstrainCuda(const t_inputrec &ir,
- const gmx_mtop_t &mtop,
- const void *commandStream);
+ UpdateConstrainCuda(const t_inputrec &ir,
+ const gmx_mtop_t &mtop,
+ const void *commandStream,
+ GpuEventSynchronizer *xUpdatedOnDevice);
~UpdateConstrainCuda();
/*! \brief Return the synchronizer associated with the event indicated that the coordinates are ready on the device.
*/
- void *getCoordinatesReadySync();
+ GpuEventSynchronizer* getCoordinatesReadySync();
private:
class Impl;
UpdateConstrainCuda::UpdateConstrainCuda(gmx_unused const t_inputrec &ir,
gmx_unused const gmx_mtop_t &mtop,
- gmx_unused const void *commandStream)
+ gmx_unused const void *commandStream,
+ gmx_unused GpuEventSynchronizer *xUpdatedOnDevice)
: impl_(nullptr)
{
GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called instead of the correct implementation.");
GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called instead of the correct implementation.");
}
-void* UpdateConstrainCuda::getCoordinatesReadySync()
+GpuEventSynchronizer* UpdateConstrainCuda::getCoordinatesReadySync()
{
GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called instead of the correct implementation.");
return nullptr;
}
}
- coordinatesReady_.markEvent(commandStream_);
+ coordinatesReady_->markEvent(commandStream_);
return;
}
-UpdateConstrainCuda::Impl::Impl(const t_inputrec &ir,
- const gmx_mtop_t &mtop,
- const void *commandStream)
+UpdateConstrainCuda::Impl::Impl(const t_inputrec &ir,
+ const gmx_mtop_t &mtop,
+ const void *commandStream,
+ GpuEventSynchronizer *xUpdatedOnDevice) :
+ coordinatesReady_(xUpdatedOnDevice)
{
+ GMX_ASSERT(xUpdatedOnDevice != nullptr, "The event synchronizer can not be nullptr.");
commandStream != nullptr ? commandStream_ = *static_cast<const CommandStream*>(commandStream) : commandStream_ = nullptr;
+
integrator_ = std::make_unique<LeapFrogCuda>(commandStream_);
lincsCuda_ = std::make_unique<LincsCuda>(ir.nLincsIter, ir.nProjOrder, commandStream_);
settleCuda_ = std::make_unique<SettleCuda>(mtop, commandStream_);
void UpdateConstrainCuda::Impl::waitCoordinatesReadyOnDevice()
{
- coordinatesReady_.waitForEvent();
+ coordinatesReady_->waitForEvent();
}
-void *UpdateConstrainCuda::Impl::getCoordinatesReadySync()
+GpuEventSynchronizer* UpdateConstrainCuda::Impl::getCoordinatesReadySync()
{
- return static_cast<void*> (&coordinatesReady_);
+ return coordinatesReady_;
}
-UpdateConstrainCuda::UpdateConstrainCuda(const t_inputrec &ir,
- const gmx_mtop_t &mtop,
- const void *commandStream)
- : impl_(new Impl(ir, mtop, commandStream))
+UpdateConstrainCuda::UpdateConstrainCuda(const t_inputrec &ir,
+ const gmx_mtop_t &mtop,
+ const void *commandStream,
+ GpuEventSynchronizer *xUpdatedOnDevice)
+ : impl_(new Impl(ir, mtop, commandStream, xUpdatedOnDevice))
{
}
impl_->waitCoordinatesReadyOnDevice();
}
-void* UpdateConstrainCuda::getCoordinatesReadySync()
+GpuEventSynchronizer* UpdateConstrainCuda::getCoordinatesReadySync()
{
return impl_->getCoordinatesReadySync();
}
{
public:
- /*! \brief Create Update-Constrain object
+ /*! \brief Create Update-Constrain object.
*
- * \param[in] ir Input record data: LINCS takes number of iterations and order of
- * projection from it.
- * \param[in] mtop Topology of the system: SETTLE gets the masses for O and H atoms
- * and target O-H and H-H distances from this object.
- * \param[in] commandStream GPU stream to use. Can be nullptr.
+ * The constructor is given a \p commandStream (which may be a nullptr), in which all the update and
+ * constrain routines are executed. \p xUpdatedOnDevice should mark the completion of all kernels that
+ * modify coordinates. The event is maintained outside this class and is also passed to all (if any)
+ * consumers of the updated coordinates. The \p xUpdatedOnDevice cannot be a nullptr because the
+ * markEvent(...) method is called unconditionally.
+ *
+ * \param[in] ir Input record data: LINCS takes number of iterations and order of
+ * projection from it.
+ * \param[in] mtop Topology of the system: SETTLE gets the masses for O and H atoms
+ * and target O-H and H-H distances from this object.
+ * \param[in] commandStream GPU stream to use. Can be nullptr.
+ * \param[in] xUpdatedOnDevice The event synchronizer to use to mark that update is done on the GPU.
*/
Impl(const t_inputrec &ir,
const gmx_mtop_t &mtop,
- const void *commandStream);
+ const void *commandStream,
+ GpuEventSynchronizer *xUpdatedOnDevice);
~Impl();
/*! \brief Return the synchronizer associated with the event indicated that the coordinates are ready on the device.
*/
- void *getCoordinatesReadySync();
+ GpuEventSynchronizer* getCoordinatesReadySync();
private:
//! SETTLE CUDA object for water constrains
std::unique_ptr<SettleCuda> settleCuda_;
- //! An event to indicate when the update of coordinates is complete
- GpuEventSynchronizer coordinatesReady_;
+ //! A non-owning pointer to the event that indicates when the update of coordinates is complete
+ GpuEventSynchronizer *coordinatesReady_;
};
} // namespace gmx
GMX_LOG(mdlog.info).asParagraph().
appendText("Updating coordinates on the GPU.");
}
- integrator = std::make_unique<UpdateConstrainCuda>(*ir, *top_global, fr->stateGpu->getUpdateStream());
+ integrator = std::make_unique<UpdateConstrainCuda>(*ir, *top_global, fr->stateGpu->getUpdateStream(), fr->stateGpu->xUpdatedOnDevice());
}
if (useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/gpu_utils/gpu_utils.h"
#include "gromacs/math/vectypes.h"
+#include "gromacs/mdtypes/simulation_workload.h"
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/classhelpers.h"
void copyCoordinatesToGpu(gmx::ArrayRef<const gmx::RVec> h_x,
AtomLocality atomLocality);
- /*! \brief Get the event synchronizer for the H2D coordinates copy.
+ /*! \brief Get the event synchronizer of the coordinates ready for the consumption on the device.
*
- * \param[in] atomLocality Locality of the particles to wait for.
+ * Returns the event synchronizer which indicates that the coordinates are ready for the
+ * consumption on the device. Takes into account that the producer may be different.
+ *
+ * If the update is offloaded and the current step is not a DD/search step, the synchronizer
+ * returned for local atoms indicates the completion of GPU update-constraint kernels. Otherwise
+ * (on search steps, if the update is not offloaded, or for non-local atoms), the coordinates are
+ * provided by the H2D copy and the returned synchronizer indicates that the copy is complete.
+ *
+ * \param[in] atomLocality Locality of the particles to wait for.
+ * \param[in] simulationWork The simulation lifetime flags.
+ * \param[in] stepWork The step lifetime flags.
*
* \returns The event to synchronize the stream that consumes coordinates on device.
*/
- GpuEventSynchronizer* getCoordinatesReadyOnDeviceEvent(AtomLocality atomLocality);
+ GpuEventSynchronizer* getCoordinatesReadyOnDeviceEvent(AtomLocality atomLocality,
+ const SimulationWorkload &simulationWork,
+ const StepWorkload &stepWork);
+
+ /*! \brief Get the event synchronizer that marks the completion of the coordinate update on the GPU.
+ *
+ * \returns The event to synchronize with to ensure that the coordinates were updated on the device.
+ */
+ GpuEventSynchronizer* xUpdatedOnDevice();
/*! \brief Copy positions from the GPU memory.
*
return DeviceBuffer<float> {};
}
-GpuEventSynchronizer* StatePropagatorDataGpu::getCoordinatesReadyOnDeviceEvent(AtomLocality /* atomLocality */)
+GpuEventSynchronizer* StatePropagatorDataGpu::getCoordinatesReadyOnDeviceEvent(AtomLocality /* atomLocality */,
+ const SimulationWorkload & /* simulationWork */,
+ const StepWorkload & /* stepWork */)
+{
+ GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation.");
+ return nullptr;
+}
+
+GpuEventSynchronizer* StatePropagatorDataGpu::xUpdatedOnDevice()
{
GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation.");
return nullptr;
void copyCoordinatesToGpu(gmx::ArrayRef<const gmx::RVec> h_x,
AtomLocality atomLocality);
- /*! \brief Get the event synchronizer on the H2D coordinates copy.
+ /*! \brief Get the event synchronizer of the coordinates ready for the consumption on the device.
*
- * \param[in] atomLocality Locality of the particles to wait for.
+ * Returns the event synchronizer which indicates that the coordinates are ready for the
+ * consumption on the device. Takes into account that the producer may be different.
+ *
+ * If the update is offloaded and the current step is not a DD/search step, the synchronizer
+ * returned for local atoms indicates the completion of GPU update-constraint kernels. Otherwise
+ * (on search steps, if the update is not offloaded, or for non-local atoms), the coordinates are
+ * provided by the H2D copy and the returned synchronizer indicates that the copy is complete.
+ *
+ * \param[in] atomLocality Locality of the particles to wait for.
+ * \param[in] simulationWork The simulation lifetime flags.
+ * \param[in] stepWork The step lifetime flags.
*
* \returns The event to synchronize the stream that consumes coordinates on device.
*/
- GpuEventSynchronizer* getCoordinatesReadyOnDeviceEvent(AtomLocality atomLocality);
+ GpuEventSynchronizer* getCoordinatesReadyOnDeviceEvent(AtomLocality atomLocality,
+ const SimulationWorkload &simulationWork,
+ const StepWorkload &stepWork);
+
+ /*! \brief Get the event synchronizer that marks the completion of the coordinate update on the GPU.
+ *
+ * \returns The event to synchronize with to ensure that the coordinates were updated on the device.
+ */
+ GpuEventSynchronizer* xUpdatedOnDevice();
/*! \brief Copy positions from the GPU memory.
*
// Streams to use for forces H2D and D2H copies (one event for each atom locality)
EnumerationArray<AtomLocality, CommandStream> fCopyStreams_ = {{nullptr}};
- //! An array of events that indicate H2D copy is complete (one event for each atom locality)
+ /*! \brief An array of events that indicate H2D copy is complete (one event for each atom locality)
+ *
+ * \todo Reconsider naming. It should be xCopiedToDevice or xH2DCopyComplete, etc.
+ */
EnumerationArray<AtomLocality, GpuEventSynchronizer> xReadyOnDevice_;
+ //! An event indicating that the coordinates are ready after update-constraints execution
+ GpuEventSynchronizer xUpdatedOnDevice_;
//! An array of events that indicate D2H copy of coordinates is complete (one event for each atom locality)
EnumerationArray<AtomLocality, GpuEventSynchronizer> xReadyOnHost_;
}
}
-GpuEventSynchronizer* StatePropagatorDataGpu::Impl::getCoordinatesReadyOnDeviceEvent(AtomLocality atomLocality)
+GpuEventSynchronizer* StatePropagatorDataGpu::Impl::getCoordinatesReadyOnDeviceEvent(AtomLocality atomLocality,
+ const SimulationWorkload &simulationWork,
+ const StepWorkload &stepWork)
+{
+ // The provider of the coordinates may be different for local atoms. If the update is offloaded
+ // and this is not a neighbor search step, then the consumer needs to wait for the update
+ // to complete. Otherwise, the coordinates are copied from the host and we need to wait for
+ // the copy event. Non-local coordinates are always provided by the H2D copy.
+ //
+ // TODO: This should be reconsidered to support the halo exchange.
+ //
+ if (atomLocality == AtomLocality::Local && simulationWork.useGpuUpdate && !stepWork.doNeighborSearch)
+ {
+ return &xUpdatedOnDevice_;
+ }
+ else
+ {
+ return &xReadyOnDevice_[atomLocality];
+ }
+}
+
+GpuEventSynchronizer* StatePropagatorDataGpu::Impl::xUpdatedOnDevice()
{
- return &xReadyOnDevice_[atomLocality];
+ return &xUpdatedOnDevice_;
}
void StatePropagatorDataGpu::Impl::copyCoordinatesFromGpu(gmx::ArrayRef<gmx::RVec> h_x,
return impl_->copyCoordinatesToGpu(h_x, atomLocality);
}
-GpuEventSynchronizer* StatePropagatorDataGpu::getCoordinatesReadyOnDeviceEvent(AtomLocality atomLocality)
+GpuEventSynchronizer* StatePropagatorDataGpu::getCoordinatesReadyOnDeviceEvent(AtomLocality atomLocality,
+ const SimulationWorkload &simulationWork,
+ const StepWorkload &stepWork)
+{
+ return impl_->getCoordinatesReadyOnDeviceEvent(atomLocality, simulationWork, stepWork);
+}
+
+GpuEventSynchronizer* StatePropagatorDataGpu::xUpdatedOnDevice()
{
- return impl_->getCoordinatesReadyOnDeviceEvent(atomLocality);
+ return impl_->xUpdatedOnDevice();
}
void StatePropagatorDataGpu::copyCoordinatesFromGpu(gmx::ArrayRef<RVec> h_x,