From fa49c1d85a7e1bcbae7cf16c54442d6e41ba7b41 Mon Sep 17 00:00:00 2001 From: Artem Zhmurov Date: Tue, 17 Aug 2021 15:13:06 +0000 Subject: [PATCH] Move the ownership of the xUpdatedOnDevice event to update constraints --- src/gromacs/mdlib/update_constrain_gpu.h | 25 +++++------ .../mdlib/update_constrain_gpu_impl.cpp | 45 ++++++++----------- src/gromacs/mdlib/update_constrain_gpu_impl.h | 29 ++++++------ .../mdlib/update_constrain_gpu_impl_stubs.cpp | 3 +- src/gromacs/mdrun/md.cpp | 3 +- .../mdtypes/state_propagator_data_gpu.h | 6 +-- .../state_propagator_data_gpu_impl.cpp | 3 +- .../mdtypes/state_propagator_data_gpu_impl.h | 10 ++--- .../state_propagator_data_gpu_impl_gpu.cpp | 13 +++--- 9 files changed, 62 insertions(+), 75 deletions(-) diff --git a/src/gromacs/mdlib/update_constrain_gpu.h b/src/gromacs/mdlib/update_constrain_gpu.h index 9050039b13..d779a6a4cc 100644 --- a/src/gromacs/mdlib/update_constrain_gpu.h +++ b/src/gromacs/mdlib/update_constrain_gpu.h @@ -71,10 +71,7 @@ public: /*! \brief Create Update-Constrain object. * * The constructor is given a non-nullptr \p deviceStream, in which all the update and constrain - * routines are executed. \p xUpdatedOnDevice should mark the completion of all kernels that - * modify coordinates. The event is maintained outside this class and also passed to all (if - * any) consumers of the updated coordinates. The \p xUpdatedOnDevice also can not be a nullptr - * because the markEvent(...) method is called unconditionally. + * routines are executed. * * \param[in] ir Input record data: LINCS takes number of iterations and order of * projection from it. @@ -83,17 +80,14 @@ public: * \param[in] numTempScaleValues Number of temperature scaling groups. Zero for no temperature scaling. * \param[in] deviceContext GPU device context. * \param[in] deviceStream GPU stream to use. - * \param[in] xUpdatedOnDevice The event synchronizer to use to mark that update is done - * on the GPU. * \param[in] wcycle The wallclock counter */ - UpdateConstrainGpu(const t_inputrec& ir, - const gmx_mtop_t& mtop, - int numTempScaleValues, - const DeviceContext& deviceContext, - const DeviceStream& deviceStream, - GpuEventSynchronizer* xUpdatedOnDevice, - gmx_wallcycle* wcycle); + UpdateConstrainGpu(const t_inputrec& ir, + const gmx_mtop_t& mtop, + int numTempScaleValues, + const DeviceContext& deviceContext, + const DeviceStream& deviceStream, + gmx_wallcycle* wcycle); ~UpdateConstrainGpu(); @@ -166,9 +160,10 @@ public: */ void setPbc(PbcType pbcType, const matrix box); - /*! \brief Return the synchronizer associated with the event indicated that the coordinates are ready on the device. + /*! \brief Return the synchronizer associated with the event that indicates + * that the coordinates are ready on the device. */ - GpuEventSynchronizer* getCoordinatesReadySync(); + GpuEventSynchronizer* xUpdatedOnDeviceEvent(); /*! \brief * Returns whether the maximum number of coupled constraints is supported diff --git a/src/gromacs/mdlib/update_constrain_gpu_impl.cpp b/src/gromacs/mdlib/update_constrain_gpu_impl.cpp index 9eceea17f1..5931dea8c9 100644 --- a/src/gromacs/mdlib/update_constrain_gpu_impl.cpp +++ b/src/gromacs/mdlib/update_constrain_gpu_impl.cpp @@ -117,7 +117,7 @@ void UpdateConstrainGpu::Impl::integrate(GpuEventSynchronizer* fRead } } - coordinatesReady_->markEvent(deviceStream_); + xUpdatedOnDeviceEvent_.markEvent(deviceStream_); wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchGpuUpdateConstrain); wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu); @@ -149,20 +149,14 @@ void UpdateConstrainGpu::Impl::scaleVelocities(const matrix scalingMatrix) wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu); } -UpdateConstrainGpu::Impl::Impl(const t_inputrec& ir, - const gmx_mtop_t& mtop, - const int numTempScaleValues, - const DeviceContext& deviceContext, - const DeviceStream& deviceStream, - GpuEventSynchronizer* xUpdatedOnDevice, - gmx_wallcycle* wcycle) : - deviceContext_(deviceContext), - deviceStream_(deviceStream), - coordinatesReady_(xUpdatedOnDevice), - wcycle_(wcycle) +UpdateConstrainGpu::Impl::Impl(const t_inputrec& ir, + const gmx_mtop_t& mtop, + const int numTempScaleValues, + const DeviceContext& deviceContext, + const DeviceStream& deviceStream, + gmx_wallcycle* wcycle) : + deviceContext_(deviceContext), deviceStream_(deviceStream), wcycle_(wcycle) { - GMX_ASSERT(xUpdatedOnDevice != nullptr, "The event synchronizer can not be nullptr."); - integrator_ = std::make_unique(deviceContext_, deviceStream_, numTempScaleValues); if (sc_haveGpuConstraintSupport) { @@ -220,19 +214,18 @@ void UpdateConstrainGpu::Impl::setPbc(const PbcType pbcType, const matrix box) setPbcAiuc(numPbcDimensions(pbcType), box, &pbcAiuc_); } -GpuEventSynchronizer* UpdateConstrainGpu::Impl::getCoordinatesReadySync() +GpuEventSynchronizer* UpdateConstrainGpu::Impl::xUpdatedOnDeviceEvent() { - return coordinatesReady_; + return &xUpdatedOnDeviceEvent_; } -UpdateConstrainGpu::UpdateConstrainGpu(const t_inputrec& ir, - const gmx_mtop_t& mtop, - const int numTempScaleValues, - const DeviceContext& deviceContext, - const DeviceStream& deviceStream, - GpuEventSynchronizer* xUpdatedOnDevice, - gmx_wallcycle* wcycle) : - impl_(new Impl(ir, mtop, numTempScaleValues, deviceContext, deviceStream, xUpdatedOnDevice, wcycle)) +UpdateConstrainGpu::UpdateConstrainGpu(const t_inputrec& ir, + const gmx_mtop_t& mtop, + const int numTempScaleValues, + const DeviceContext& deviceContext, + const DeviceStream& deviceStream, + gmx_wallcycle* wcycle) : + impl_(new Impl(ir, mtop, numTempScaleValues, deviceContext, deviceStream, wcycle)) { } @@ -285,9 +278,9 @@ void UpdateConstrainGpu::setPbc(const PbcType pbcType, const matrix box) impl_->setPbc(pbcType, box); } -GpuEventSynchronizer* UpdateConstrainGpu::getCoordinatesReadySync() +GpuEventSynchronizer* UpdateConstrainGpu::xUpdatedOnDeviceEvent() { - return impl_->getCoordinatesReadySync(); + return impl_->xUpdatedOnDeviceEvent(); } bool UpdateConstrainGpu::isNumCoupledConstraintsSupported(const gmx_mtop_t& mtop) diff --git a/src/gromacs/mdlib/update_constrain_gpu_impl.h b/src/gromacs/mdlib/update_constrain_gpu_impl.h index adfbccda85..bbba2b38a6 100644 --- a/src/gromacs/mdlib/update_constrain_gpu_impl.h +++ b/src/gromacs/mdlib/update_constrain_gpu_impl.h @@ -48,6 +48,9 @@ #include "gmxpre.h" +#include "config.h" + +#include "gromacs/gpu_utils/gpueventsynchronizer.h" #include "gromacs/mdlib/leapfrog_gpu.h" #include "gromacs/mdlib/lincs_gpu.h" #include "gromacs/mdlib/settle_gpu.h" @@ -67,10 +70,7 @@ public: /*! \brief Create Update-Constrain object. * * The constructor is given a non-nullptr \p deviceStream, in which all the update and constrain - * routines are executed. \p xUpdatedOnDevice should mark the completion of all kernels that - * modify coordinates. The event is maintained outside this class and also passed to all (if - * any) consumers of the updated coordinates. The \p xUpdatedOnDevice also can not be a nullptr - * because the markEvent(...) method is called unconditionally. + * routines are executed. * * \param[in] ir Input record data: LINCS takes number of iterations and order of * projection from it. @@ -79,17 +79,14 @@ public: * \param[in] numTempScaleValues Number of temperature scaling groups. Set zero for no temperature coupling. * \param[in] deviceContext GPU device context. * \param[in] deviceStream GPU stream to use. - * \param[in] xUpdatedOnDevice The event synchronizer to use to mark that - * update is done on the GPU. * \param[in] wcycle The wallclock counter */ - Impl(const t_inputrec& ir, - const gmx_mtop_t& mtop, - int numTempScaleValues, - const DeviceContext& deviceContext, - const DeviceStream& deviceStream, - GpuEventSynchronizer* xUpdatedOnDevice, - gmx_wallcycle* wcycle); + Impl(const t_inputrec& ir, + const gmx_mtop_t& mtop, + int numTempScaleValues, + const DeviceContext& deviceContext, + const DeviceStream& deviceStream, + gmx_wallcycle* wcycle); ~Impl(); @@ -169,7 +166,7 @@ public: /*! \brief Return the synchronizer associated with the event indicated that the coordinates are ready on the device. */ - GpuEventSynchronizer* getCoordinatesReadySync(); + GpuEventSynchronizer* xUpdatedOnDeviceEvent(); /*! \brief * Returns whether the maximum number of coupled constraints is supported @@ -222,8 +219,8 @@ private: //! SETTLE GPU object for water constrains std::unique_ptr settleGpu_; - //! An pointer to the event to indicate when the update of coordinates is complete - GpuEventSynchronizer* coordinatesReady_; + //! The event to indicate when the update of coordinates is complete + GpuEventSynchronizer xUpdatedOnDeviceEvent_; //! The wallclock counter gmx_wallcycle* wcycle_ = nullptr; }; diff --git a/src/gromacs/mdlib/update_constrain_gpu_impl_stubs.cpp b/src/gromacs/mdlib/update_constrain_gpu_impl_stubs.cpp index 0ec9102a80..3eb3030baa 100644 --- a/src/gromacs/mdlib/update_constrain_gpu_impl_stubs.cpp +++ b/src/gromacs/mdlib/update_constrain_gpu_impl_stubs.cpp @@ -61,7 +61,6 @@ UpdateConstrainGpu::UpdateConstrainGpu(const t_inputrec& /* ir */, const int /* numTempScaleValues */, const DeviceContext& /* deviceContext */, const DeviceStream& /* deviceStream */, - GpuEventSynchronizer* /* xUpdatedOnDevice */, gmx_wallcycle* /*wcycle*/) : impl_(nullptr) { @@ -114,7 +113,7 @@ void UpdateConstrainGpu::setPbc(const PbcType /* pbcType */, const matrix /* box "A CPU stub for UpdateConstrain was called instead of the correct implementation."); } -GpuEventSynchronizer* UpdateConstrainGpu::getCoordinatesReadySync() +GpuEventSynchronizer* UpdateConstrainGpu::xUpdatedOnDeviceEvent() { GMX_ASSERT(!impl_, "A CPU stub for UpdateConstrain was called instead of the correct implementation."); diff --git a/src/gromacs/mdrun/md.cpp b/src/gromacs/mdrun/md.cpp index 30f17fdf14..3b7466c229 100644 --- a/src/gromacs/mdrun/md.cpp +++ b/src/gromacs/mdrun/md.cpp @@ -482,9 +482,10 @@ void gmx::LegacySimulator::do_md() ekind->ngtc, fr->deviceStreamManager->context(), fr->deviceStreamManager->stream(gmx::DeviceStreamType::UpdateAndConstraints), - stateGpu->xUpdatedOnDevice(), wcycle); + stateGpu->setXUpdatedOnDeviceEvent(integrator->xUpdatedOnDeviceEvent()); + integrator->setPbc(PbcType::Xyz, state->box); } diff --git a/src/gromacs/mdtypes/state_propagator_data_gpu.h b/src/gromacs/mdtypes/state_propagator_data_gpu.h index 16e87d31e9..9e8dc576a6 100644 --- a/src/gromacs/mdtypes/state_propagator_data_gpu.h +++ b/src/gromacs/mdtypes/state_propagator_data_gpu.h @@ -206,11 +206,11 @@ public: */ void waitCoordinatesCopiedToDevice(AtomLocality atomLocality); - /*! \brief Getter for the event synchronizer for the update is done on th GPU + /*! \brief Setter for the event synchronizer for the update is done on th GPU * - * \returns The event to synchronize the stream coordinates wre updated on device. + * \param[in] xUpdatedOnDeviceEvent The event to synchronize the stream coordinates wre updated on device. */ - GpuEventSynchronizer* xUpdatedOnDevice(); + void setXUpdatedOnDeviceEvent(GpuEventSynchronizer* xUpdatedOnDeviceEvent); /*! \brief Copy positions from the GPU memory. * diff --git a/src/gromacs/mdtypes/state_propagator_data_gpu_impl.cpp b/src/gromacs/mdtypes/state_propagator_data_gpu_impl.cpp index 4f69d96370..d4fc488720 100644 --- a/src/gromacs/mdtypes/state_propagator_data_gpu_impl.cpp +++ b/src/gromacs/mdtypes/state_propagator_data_gpu_impl.cpp @@ -118,12 +118,11 @@ void StatePropagatorDataGpu::waitCoordinatesCopiedToDevice(AtomLocality /* atomL "GPU implementation."); } -GpuEventSynchronizer* StatePropagatorDataGpu::xUpdatedOnDevice() +void StatePropagatorDataGpu::setXUpdatedOnDeviceEvent(GpuEventSynchronizer* /* xUpdatedOnDeviceEvent */) { GMX_ASSERT(!impl_, "A CPU stub method from GPU state propagator data was called instead of one from " "GPU implementation."); - return nullptr; } void StatePropagatorDataGpu::copyCoordinatesToGpu(const gmx::ArrayRef /* h_x */, diff --git a/src/gromacs/mdtypes/state_propagator_data_gpu_impl.h b/src/gromacs/mdtypes/state_propagator_data_gpu_impl.h index 026eff554c..4353eecf4e 100644 --- a/src/gromacs/mdtypes/state_propagator_data_gpu_impl.h +++ b/src/gromacs/mdtypes/state_propagator_data_gpu_impl.h @@ -190,11 +190,11 @@ public: */ void waitCoordinatesCopiedToDevice(AtomLocality atomLocality); - /*! \brief Getter for the event synchronizer for the update is done on th GPU + /*! \brief Setter for the event synchronizer for the update is done on th GPU * - * \returns The event to synchronize the stream coordinates wre updated on device. + * \param[in] xUpdatedOnDeviceEvent The event to synchronize the stream coordinates wre updated on device. */ - GpuEventSynchronizer* xUpdatedOnDevice(); + void setXUpdatedOnDeviceEvent(GpuEventSynchronizer* xUpdatedOnDeviceEvent); /*! \brief Copy positions from the GPU memory. * @@ -333,8 +333,8 @@ private: * \todo Reconsider naming. It should be xCopiedToDevice or xH2DCopyComplete, etc. */ EnumerationArray xReadyOnDevice_; - //! An event that the coordinates are ready after update-constraints execution - GpuEventSynchronizer xUpdatedOnDevice_; + //! A pointer to an event that the coordinates are ready after update-constraints execution + GpuEventSynchronizer* xUpdatedOnDeviceEvent_ = nullptr; //! An array of events that indicate D2H copy of coordinates is complete (one event for each atom locality) EnumerationArray xReadyOnHost_; diff --git a/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp b/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp index 28d60f8613..aa63099ffa 100644 --- a/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp +++ b/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp @@ -48,6 +48,7 @@ # include "gromacs/gpu_utils/device_stream_manager.h" # include "gromacs/gpu_utils/devicebuffer.h" +# include "gromacs/gpu_utils/gpueventsynchronizer.h" # include "gromacs/math/vectypes.h" # include "gromacs/mdtypes/state_propagator_data_gpu.h" # include "gromacs/timing/wallcycle.h" @@ -358,7 +359,8 @@ StatePropagatorDataGpu::Impl::getCoordinatesReadyOnDeviceEvent(AtomLocality atom } if (atomLocality == AtomLocality::Local && simulationWork.useGpuUpdate && !stepWork.doNeighborSearch) { - return &xUpdatedOnDevice_; + GMX_ASSERT(xUpdatedOnDeviceEvent_ != nullptr, "The event synchronizer can not be nullptr."); + return xUpdatedOnDeviceEvent_; } else { @@ -374,9 +376,10 @@ void StatePropagatorDataGpu::Impl::waitCoordinatesCopiedToDevice(AtomLocality at wallcycle_stop(wcycle_, WallCycleCounter::WaitGpuStatePropagatorData); } -GpuEventSynchronizer* StatePropagatorDataGpu::Impl::xUpdatedOnDevice() +void StatePropagatorDataGpu::Impl::setXUpdatedOnDeviceEvent(GpuEventSynchronizer* xUpdatedOnDeviceEvent) { - return &xUpdatedOnDevice_; + GMX_ASSERT(xUpdatedOnDeviceEvent != nullptr, "The event synchronizer can not be nullptr."); + xUpdatedOnDeviceEvent_ = xUpdatedOnDeviceEvent; } void StatePropagatorDataGpu::Impl::copyCoordinatesFromGpu(gmx::ArrayRef h_x, AtomLocality atomLocality) @@ -624,9 +627,9 @@ void StatePropagatorDataGpu::waitCoordinatesCopiedToDevice(AtomLocality atomLoca return impl_->waitCoordinatesCopiedToDevice(atomLocality); } -GpuEventSynchronizer* StatePropagatorDataGpu::xUpdatedOnDevice() +void StatePropagatorDataGpu::setXUpdatedOnDeviceEvent(GpuEventSynchronizer* xUpdatedOnDeviceEvent) { - return impl_->xUpdatedOnDevice(); + impl_->setXUpdatedOnDeviceEvent(xUpdatedOnDeviceEvent); } void StatePropagatorDataGpu::copyCoordinatesFromGpu(gmx::ArrayRef h_x, AtomLocality atomLocality) -- 2.22.0