Move the ownership of the xUpdatedOnDevice event to update constraints
authorArtem Zhmurov <zhmurov@gmail.com>
Tue, 17 Aug 2021 15:13:06 +0000 (15:13 +0000)
committerMark Abraham <mark.j.abraham@gmail.com>
Tue, 17 Aug 2021 15:13:06 +0000 (15:13 +0000)
src/gromacs/mdlib/update_constrain_gpu.h
src/gromacs/mdlib/update_constrain_gpu_impl.cpp
src/gromacs/mdlib/update_constrain_gpu_impl.h
src/gromacs/mdlib/update_constrain_gpu_impl_stubs.cpp
src/gromacs/mdrun/md.cpp
src/gromacs/mdtypes/state_propagator_data_gpu.h
src/gromacs/mdtypes/state_propagator_data_gpu_impl.cpp
src/gromacs/mdtypes/state_propagator_data_gpu_impl.h
src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp

index 9050039b13b7b52846746a1858d34a5be7c3320d..d779a6a4cc6005de4a45c2957d508b84f7f97999 100644 (file)
@@ -71,10 +71,7 @@ public:
     /*! \brief Create Update-Constrain object.
      *
      * The constructor is given a non-nullptr \p deviceStream, in which all the update and constrain
-     * routines are executed. \p xUpdatedOnDevice should mark the completion of all kernels that
-     * modify coordinates. The event is maintained outside this class and also passed to all (if
-     * any) consumers of the updated coordinates. The \p xUpdatedOnDevice also can not be a nullptr
-     * because the markEvent(...) method is called unconditionally.
+     * routines are executed.
      *
      * \param[in] ir                  Input record data: LINCS takes number of iterations and order of
      *                                projection from it.
@@ -83,17 +80,14 @@ public:
      * \param[in] numTempScaleValues  Number of temperature scaling groups. Zero for no temperature scaling.
      * \param[in] deviceContext       GPU device context.
      * \param[in] deviceStream        GPU stream to use.
-     * \param[in] xUpdatedOnDevice    The event synchronizer to use to mark that update is done
-     *                                on the GPU.
      * \param[in] wcycle              The wallclock counter
      */
-    UpdateConstrainGpu(const t_inputrec&     ir,
-                       const gmx_mtop_t&     mtop,
-                       int                   numTempScaleValues,
-                       const DeviceContext&  deviceContext,
-                       const DeviceStream&   deviceStream,
-                       GpuEventSynchronizer* xUpdatedOnDevice,
-                       gmx_wallcycle*        wcycle);
+    UpdateConstrainGpu(const t_inputrec&    ir,
+                       const gmx_mtop_t&    mtop,
+                       int                  numTempScaleValues,
+                       const DeviceContext& deviceContext,
+                       const DeviceStream&  deviceStream,
+                       gmx_wallcycle*       wcycle);
 
     ~UpdateConstrainGpu();
 
@@ -166,9 +160,10 @@ public:
      */
     void setPbc(PbcType pbcType, const matrix box);
 
-    /*! \brief Return the synchronizer associated with the event indicated that the coordinates are ready on the device.
+    /*! \brief Return the synchronizer associated with the event that indicates
+     * that the coordinates are ready on the device.
      */
-    GpuEventSynchronizer* getCoordinatesReadySync();
+    GpuEventSynchronizer* xUpdatedOnDeviceEvent();
 
     /*! \brief
      * Returns whether the maximum number of coupled constraints is supported
index 9eceea17f150f3ad3e74f0ba49a4445ea7686884..5931dea8c90feb0f093bdda8687f9ac8609e648e 100644 (file)
@@ -117,7 +117,7 @@ void UpdateConstrainGpu::Impl::integrate(GpuEventSynchronizer*             fRead
         }
     }
 
-    coordinatesReady_->markEvent(deviceStream_);
+    xUpdatedOnDeviceEvent_.markEvent(deviceStream_);
 
     wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchGpuUpdateConstrain);
     wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
@@ -149,20 +149,14 @@ void UpdateConstrainGpu::Impl::scaleVelocities(const matrix scalingMatrix)
     wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
 }
 
-UpdateConstrainGpu::Impl::Impl(const t_inputrec&     ir,
-                               const gmx_mtop_t&     mtop,
-                               const int             numTempScaleValues,
-                               const DeviceContext&  deviceContext,
-                               const DeviceStream&   deviceStream,
-                               GpuEventSynchronizer* xUpdatedOnDevice,
-                               gmx_wallcycle*        wcycle) :
-    deviceContext_(deviceContext),
-    deviceStream_(deviceStream),
-    coordinatesReady_(xUpdatedOnDevice),
-    wcycle_(wcycle)
+UpdateConstrainGpu::Impl::Impl(const t_inputrec&    ir,
+                               const gmx_mtop_t&    mtop,
+                               const int            numTempScaleValues,
+                               const DeviceContext& deviceContext,
+                               const DeviceStream&  deviceStream,
+                               gmx_wallcycle*       wcycle) :
+    deviceContext_(deviceContext), deviceStream_(deviceStream), wcycle_(wcycle)
 {
-    GMX_ASSERT(xUpdatedOnDevice != nullptr, "The event synchronizer can not be nullptr.");
-
     integrator_ = std::make_unique<LeapFrogGpu>(deviceContext_, deviceStream_, numTempScaleValues);
     if (sc_haveGpuConstraintSupport)
     {
@@ -220,19 +214,18 @@ void UpdateConstrainGpu::Impl::setPbc(const PbcType pbcType, const matrix box)
     setPbcAiuc(numPbcDimensions(pbcType), box, &pbcAiuc_);
 }
 
-GpuEventSynchronizer* UpdateConstrainGpu::Impl::getCoordinatesReadySync()
+GpuEventSynchronizer* UpdateConstrainGpu::Impl::xUpdatedOnDeviceEvent()
 {
-    return coordinatesReady_;
+    return &xUpdatedOnDeviceEvent_;
 }
 
-UpdateConstrainGpu::UpdateConstrainGpu(const t_inputrec&     ir,
-                                       const gmx_mtop_t&     mtop,
-                                       const int             numTempScaleValues,
-                                       const DeviceContext&  deviceContext,
-                                       const DeviceStream&   deviceStream,
-                                       GpuEventSynchronizer* xUpdatedOnDevice,
-                                       gmx_wallcycle*        wcycle) :
-    impl_(new Impl(ir, mtop, numTempScaleValues, deviceContext, deviceStream, xUpdatedOnDevice, wcycle))
+UpdateConstrainGpu::UpdateConstrainGpu(const t_inputrec&    ir,
+                                       const gmx_mtop_t&    mtop,
+                                       const int            numTempScaleValues,
+                                       const DeviceContext& deviceContext,
+                                       const DeviceStream&  deviceStream,
+                                       gmx_wallcycle*       wcycle) :
+    impl_(new Impl(ir, mtop, numTempScaleValues, deviceContext, deviceStream, wcycle))
 {
 }
 
@@ -285,9 +278,9 @@ void UpdateConstrainGpu::setPbc(const PbcType pbcType, const matrix box)
     impl_->setPbc(pbcType, box);
 }
 
-GpuEventSynchronizer* UpdateConstrainGpu::getCoordinatesReadySync()
+GpuEventSynchronizer* UpdateConstrainGpu::xUpdatedOnDeviceEvent()
 {
-    return impl_->getCoordinatesReadySync();
+    return impl_->xUpdatedOnDeviceEvent();
 }
 
 bool UpdateConstrainGpu::isNumCoupledConstraintsSupported(const gmx_mtop_t& mtop)
index adfbccda8575cf0fda8099dfc521100d979ce96d..bbba2b38a67b2adecdea198fe18d5cb4d8d7bda8 100644 (file)
@@ -48,6 +48,9 @@
 
 #include "gmxpre.h"
 
+#include "config.h"
+
+#include "gromacs/gpu_utils/gpueventsynchronizer.h"
 #include "gromacs/mdlib/leapfrog_gpu.h"
 #include "gromacs/mdlib/lincs_gpu.h"
 #include "gromacs/mdlib/settle_gpu.h"
@@ -67,10 +70,7 @@ public:
     /*! \brief Create Update-Constrain object.
      *
      * The constructor is given a non-nullptr \p deviceStream, in which all the update and constrain
-     * routines are executed. \p xUpdatedOnDevice should mark the completion of all kernels that
-     * modify coordinates. The event is maintained outside this class and also passed to all (if
-     * any) consumers of the updated coordinates. The \p xUpdatedOnDevice also can not be a nullptr
-     * because the markEvent(...) method is called unconditionally.
+     * routines are executed.
      *
      * \param[in] ir                  Input record data: LINCS takes number of iterations and order of
      *                                projection from it.
@@ -79,17 +79,14 @@ public:
      * \param[in] numTempScaleValues  Number of temperature scaling groups. Set zero for no temperature coupling.
      * \param[in] deviceContext       GPU device context.
      * \param[in] deviceStream        GPU stream to use.
-     * \param[in] xUpdatedOnDevice    The event synchronizer to use to mark that
-     *                                update is done on the GPU.
      * \param[in] wcycle              The wallclock counter
      */
-    Impl(const t_inputrec&     ir,
-         const gmx_mtop_t&     mtop,
-         int                   numTempScaleValues,
-         const DeviceContext&  deviceContext,
-         const DeviceStream&   deviceStream,
-         GpuEventSynchronizer* xUpdatedOnDevice,
-         gmx_wallcycle*        wcycle);
+    Impl(const t_inputrec&    ir,
+         const gmx_mtop_t&    mtop,
+         int                  numTempScaleValues,
+         const DeviceContext& deviceContext,
+         const DeviceStream&  deviceStream,
+         gmx_wallcycle*       wcycle);
 
     ~Impl();
 
@@ -169,7 +166,7 @@ public:
 
     /*! \brief Return the synchronizer associated with the event indicated that the coordinates are ready on the device.
      */
-    GpuEventSynchronizer* getCoordinatesReadySync();
+    GpuEventSynchronizer* xUpdatedOnDeviceEvent();
 
     /*! \brief
      * Returns whether the maximum number of coupled constraints is supported
@@ -222,8 +219,8 @@ private:
     //! SETTLE GPU object for water constrains
     std::unique_ptr<SettleGpu> settleGpu_;
 
-    //! An pointer to the event to indicate when the update of coordinates is complete
-    GpuEventSynchronizer* coordinatesReady_;
+    //! The event to indicate when the update of coordinates is complete
+    GpuEventSynchronizer xUpdatedOnDeviceEvent_;
     //! The wallclock counter
     gmx_wallcycle* wcycle_ = nullptr;
 };
index 0ec9102a80fce6458f467af6dd319e60bdca8686..3eb3030baa5cfb5d6d619931eea8096b8b4f884f 100644 (file)
@@ -61,7 +61,6 @@ UpdateConstrainGpu::UpdateConstrainGpu(const t_inputrec& /* ir   */,
                                        const int /* numTempScaleValues */,
                                        const DeviceContext& /* deviceContext */,
                                        const DeviceStream& /* deviceStream */,
-                                       GpuEventSynchronizer* /* xUpdatedOnDevice */,
                                        gmx_wallcycle* /*wcycle*/) :
     impl_(nullptr)
 {
@@ -114,7 +113,7 @@ void UpdateConstrainGpu::setPbc(const PbcType /* pbcType */, const matrix /* box
                "A CPU stub for UpdateConstrain was called instead of the correct implementation.");
 }
 
-GpuEventSynchronizer* UpdateConstrainGpu::getCoordinatesReadySync()
+GpuEventSynchronizer* UpdateConstrainGpu::xUpdatedOnDeviceEvent()
 {
     GMX_ASSERT(!impl_,
                "A CPU stub for UpdateConstrain was called instead of the correct implementation.");
index 30f17fdf14725c9d2a07588f3551a63a4535575e..3b7466c229cc2c2d8ff49f68d5d2ce3e1bb9b9c5 100644 (file)
@@ -482,9 +482,10 @@ void gmx::LegacySimulator::do_md()
                 ekind->ngtc,
                 fr->deviceStreamManager->context(),
                 fr->deviceStreamManager->stream(gmx::DeviceStreamType::UpdateAndConstraints),
-                stateGpu->xUpdatedOnDevice(),
                 wcycle);
 
+        stateGpu->setXUpdatedOnDeviceEvent(integrator->xUpdatedOnDeviceEvent());
+
         integrator->setPbc(PbcType::Xyz, state->box);
     }
 
index 16e87d31e928e67fc023fe04148da5af0a8ecf54..9e8dc576a6185fbef5f0830c346dac29a604f4e5 100644 (file)
@@ -206,11 +206,11 @@ public:
      */
     void waitCoordinatesCopiedToDevice(AtomLocality atomLocality);
 
-    /*! \brief Getter for the event synchronizer for the update is done on th GPU
+    /*! \brief Setter for the event synchronizer for the update is done on th GPU
      *
-     *  \returns  The event to synchronize the stream coordinates wre updated on device.
+     *  \param[in] xUpdatedOnDeviceEvent  The event to synchronize the stream coordinates wre updated on device.
      */
-    GpuEventSynchronizer* xUpdatedOnDevice();
+    void setXUpdatedOnDeviceEvent(GpuEventSynchronizer* xUpdatedOnDeviceEvent);
 
     /*! \brief Copy positions from the GPU memory.
      *
index 4f69d963703f71f6d0ef4ed4411b4e93de365634..d4fc4887209c1048df40078c9bad77f5a774c5f0 100644 (file)
@@ -118,12 +118,11 @@ void StatePropagatorDataGpu::waitCoordinatesCopiedToDevice(AtomLocality /* atomL
                "GPU implementation.");
 }
 
-GpuEventSynchronizer* StatePropagatorDataGpu::xUpdatedOnDevice()
+void StatePropagatorDataGpu::setXUpdatedOnDeviceEvent(GpuEventSynchronizer* /* xUpdatedOnDeviceEvent */)
 {
     GMX_ASSERT(!impl_,
                "A CPU stub method from GPU state propagator data was called instead of one from "
                "GPU implementation.");
-    return nullptr;
 }
 
 void StatePropagatorDataGpu::copyCoordinatesToGpu(const gmx::ArrayRef<const gmx::RVec> /* h_x */,
index 026eff554cf9bb8444bad907e1b67ed41eb0ebc9..4353eecf4ecf278d25ef632b8230b8cd45b03715 100644 (file)
@@ -190,11 +190,11 @@ public:
      */
     void waitCoordinatesCopiedToDevice(AtomLocality atomLocality);
 
-    /*! \brief Getter for the event synchronizer for the update is done on th GPU
+    /*! \brief Setter for the event synchronizer for the update is done on th GPU
      *
-     *  \returns  The event to synchronize the stream coordinates wre updated on device.
+     *  \param[in] xUpdatedOnDeviceEvent  The event to synchronize the stream coordinates wre updated on device.
      */
-    GpuEventSynchronizer* xUpdatedOnDevice();
+    void setXUpdatedOnDeviceEvent(GpuEventSynchronizer* xUpdatedOnDeviceEvent);
 
     /*! \brief Copy positions from the GPU memory.
      *
@@ -333,8 +333,8 @@ private:
      * \todo Reconsider naming. It should be xCopiedToDevice or xH2DCopyComplete, etc.
      */
     EnumerationArray<AtomLocality, GpuEventSynchronizer> xReadyOnDevice_;
-    //! An event that the coordinates are ready after update-constraints execution
-    GpuEventSynchronizer xUpdatedOnDevice_;
+    //! A pointer to an event that the coordinates are ready after update-constraints execution
+    GpuEventSynchronizer* xUpdatedOnDeviceEvent_ = nullptr;
     //! An array of events that indicate D2H copy of coordinates is complete (one event for each atom locality)
     EnumerationArray<AtomLocality, GpuEventSynchronizer> xReadyOnHost_;
 
index 28d60f86133fff57ceb6db3706aa78bee0a7e86d..aa63099ffa8b3a3f1387a604dccb6638b054671c 100644 (file)
@@ -48,6 +48,7 @@
 
 #    include "gromacs/gpu_utils/device_stream_manager.h"
 #    include "gromacs/gpu_utils/devicebuffer.h"
+#    include "gromacs/gpu_utils/gpueventsynchronizer.h"
 #    include "gromacs/math/vectypes.h"
 #    include "gromacs/mdtypes/state_propagator_data_gpu.h"
 #    include "gromacs/timing/wallcycle.h"
@@ -358,7 +359,8 @@ StatePropagatorDataGpu::Impl::getCoordinatesReadyOnDeviceEvent(AtomLocality atom
     }
     if (atomLocality == AtomLocality::Local && simulationWork.useGpuUpdate && !stepWork.doNeighborSearch)
     {
-        return &xUpdatedOnDevice_;
+        GMX_ASSERT(xUpdatedOnDeviceEvent_ != nullptr, "The event synchronizer can not be nullptr.");
+        return xUpdatedOnDeviceEvent_;
     }
     else
     {
@@ -374,9 +376,10 @@ void StatePropagatorDataGpu::Impl::waitCoordinatesCopiedToDevice(AtomLocality at
     wallcycle_stop(wcycle_, WallCycleCounter::WaitGpuStatePropagatorData);
 }
 
-GpuEventSynchronizer* StatePropagatorDataGpu::Impl::xUpdatedOnDevice()
+void StatePropagatorDataGpu::Impl::setXUpdatedOnDeviceEvent(GpuEventSynchronizer* xUpdatedOnDeviceEvent)
 {
-    return &xUpdatedOnDevice_;
+    GMX_ASSERT(xUpdatedOnDeviceEvent != nullptr, "The event synchronizer can not be nullptr.");
+    xUpdatedOnDeviceEvent_ = xUpdatedOnDeviceEvent;
 }
 
 void StatePropagatorDataGpu::Impl::copyCoordinatesFromGpu(gmx::ArrayRef<gmx::RVec> h_x, AtomLocality atomLocality)
@@ -624,9 +627,9 @@ void StatePropagatorDataGpu::waitCoordinatesCopiedToDevice(AtomLocality atomLoca
     return impl_->waitCoordinatesCopiedToDevice(atomLocality);
 }
 
-GpuEventSynchronizer* StatePropagatorDataGpu::xUpdatedOnDevice()
+void StatePropagatorDataGpu::setXUpdatedOnDeviceEvent(GpuEventSynchronizer* xUpdatedOnDeviceEvent)
 {
-    return impl_->xUpdatedOnDevice();
+    impl_->setXUpdatedOnDeviceEvent(xUpdatedOnDeviceEvent);
 }
 
 void StatePropagatorDataGpu::copyCoordinatesFromGpu(gmx::ArrayRef<RVec> h_x, AtomLocality atomLocality)