Remove StatePropagatorDataGpu::getVelocitiesReadyOnDeviceEvent
authorAndrey Alekseenko <al42and@gmail.com>
Tue, 3 Aug 2021 07:01:34 +0000 (07:01 +0000)
committerMark Abraham <mark.j.abraham@gmail.com>
Tue, 3 Aug 2021 07:01:34 +0000 (07:01 +0000)
It was unused, since we were using the copied data only in the stream in
which we did the copy. So, the synchronization was performed implicitly,
with no need to wait for the event.

Also added a comment about a rare redundant copy.

Refs #3988

src/gromacs/mdrun/md.cpp
src/gromacs/mdtypes/state_propagator_data_gpu.h
src/gromacs/mdtypes/state_propagator_data_gpu_impl.cpp
src/gromacs/mdtypes/state_propagator_data_gpu_impl.h
src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp

index c26f8f96847d15469aad0d7655d13e39ea5ddd0f..30f17fdf14725c9d2a07588f3551a63a4535575e 100644 (file)
@@ -1507,8 +1507,10 @@ void gmx::LegacySimulator::do_md()
                                     *md);
 
                     // Copy data to the GPU after buffers might have being reinitialized
-                    // coordinates have been copied already if PME or buffer ops has not needed it this step.
+                    /* The velocity copy is redundant if we had Center-of-Mass motion removed on
+                     * the previous step. We don't check that now. */
                     stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
+                    // coordinates have been copied already if PME or buffer ops has not needed it this step.
                     const bool useGpuPmeOnThisRank = runScheduleWork->simulationWork.useGpuPme
                                                      && thisRankHasDuty(cr, DUTY_PME)
                                                      && runScheduleWork->stepWork.computeSlowForces;
index cd119960f39eef8b3907a28cd76e06e8d793cb8a..16e87d31e928e67fc023fe04148da5af0a8ecf54 100644 (file)
@@ -168,6 +168,10 @@ public:
     DeviceBuffer<RVec> getCoordinates();
 
     /*! \brief Copy positions to the GPU memory.
+     *
+     * Use \ref getCoordinatesReadyOnDeviceEvent to get the associated event synchronizer or
+     * \ref waitCoordinatesCopiedToDevice to wait for the copy completion.
+     * Note: the event is not marked in OpenCL, because it is not used.
      *
      *  \param[in] h_x           Positions in the host memory.
      *  \param[in] atomLocality  Locality of the particles to copy.
@@ -229,20 +233,14 @@ public:
     DeviceBuffer<RVec> getVelocities();
 
     /*! \brief Copy velocities to the GPU memory.
+     *
+     * Does not mark any event, because we don't use it anywhere at the moment.
      *
      *  \param[in] h_v           Velocities in the host memory.
      *  \param[in] atomLocality  Locality of the particles to copy.
      */
     void copyVelocitiesToGpu(gmx::ArrayRef<const gmx::RVec> h_v, AtomLocality atomLocality);
 
-    /*! \brief Get the event synchronizer for the H2D velocities copy.
-     *
-     *  \param[in] atomLocality  Locality of the particles to wait for.
-     *
-     *  \returns  The event to synchronize the stream that consumes velocities on device.
-     */
-    GpuEventSynchronizer* getVelocitiesReadyOnDeviceEvent(AtomLocality atomLocality);
-
     /*! \brief Copy velocities from the GPU memory.
      *
      *  \param[in] h_v           Velocities buffer in the host memory.
index 0ce99a4dd4c4ee58f3e597cd752277b4bde3942a..4f69d963703f71f6d0ef4ed4411b4e93de365634 100644 (file)
@@ -166,14 +166,6 @@ void StatePropagatorDataGpu::copyVelocitiesToGpu(const gmx::ArrayRef<const gmx::
                "GPU implementation.");
 }
 
-GpuEventSynchronizer* StatePropagatorDataGpu::getVelocitiesReadyOnDeviceEvent(AtomLocality /* atomLocality */)
-{
-    GMX_ASSERT(!impl_,
-               "A CPU stub method from GPU state propagator data was called instead of one from "
-               "GPU implementation.");
-    return nullptr;
-}
-
 void StatePropagatorDataGpu::copyVelocitiesFromGpu(gmx::ArrayRef<gmx::RVec> /* h_v          */,
                                                    AtomLocality /* atomLocality */)
 {
index 0a041db04f61d47b611b259b8dd2044ffe7370ba..cfceab3bff26cb44fe19ac97a9de4026b3c66529 100644 (file)
@@ -230,14 +230,6 @@ public:
      */
     void copyVelocitiesToGpu(gmx::ArrayRef<const gmx::RVec> h_v, AtomLocality atomLocality);
 
-    /*! \brief Get the event synchronizer on the H2D velocities copy.
-     *
-     *  \param[in] atomLocality  Locality of the particles to wait for.
-     *
-     *  \returns  The event to synchronize the stream that consumes velocities on device.
-     */
-    GpuEventSynchronizer* getVelocitiesReadyOnDeviceEvent(AtomLocality atomLocality);
-
     /*! \brief Copy velocities from the GPU memory.
      *
      *  \param[in] h_v           Velocities buffer in the host memory.
@@ -353,8 +345,6 @@ private:
     //! An array of events that indicate D2H copy of coordinates is complete (one event for each atom locality)
     EnumerationArray<AtomLocality, GpuEventSynchronizer> xReadyOnHost_;
 
-    //! An array of events that indicate H2D copy of velocities is complete (one event for each atom locality)
-    EnumerationArray<AtomLocality, GpuEventSynchronizer> vReadyOnDevice_;
     //! An array of events that indicate D2H copy of velocities is complete (one event for each atom locality)
     EnumerationArray<AtomLocality, GpuEventSynchronizer> vReadyOnHost_;
 
index d4868d20dc6b4945b722b129b912863458b7399b..93a0839b9e617007bcad3fbf47296dca6d58ab97 100644 (file)
@@ -417,18 +417,15 @@ void StatePropagatorDataGpu::Impl::copyVelocitiesToGpu(const gmx::ArrayRef<const
     wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchStatePropagatorData);
 
     copyToDevice(d_v_, h_v, d_vSize_, atomLocality, *deviceStream);
-    vReadyOnDevice_[atomLocality].markEvent(*deviceStream);
+    /* Not marking the event, because it is not used anywhere.
+     * Since we only use velocities on the device for update, and we launch the copy in
+     * the "update" stream, that should be safe.
+     */
 
     wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchStatePropagatorData);
     wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
 }
 
-GpuEventSynchronizer* StatePropagatorDataGpu::Impl::getVelocitiesReadyOnDeviceEvent(AtomLocality atomLocality)
-{
-    return &vReadyOnDevice_[atomLocality];
-}
-
-
 void StatePropagatorDataGpu::Impl::copyVelocitiesFromGpu(gmx::ArrayRef<gmx::RVec> h_v, AtomLocality atomLocality)
 {
     GMX_ASSERT(atomLocality < AtomLocality::Count, "Wrong atom locality.");
@@ -637,11 +634,6 @@ void StatePropagatorDataGpu::copyVelocitiesToGpu(const gmx::ArrayRef<const gmx::
     return impl_->copyVelocitiesToGpu(h_v, atomLocality);
 }
 
-GpuEventSynchronizer* StatePropagatorDataGpu::getVelocitiesReadyOnDeviceEvent(AtomLocality atomLocality)
-{
-    return impl_->getVelocitiesReadyOnDeviceEvent(atomLocality);
-}
-
 void StatePropagatorDataGpu::copyVelocitiesFromGpu(gmx::ArrayRef<RVec> h_v, AtomLocality atomLocality)
 {
     return impl_->copyVelocitiesFromGpu(h_v, atomLocality);