Centralize management of forces ready on GPU event

author Artem Zhmurov <zhmurov@gmail.com>

Thu, 10 Oct 2019 16:03:11 +0000 (18:03 +0200)

committer Artem Zhmurov <zhmurov@gmail.com>

Sat, 12 Oct 2019 14:49:20 +0000 (16:49 +0200)
author Artem Zhmurov <zhmurov@gmail.com>
Thu, 10 Oct 2019 16:03:11 +0000 (18:03 +0200)
committer Artem Zhmurov <zhmurov@gmail.com>
Sat, 12 Oct 2019 14:49:20 +0000 (16:49 +0200)
diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp

index 57ff728da03663cf55e723227cf47cc9f22233ba..8386e341393cb5b6050b380fd9836c92636bc4b9 100644 (file)
--- a/src/gromacs/mdlib/sim_util.cpp
+++ b/src/gromacs/mdlib/sim_util.cpp
@@ -1506,7 +1506,8 @@ void do_force(FILE                                     *fplog,
                  if (haveNonLocalForceContribInCpuBuffer)
                  {
                      stateGpu->copyForcesToGpu(forceOut.forceWithShiftForces().force(), gmx::StatePropagatorDataGpu::AtomLocality::NonLocal);
-                    dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(gmx::StatePropagatorDataGpu::AtomLocality::NonLocal));
+                    dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(gmx::StatePropagatorDataGpu::AtomLocality::NonLocal,
+                                                                                   useGpuFBufOps == BufferOpsUseGpu::True));
                  }
  
                  nbv->atomdata_add_nbat_f_to_f_gpu(Nbnxm::AtomLocality::NonLocal,
@@ -1660,7 +1661,8 @@ void do_force(FILE                                     *fplog,
              if (haveLocalForceContribInCpuBuffer && !useGpuForcesHaloExchange)
              {
                  stateGpu->copyForcesToGpu(forceWithShift, gmx::StatePropagatorDataGpu::AtomLocality::Local);
-                dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(gmx::StatePropagatorDataGpu::AtomLocality::Local));
+                dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(gmx::StatePropagatorDataGpu::AtomLocality::Local,
+                                                                               useGpuFBufOps == BufferOpsUseGpu::True));
              }
              if (useGpuForcesHaloExchange)
              {
diff --git a/src/gromacs/mdtypes/state_propagator_data_gpu.h b/src/gromacs/mdtypes/state_propagator_data_gpu.h

index 13d00ab245ba4ba7a59820250a8400d798c107ae..086c43fd4e0023983252032b93ee4a23fa8760c6 100644 (file)
--- a/src/gromacs/mdtypes/state_propagator_data_gpu.h
+++ b/src/gromacs/mdtypes/state_propagator_data_gpu.h
@@ -266,13 +266,28 @@ class StatePropagatorDataGpu
          void copyForcesToGpu(gmx::ArrayRef<const gmx::RVec>  h_f,
                               AtomLocality                    atomLocality);
  
-        /*! \brief Get the event synchronizer for the H2D forces copy.
+        /*! \brief Get the event synchronizer for the forces ready on device.
           *
-         *  \param[in] atomLocality  Locality of the particles to wait for.
+         *  Returns either of the event synchronizers, depending on the offload scenario
+         *  for the current simulation timestep:
+         *  1. The forces are copied to the device (when GPU buffer ops are off)
+         *  2. The forces are reduced on the device (GPU buffer ops are on)
+         *
+         *  \todo Pass step workload instead of the useGpuFBufferOps boolean.
+         *
+         *  \param[in] atomLocality      Locality of the particles to wait for.
+         *  \param[in] useGpuFBufferOps  If the force buffer ops are offloaded to the GPU.
           *
           *  \returns  The event to synchronize the stream that consumes forces on device.
           */
-        GpuEventSynchronizer* getForcesReadyOnDeviceEvent(AtomLocality  atomLocality);
+        GpuEventSynchronizer* getForcesReadyOnDeviceEvent(AtomLocality  atomLocality,
+                                                          bool          useGpuFBufferOps);
+
+        /*! \brief Getter for the event synchronizer for the forces are reduced on the GPU.
+         *
+         *  \returns  The event to mark when forces are reduced on the GPU.
+         */
+        GpuEventSynchronizer* fReducedOnDevice();
  
          /*! \brief Copy forces from the GPU memory.
           *
diff --git a/src/gromacs/mdtypes/state_propagator_data_gpu_impl.cpp b/src/gromacs/mdtypes/state_propagator_data_gpu_impl.cpp

index 1f6da3a5d7b0ad67f45130923bd05fbe81f228ff..68d16ccd3b3622173fe2f618c69c94634f6348dc 100644 (file)
--- a/src/gromacs/mdtypes/state_propagator_data_gpu_impl.cpp
+++ b/src/gromacs/mdtypes/state_propagator_data_gpu_impl.cpp
@@ -162,9 +162,16 @@ void StatePropagatorDataGpu::copyForcesToGpu(const gmx::ArrayRef<const gmx::RVec
      GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation.");
  }
  
-GpuEventSynchronizer* StatePropagatorDataGpu::getForcesReadyOnDeviceEvent(AtomLocality  /* atomLocality */)
+GpuEventSynchronizer* StatePropagatorDataGpu::getForcesReadyOnDeviceEvent(AtomLocality  /* atomLocality     */,
+                                                                          bool          /* useGpuFBufferOps */)
  {
-    GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called insted of one from GPU implementation.");
+    GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation.");
+    return nullptr;
+}
+
+GpuEventSynchronizer* StatePropagatorDataGpu::fReducedOnDevice()
+{
+    GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation.");
      return nullptr;
  }
  
diff --git a/src/gromacs/mdtypes/state_propagator_data_gpu_impl.h b/src/gromacs/mdtypes/state_propagator_data_gpu_impl.h

index b5fcffeb19ffcc429a99a5057fce781c14786747..bd8c770d0423088c3623bc49ba34790bc0e89c84 100644 (file)
--- a/src/gromacs/mdtypes/state_propagator_data_gpu_impl.h
+++ b/src/gromacs/mdtypes/state_propagator_data_gpu_impl.h
@@ -252,13 +252,28 @@ class StatePropagatorDataGpu::Impl
          void copyForcesToGpu(gmx::ArrayRef<const gmx::RVec>  h_f,
                               AtomLocality                    atomLocality);
  
-        /*! \brief Get the event synchronizer on the H2D forces copy.
+        /*! \brief Get the event synchronizer for the forces ready on device.
           *
-         *  \param[in] atomLocality  Locality of the particles to wait for.
+         *  Returns either of the event synchronizers, depending on the offload scenario
+         *  for the current simulation timestep:
+         *  1. The forces are copied to the device (when GPU buffer ops are off)
+         *  2. The forces are reduced on the device (GPU buffer ops are on)
+         *
+         *  \todo Pass step workload instead of the useGpuFBufferOps boolean.
+         *
+         *  \param[in] atomLocality      Locality of the particles to wait for.
+         *  \param[in] useGpuFBufferOps  If the force buffer ops are offloaded to the GPU.
           *
           *  \returns  The event to synchronize the stream that consumes forces on device.
           */
-        GpuEventSynchronizer* getForcesReadyOnDeviceEvent(AtomLocality  atomLocality);
+        GpuEventSynchronizer* getForcesReadyOnDeviceEvent(AtomLocality  atomLocality,
+                                                          bool          useGpuFBufferOps);
+
+        /*! \brief Getter for the event synchronizer for the forces are reduced on the GPU.
+         *
+         *  \returns  The event to mark when forces are reduced on the GPU.
+         */
+        GpuEventSynchronizer* fReducedOnDevice();
  
          /*! \brief Copy forces from the GPU memory.
           *
@@ -329,6 +344,8 @@ class StatePropagatorDataGpu::Impl
  
          //! An array of events that indicate H2D copy of forces is complete (one event for each atom locality)
          EnumerationArray<AtomLocality, GpuEventSynchronizer> fReadyOnDevice_;
+        //! An event that the forces were reduced on the GPU
+        GpuEventSynchronizer                                 fReducedOnDevice_;
          //! An array of events that indicate D2H copy of forces is complete (one event for each atom locality)
          EnumerationArray<AtomLocality, GpuEventSynchronizer> fReadyOnHost_;
  
diff --git a/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp b/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp

index 88bb6e5fedd84b99458c36f45342e6562b90bd6c..2f66ea4d3e034928333b48eee78467e1ac9e66d0 100644 (file)
--- a/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp
+++ b/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp
@@ -376,11 +376,23 @@ void StatePropagatorDataGpu::Impl::copyForcesToGpu(const gmx::ArrayRef<const gmx
      fReadyOnDevice_[atomLocality].markEvent(commandStream);
  }
  
-GpuEventSynchronizer* StatePropagatorDataGpu::Impl::getForcesReadyOnDeviceEvent(AtomLocality  atomLocality)
+GpuEventSynchronizer* StatePropagatorDataGpu::Impl::getForcesReadyOnDeviceEvent(AtomLocality  atomLocality,
+                                                                                bool          useGpuFBufferOps)
  {
-    return &fReadyOnDevice_[atomLocality];
+    if ((atomLocality == AtomLocality::Local || atomLocality == AtomLocality::NonLocal) && useGpuFBufferOps)
+    {
+        return &fReducedOnDevice_;
+    }
+    else
+    {
+        return &fReadyOnDevice_[atomLocality];
+    }
  }
  
+GpuEventSynchronizer* StatePropagatorDataGpu::Impl::fReducedOnDevice()
+{
+    return &fReducedOnDevice_;
+}
  
  void StatePropagatorDataGpu::Impl::copyForcesFromGpu(gmx::ArrayRef<gmx::RVec>  h_f,
                                                       AtomLocality              atomLocality)
@@ -524,9 +536,15 @@ void StatePropagatorDataGpu::copyForcesToGpu(const gmx::ArrayRef<const gmx::RVec
      return impl_->copyForcesToGpu(h_f, atomLocality);
  }
  
-GpuEventSynchronizer* StatePropagatorDataGpu::getForcesReadyOnDeviceEvent(AtomLocality  atomLocality)
+GpuEventSynchronizer* StatePropagatorDataGpu::getForcesReadyOnDeviceEvent(AtomLocality  atomLocality,
+                                                                          bool          useGpuFBufferOps)
+{
+    return impl_->getForcesReadyOnDeviceEvent(atomLocality, useGpuFBufferOps);
+}
+
+GpuEventSynchronizer* StatePropagatorDataGpu::fReducedOnDevice()
  {
-    return impl_->getForcesReadyOnDeviceEvent(atomLocality);
+    return impl_->fReducedOnDevice();
  }
  
  void StatePropagatorDataGpu::copyForcesFromGpu(gmx::ArrayRef<RVec>  h_f,
author	Artem Zhmurov <zhmurov@gmail.com>
	Thu, 10 Oct 2019 16:03:11 +0000 (18:03 +0200)
committer	Artem Zhmurov <zhmurov@gmail.com>
	Sat, 12 Oct 2019 14:49:20 +0000 (16:49 +0200)
src/gromacs/mdlib/sim_util.cpp		patch \| blob \| history
src/gromacs/mdtypes/state_propagator_data_gpu.h		patch \| blob \| history
src/gromacs/mdtypes/state_propagator_data_gpu_impl.cpp		patch \| blob \| history
src/gromacs/mdtypes/state_propagator_data_gpu_impl.h		patch \| blob \| history
src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp		patch \| blob \| history