This change adds a GpuEventSynchronizer for the "forces reduced on GPU" event
to the StatePropagatorDataGpu. When the buffer ops are offloaded, this event
should be marked once the force reduction is done. The consumers of the forces
on the GPU will get either this event or the event marking that the H2D copy
is done, depending on the current step workload and offload scenario.
Change-Id: Ib559dbed5ad777eac3a906e4ee0ebaa07caf0ac1
if (haveNonLocalForceContribInCpuBuffer)
{
stateGpu->copyForcesToGpu(forceOut.forceWithShiftForces().force(), gmx::StatePropagatorDataGpu::AtomLocality::NonLocal);
if (haveNonLocalForceContribInCpuBuffer)
{
stateGpu->copyForcesToGpu(forceOut.forceWithShiftForces().force(), gmx::StatePropagatorDataGpu::AtomLocality::NonLocal);
- dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(gmx::StatePropagatorDataGpu::AtomLocality::NonLocal));
+ dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(gmx::StatePropagatorDataGpu::AtomLocality::NonLocal,
+ useGpuFBufOps == BufferOpsUseGpu::True));
}
nbv->atomdata_add_nbat_f_to_f_gpu(Nbnxm::AtomLocality::NonLocal,
}
nbv->atomdata_add_nbat_f_to_f_gpu(Nbnxm::AtomLocality::NonLocal,
if (haveLocalForceContribInCpuBuffer && !useGpuForcesHaloExchange)
{
stateGpu->copyForcesToGpu(forceWithShift, gmx::StatePropagatorDataGpu::AtomLocality::Local);
if (haveLocalForceContribInCpuBuffer && !useGpuForcesHaloExchange)
{
stateGpu->copyForcesToGpu(forceWithShift, gmx::StatePropagatorDataGpu::AtomLocality::Local);
- dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(gmx::StatePropagatorDataGpu::AtomLocality::Local));
+ dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(gmx::StatePropagatorDataGpu::AtomLocality::Local,
+ useGpuFBufOps == BufferOpsUseGpu::True));
}
if (useGpuForcesHaloExchange)
{
}
if (useGpuForcesHaloExchange)
{
void copyForcesToGpu(gmx::ArrayRef<const gmx::RVec> h_f,
AtomLocality atomLocality);
void copyForcesToGpu(gmx::ArrayRef<const gmx::RVec> h_f,
AtomLocality atomLocality);
- /*! \brief Get the event synchronizer for the H2D forces copy.
+ /*! \brief Get the event synchronizer for the forces ready on device.
- * \param[in] atomLocality Locality of the particles to wait for.
+ * Returns either of the event synchronizers, depending on the offload scenario
+ * for the current simulation timestep:
+ * 1. The forces are copied to the device (when GPU buffer ops are off)
+ * 2. The forces are reduced on the device (GPU buffer ops are on)
+ *
+ * \todo Pass step workload instead of the useGpuFBufferOps boolean.
+ *
+ * \param[in] atomLocality Locality of the particles to wait for.
+ * \param[in] useGpuFBufferOps If the force buffer ops are offloaded to the GPU.
*
* \returns The event to synchronize the stream that consumes forces on device.
*/
*
* \returns The event to synchronize the stream that consumes forces on device.
*/
- GpuEventSynchronizer* getForcesReadyOnDeviceEvent(AtomLocality atomLocality);
+ GpuEventSynchronizer* getForcesReadyOnDeviceEvent(AtomLocality atomLocality,
+ bool useGpuFBufferOps);
+
+ /*! \brief Getter for the event synchronizer marking that the forces are reduced on the GPU.
+ *
+ * \returns The event to mark when forces are reduced on the GPU.
+ */
+ GpuEventSynchronizer* fReducedOnDevice();
/*! \brief Copy forces from the GPU memory.
*
/*! \brief Copy forces from the GPU memory.
*
GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation.");
}
GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation.");
}
-GpuEventSynchronizer* StatePropagatorDataGpu::getForcesReadyOnDeviceEvent(AtomLocality /* atomLocality */)
+GpuEventSynchronizer* StatePropagatorDataGpu::getForcesReadyOnDeviceEvent(AtomLocality /* atomLocality */,
+ bool /* useGpuFBufferOps */)
- GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called insted of one from GPU implementation.");
+ GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation.");
+ return nullptr;
+}
+
+GpuEventSynchronizer* StatePropagatorDataGpu::fReducedOnDevice()
+{
+ GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation.");
void copyForcesToGpu(gmx::ArrayRef<const gmx::RVec> h_f,
AtomLocality atomLocality);
void copyForcesToGpu(gmx::ArrayRef<const gmx::RVec> h_f,
AtomLocality atomLocality);
- /*! \brief Get the event synchronizer on the H2D forces copy.
+ /*! \brief Get the event synchronizer for the forces ready on device.
- * \param[in] atomLocality Locality of the particles to wait for.
+ * Returns either of the event synchronizers, depending on the offload scenario
+ * for the current simulation timestep:
+ * 1. The forces are copied to the device (when GPU buffer ops are off)
+ * 2. The forces are reduced on the device (GPU buffer ops are on)
+ *
+ * \todo Pass step workload instead of the useGpuFBufferOps boolean.
+ *
+ * \param[in] atomLocality Locality of the particles to wait for.
+ * \param[in] useGpuFBufferOps If the force buffer ops are offloaded to the GPU.
*
* \returns The event to synchronize the stream that consumes forces on device.
*/
*
* \returns The event to synchronize the stream that consumes forces on device.
*/
- GpuEventSynchronizer* getForcesReadyOnDeviceEvent(AtomLocality atomLocality);
+ GpuEventSynchronizer* getForcesReadyOnDeviceEvent(AtomLocality atomLocality,
+ bool useGpuFBufferOps);
+
+ /*! \brief Getter for the event synchronizer marking that the forces are reduced on the GPU.
+ *
+ * \returns The event to mark when forces are reduced on the GPU.
+ */
+ GpuEventSynchronizer* fReducedOnDevice();
/*! \brief Copy forces from the GPU memory.
*
/*! \brief Copy forces from the GPU memory.
*
//! An array of events that indicate H2D copy of forces is complete (one event for each atom locality)
EnumerationArray<AtomLocality, GpuEventSynchronizer> fReadyOnDevice_;
//! An array of events that indicate H2D copy of forces is complete (one event for each atom locality)
EnumerationArray<AtomLocality, GpuEventSynchronizer> fReadyOnDevice_;
+ //! An event marking that the forces were reduced on the GPU
+ GpuEventSynchronizer fReducedOnDevice_;
//! An array of events that indicate D2H copy of forces is complete (one event for each atom locality)
EnumerationArray<AtomLocality, GpuEventSynchronizer> fReadyOnHost_;
//! An array of events that indicate D2H copy of forces is complete (one event for each atom locality)
EnumerationArray<AtomLocality, GpuEventSynchronizer> fReadyOnHost_;
fReadyOnDevice_[atomLocality].markEvent(commandStream);
}
fReadyOnDevice_[atomLocality].markEvent(commandStream);
}
-GpuEventSynchronizer* StatePropagatorDataGpu::Impl::getForcesReadyOnDeviceEvent(AtomLocality atomLocality)
+GpuEventSynchronizer* StatePropagatorDataGpu::Impl::getForcesReadyOnDeviceEvent(AtomLocality atomLocality,
+ bool useGpuFBufferOps)
- return &fReadyOnDevice_[atomLocality];
+ if ((atomLocality == AtomLocality::Local || atomLocality == AtomLocality::NonLocal) && useGpuFBufferOps)
+ {
+ return &fReducedOnDevice_;
+ }
+ else
+ {
+ return &fReadyOnDevice_[atomLocality];
+ }
+GpuEventSynchronizer* StatePropagatorDataGpu::Impl::fReducedOnDevice()
+{
+ return &fReducedOnDevice_;
+}
void StatePropagatorDataGpu::Impl::copyForcesFromGpu(gmx::ArrayRef<gmx::RVec> h_f,
AtomLocality atomLocality)
void StatePropagatorDataGpu::Impl::copyForcesFromGpu(gmx::ArrayRef<gmx::RVec> h_f,
AtomLocality atomLocality)
return impl_->copyForcesToGpu(h_f, atomLocality);
}
return impl_->copyForcesToGpu(h_f, atomLocality);
}
-GpuEventSynchronizer* StatePropagatorDataGpu::getForcesReadyOnDeviceEvent(AtomLocality atomLocality)
+GpuEventSynchronizer* StatePropagatorDataGpu::getForcesReadyOnDeviceEvent(AtomLocality atomLocality,
+ bool useGpuFBufferOps)
+{
+ return impl_->getForcesReadyOnDeviceEvent(atomLocality, useGpuFBufferOps);
+}
+
+GpuEventSynchronizer* StatePropagatorDataGpu::fReducedOnDevice()
- return impl_->getForcesReadyOnDeviceEvent(atomLocality);
+ return impl_->fReducedOnDevice();
}
void StatePropagatorDataGpu::copyForcesFromGpu(gmx::ArrayRef<RVec> h_f,
}
void StatePropagatorDataGpu::copyForcesFromGpu(gmx::ArrayRef<RVec> h_f,