if (haveNonLocalForceContribInCpuBuffer)
{
stateGpu->copyForcesToGpu(forceOut.forceWithShiftForces().force(), gmx::StatePropagatorDataGpu::AtomLocality::NonLocal);
- dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(gmx::StatePropagatorDataGpu::AtomLocality::NonLocal));
+ dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(gmx::StatePropagatorDataGpu::AtomLocality::NonLocal,
+ useGpuFBufOps == BufferOpsUseGpu::True));
}
nbv->atomdata_add_nbat_f_to_f_gpu(Nbnxm::AtomLocality::NonLocal,
if (haveLocalForceContribInCpuBuffer && !useGpuForcesHaloExchange)
{
stateGpu->copyForcesToGpu(forceWithShift, gmx::StatePropagatorDataGpu::AtomLocality::Local);
- dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(gmx::StatePropagatorDataGpu::AtomLocality::Local));
+ dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(gmx::StatePropagatorDataGpu::AtomLocality::Local,
+ useGpuFBufOps == BufferOpsUseGpu::True));
}
if (useGpuForcesHaloExchange)
{
void copyForcesToGpu(gmx::ArrayRef<const gmx::RVec> h_f,
AtomLocality atomLocality);
- /*! \brief Get the event synchronizer for the H2D forces copy.
+ /*! \brief Get the event synchronizer for the forces ready on device.
*
- * \param[in] atomLocality Locality of the particles to wait for.
+ * Returns one of two event synchronizers, depending on the offload scenario
+ * for the current simulation timestep:
+ * 1. The forces are copied to the device (when GPU buffer ops are off)
+ * 2. The forces are reduced on the device (GPU buffer ops are on)
+ *
+ * \todo Pass step workload instead of the useGpuFBufferOps boolean.
+ *
+ * \param[in] atomLocality Locality of the particles to wait for.
+ * \param[in] useGpuFBufferOps If the force buffer ops are offloaded to the GPU.
*
* \returns The event to synchronize the stream that consumes forces on device.
*/
- GpuEventSynchronizer* getForcesReadyOnDeviceEvent(AtomLocality atomLocality);
+ GpuEventSynchronizer* getForcesReadyOnDeviceEvent(AtomLocality atomLocality,
+ bool useGpuFBufferOps);
+
+ /*! \brief Getter for the event synchronizer that marks when the forces are reduced on the GPU.
+ *
+ * \returns The event to mark when forces are reduced on the GPU.
+ */
+ GpuEventSynchronizer* fReducedOnDevice();
/*! \brief Copy forces from the GPU memory.
*
GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation.");
}
-GpuEventSynchronizer* StatePropagatorDataGpu::getForcesReadyOnDeviceEvent(AtomLocality /* atomLocality */)
+GpuEventSynchronizer* StatePropagatorDataGpu::getForcesReadyOnDeviceEvent(AtomLocality /* atomLocality */,
+ bool /* useGpuFBufferOps */)
{
- GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called insted of one from GPU implementation.");
+ GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation.");
+ return nullptr;
+}
+
+GpuEventSynchronizer* StatePropagatorDataGpu::fReducedOnDevice()
+{
+ GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation.");
return nullptr;
}
void copyForcesToGpu(gmx::ArrayRef<const gmx::RVec> h_f,
AtomLocality atomLocality);
- /*! \brief Get the event synchronizer on the H2D forces copy.
+ /*! \brief Get the event synchronizer for the forces ready on device.
*
- * \param[in] atomLocality Locality of the particles to wait for.
+ * Returns one of two event synchronizers, depending on the offload scenario
+ * for the current simulation timestep:
+ * 1. The forces are copied to the device (when GPU buffer ops are off)
+ * 2. The forces are reduced on the device (GPU buffer ops are on)
+ *
+ * \todo Pass step workload instead of the useGpuFBufferOps boolean.
+ *
+ * \param[in] atomLocality Locality of the particles to wait for.
+ * \param[in] useGpuFBufferOps If the force buffer ops are offloaded to the GPU.
*
* \returns The event to synchronize the stream that consumes forces on device.
*/
- GpuEventSynchronizer* getForcesReadyOnDeviceEvent(AtomLocality atomLocality);
+ GpuEventSynchronizer* getForcesReadyOnDeviceEvent(AtomLocality atomLocality,
+ bool useGpuFBufferOps);
+
+ /*! \brief Getter for the event synchronizer that marks when the forces are reduced on the GPU.
+ *
+ * \returns The event to mark when forces are reduced on the GPU.
+ */
+ GpuEventSynchronizer* fReducedOnDevice();
/*! \brief Copy forces from the GPU memory.
*
//! An array of events that indicate H2D copy of forces is complete (one event for each atom locality)
EnumerationArray<AtomLocality, GpuEventSynchronizer> fReadyOnDevice_;
+ //! An event that indicates the forces were reduced on the GPU
+ GpuEventSynchronizer fReducedOnDevice_;
//! An array of events that indicate D2H copy of forces is complete (one event for each atom locality)
EnumerationArray<AtomLocality, GpuEventSynchronizer> fReadyOnHost_;
fReadyOnDevice_[atomLocality].markEvent(commandStream);
}
-GpuEventSynchronizer* StatePropagatorDataGpu::Impl::getForcesReadyOnDeviceEvent(AtomLocality atomLocality)
+GpuEventSynchronizer* StatePropagatorDataGpu::Impl::getForcesReadyOnDeviceEvent(AtomLocality atomLocality,
+ bool useGpuFBufferOps)
{
- return &fReadyOnDevice_[atomLocality];
+ // Pick the event a consumer of device forces has to wait on.
+ // With GPU buffer ops the forces for the Local/All atoms become final only
+ // after the on-device reduction, so the "reduced" event is returned for them.
+ // NonLocal must always get its own H2D copy event: the caller enqueues it as
+ // a dependency of the non-local reduction itself, which has to wait for the
+ // copy and must not wait for the (later) reduction-complete event.
+ // NOTE(review): the Local H2D copy is likewise queued as a dependency of the
+ // local reduction by the caller; confirm that returning the reduced event for
+ // Local is intended on that path.
+ if ((atomLocality == AtomLocality::Local || atomLocality == AtomLocality::All) && useGpuFBufferOps)
+ {
+ return &fReducedOnDevice_;
+ }
+ else
+ {
+ // Buffer ops on the CPU, or a non-local request: wait for the H2D copy.
+ return &fReadyOnDevice_[atomLocality];
+ }
}
+GpuEventSynchronizer* StatePropagatorDataGpu::Impl::fReducedOnDevice()
+{
+ return &fReducedOnDevice_;
+}
void StatePropagatorDataGpu::Impl::copyForcesFromGpu(gmx::ArrayRef<gmx::RVec> h_f,
AtomLocality atomLocality)
return impl_->copyForcesToGpu(h_f, atomLocality);
}
-GpuEventSynchronizer* StatePropagatorDataGpu::getForcesReadyOnDeviceEvent(AtomLocality atomLocality)
+GpuEventSynchronizer* StatePropagatorDataGpu::getForcesReadyOnDeviceEvent(AtomLocality atomLocality,
+ bool useGpuFBufferOps)
+{
+ return impl_->getForcesReadyOnDeviceEvent(atomLocality, useGpuFBufferOps);
+}
+
+GpuEventSynchronizer* StatePropagatorDataGpu::fReducedOnDevice()
{
- return impl_->getForcesReadyOnDeviceEvent(atomLocality);
+ return impl_->fReducedOnDevice();
}
void StatePropagatorDataGpu::copyForcesFromGpu(gmx::ArrayRef<RVec> h_f,