The event marking completion of the H2D velocity copy (vReadyOnDevice_) was
unused, since we were using the copied data only in the stream in
which we did the copy. So, the synchronization was performed implicitly,
with no need to wait for the event.
Also added a comment about a rare redundant copy.
Refs #3988
*md);
// Copy data to the GPU after buffers might have been reinitialized
*md);
// Copy data to the GPU after buffers might have been reinitialized
- // coordinates have been copied already if PME or buffer ops has not needed it this step.
+ /* The velocity copy is redundant if we had Center-of-Mass motion removed on
+ * the previous step. We don't check that now. */
stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
+ // coordinates have been copied already if PME or buffer ops has not needed it this step.
const bool useGpuPmeOnThisRank = runScheduleWork->simulationWork.useGpuPme
&& thisRankHasDuty(cr, DUTY_PME)
&& runScheduleWork->stepWork.computeSlowForces;
const bool useGpuPmeOnThisRank = runScheduleWork->simulationWork.useGpuPme
&& thisRankHasDuty(cr, DUTY_PME)
&& runScheduleWork->stepWork.computeSlowForces;
DeviceBuffer<RVec> getCoordinates();
/*! \brief Copy positions to the GPU memory.
DeviceBuffer<RVec> getCoordinates();
/*! \brief Copy positions to the GPU memory.
+ *
+ * Use \ref getCoordinatesReadyOnDeviceEvent to get the associated event synchronizer or
+ * \ref waitCoordinatesCopiedToDevice to wait for the copy completion.
+ * Note: the event is not marked in OpenCL, because it is not used.
*
* \param[in] h_x Positions in the host memory.
* \param[in] atomLocality Locality of the particles to copy.
*
* \param[in] h_x Positions in the host memory.
* \param[in] atomLocality Locality of the particles to copy.
DeviceBuffer<RVec> getVelocities();
/*! \brief Copy velocities to the GPU memory.
DeviceBuffer<RVec> getVelocities();
/*! \brief Copy velocities to the GPU memory.
+ *
+ * Does not mark any event, because we don't use it anywhere at the moment.
*
* \param[in] h_v Velocities in the host memory.
* \param[in] atomLocality Locality of the particles to copy.
*/
void copyVelocitiesToGpu(gmx::ArrayRef<const gmx::RVec> h_v, AtomLocality atomLocality);
*
* \param[in] h_v Velocities in the host memory.
* \param[in] atomLocality Locality of the particles to copy.
*/
void copyVelocitiesToGpu(gmx::ArrayRef<const gmx::RVec> h_v, AtomLocality atomLocality);
- /*! \brief Get the event synchronizer for the H2D velocities copy.
- *
- * \param[in] atomLocality Locality of the particles to wait for.
- *
- * \returns The event to synchronize the stream that consumes velocities on device.
- */
- GpuEventSynchronizer* getVelocitiesReadyOnDeviceEvent(AtomLocality atomLocality);
-
/*! \brief Copy velocities from the GPU memory.
*
* \param[in] h_v Velocities buffer in the host memory.
/*! \brief Copy velocities from the GPU memory.
*
* \param[in] h_v Velocities buffer in the host memory.
"GPU implementation.");
}
"GPU implementation.");
}
-GpuEventSynchronizer* StatePropagatorDataGpu::getVelocitiesReadyOnDeviceEvent(AtomLocality /* atomLocality */)
-{
- GMX_ASSERT(!impl_,
- "A CPU stub method from GPU state propagator data was called instead of one from "
- "GPU implementation.");
- return nullptr;
-}
-
void StatePropagatorDataGpu::copyVelocitiesFromGpu(gmx::ArrayRef<gmx::RVec> /* h_v */,
AtomLocality /* atomLocality */)
{
void StatePropagatorDataGpu::copyVelocitiesFromGpu(gmx::ArrayRef<gmx::RVec> /* h_v */,
AtomLocality /* atomLocality */)
{
*/
void copyVelocitiesToGpu(gmx::ArrayRef<const gmx::RVec> h_v, AtomLocality atomLocality);
*/
void copyVelocitiesToGpu(gmx::ArrayRef<const gmx::RVec> h_v, AtomLocality atomLocality);
- /*! \brief Get the event synchronizer on the H2D velocities copy.
- *
- * \param[in] atomLocality Locality of the particles to wait for.
- *
- * \returns The event to synchronize the stream that consumes velocities on device.
- */
- GpuEventSynchronizer* getVelocitiesReadyOnDeviceEvent(AtomLocality atomLocality);
-
/*! \brief Copy velocities from the GPU memory.
*
* \param[in] h_v Velocities buffer in the host memory.
/*! \brief Copy velocities from the GPU memory.
*
* \param[in] h_v Velocities buffer in the host memory.
//! An array of events that indicate D2H copy of coordinates is complete (one event for each atom locality)
EnumerationArray<AtomLocality, GpuEventSynchronizer> xReadyOnHost_;
//! An array of events that indicate D2H copy of coordinates is complete (one event for each atom locality)
EnumerationArray<AtomLocality, GpuEventSynchronizer> xReadyOnHost_;
- //! An array of events that indicate H2D copy of velocities is complete (one event for each atom locality)
- EnumerationArray<AtomLocality, GpuEventSynchronizer> vReadyOnDevice_;
//! An array of events that indicate D2H copy of velocities is complete (one event for each atom locality)
EnumerationArray<AtomLocality, GpuEventSynchronizer> vReadyOnHost_;
//! An array of events that indicate D2H copy of velocities is complete (one event for each atom locality)
EnumerationArray<AtomLocality, GpuEventSynchronizer> vReadyOnHost_;
wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchStatePropagatorData);
copyToDevice(d_v_, h_v, d_vSize_, atomLocality, *deviceStream);
wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchStatePropagatorData);
copyToDevice(d_v_, h_v, d_vSize_, atomLocality, *deviceStream);
- vReadyOnDevice_[atomLocality].markEvent(*deviceStream);
+ /* Not marking the event, because it is not used anywhere.
+ * Since we only use velocities on the device for update, and we launch the copy in
+ * the "update" stream, that should be safe.
+ */
wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchStatePropagatorData);
wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
}
wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchStatePropagatorData);
wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
}
-GpuEventSynchronizer* StatePropagatorDataGpu::Impl::getVelocitiesReadyOnDeviceEvent(AtomLocality atomLocality)
-{
- return &vReadyOnDevice_[atomLocality];
-}
-
-
void StatePropagatorDataGpu::Impl::copyVelocitiesFromGpu(gmx::ArrayRef<gmx::RVec> h_v, AtomLocality atomLocality)
{
GMX_ASSERT(atomLocality < AtomLocality::Count, "Wrong atom locality.");
void StatePropagatorDataGpu::Impl::copyVelocitiesFromGpu(gmx::ArrayRef<gmx::RVec> h_v, AtomLocality atomLocality)
{
GMX_ASSERT(atomLocality < AtomLocality::Count, "Wrong atom locality.");
return impl_->copyVelocitiesToGpu(h_v, atomLocality);
}
return impl_->copyVelocitiesToGpu(h_v, atomLocality);
}
-GpuEventSynchronizer* StatePropagatorDataGpu::getVelocitiesReadyOnDeviceEvent(AtomLocality atomLocality)
-{
- return impl_->getVelocitiesReadyOnDeviceEvent(atomLocality);
-}
-
void StatePropagatorDataGpu::copyVelocitiesFromGpu(gmx::ArrayRef<RVec> h_v, AtomLocality atomLocality)
{
return impl_->copyVelocitiesFromGpu(h_v, atomLocality);
void StatePropagatorDataGpu::copyVelocitiesFromGpu(gmx::ArrayRef<RVec> h_v, AtomLocality atomLocality)
{
return impl_->copyVelocitiesFromGpu(h_v, atomLocality);