void copyCoordinatesToGpu(gmx::ArrayRef<const gmx::RVec> h_x,
AtomLocality atomLocality);
- /*! \brief Get the event synchronizer on the H2D coordinates copy.
+ /*! \brief Get the event synchronizer for the H2D coordinates copy.
*
* \param[in] atomLocality Locality of the particles to wait for.
*
void copyVelocitiesToGpu(gmx::ArrayRef<const gmx::RVec> h_v,
AtomLocality atomLocality);
+ /*! \brief Get the event synchronizer for the H2D velocities copy.
+ *
+ * \param[in] atomLocality Locality of the particles to wait for.
+ *
+ * \returns The event to synchronize the stream that consumes velocities on device.
+ */
+ GpuEventSynchronizer* getVelocitiesReadyOnDeviceEvent(AtomLocality atomLocality);
+
/*! \brief Copy velocities from the GPU memory.
*
* \param[in] h_v Velocities buffer in the host memory.
void copyVelocitiesFromGpu(gmx::ArrayRef<gmx::RVec> h_v,
AtomLocality atomLocality);
+ /*! \brief Wait until velocities are available on the host.
+ *
+ * \param[in] atomLocality Locality of the particles to wait for.
+ */
+ void waitVelocitiesReadyOnHost(AtomLocality atomLocality);
+
/*! \brief Get the force buffer on the GPU.
*
void copyForcesToGpu(gmx::ArrayRef<const gmx::RVec> h_f,
AtomLocality atomLocality);
+ /*! \brief Get the event synchronizer for the H2D forces copy.
+ *
+ * \param[in] atomLocality Locality of the particles to wait for.
+ *
+ * \returns The event to synchronize the stream that consumes forces on device.
+ */
+ GpuEventSynchronizer* getForcesReadyOnDeviceEvent(AtomLocality atomLocality);
+
/*! \brief Copy forces from the GPU memory.
*
* \param[in] h_f Forces buffer in the host memory.
void copyForcesFromGpu(gmx::ArrayRef<gmx::RVec> h_f,
AtomLocality atomLocality);
+ /*! \brief Wait until forces are available on the host.
+ *
+ * \param[in] atomLocality Locality of the particles to wait for.
+ */
+ void waitForcesReadyOnHost(AtomLocality atomLocality);
+
/*! \brief Getter for the update stream.
*
* \todo This is temporary here, until the management of this stream is taken over.
GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation.");
}
+GpuEventSynchronizer* StatePropagatorDataGpu::getVelocitiesReadyOnDeviceEvent(AtomLocality /* atomLocality */)
+{ // Stub variant: the locality argument is ignored and the assertion below always fires.
+ GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation.");
+ return nullptr; // no event exists in the stub; presumably only reached when asserts are compiled out
+}
+
void StatePropagatorDataGpu::copyVelocitiesFromGpu(gmx::ArrayRef<gmx::RVec> /* h_v */,
AtomLocality /* atomLocality */)
{ // Stub variant: both arguments are intentionally unnamed; the body only trips the assertion.
GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation.");
}
+void StatePropagatorDataGpu::waitVelocitiesReadyOnHost(AtomLocality /* atomLocality */)
+{ // Stub variant: nothing is waited on; the locality is ignored and the assertion always fires.
+ GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation.");
+}
+
DeviceBuffer<float> StatePropagatorDataGpu::getForces()
{
GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation.");
}
+GpuEventSynchronizer* StatePropagatorDataGpu::getForcesReadyOnDeviceEvent(AtomLocality /* atomLocality */)
+{
+    // Stub variant: the locality argument is ignored and the assertion always fires.
+    // The message is spelled exactly like the one used by the other stubs in this file
+    // ("instead", not "insted"), so a search for it finds every stub.
+    GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation.");
+    // No event exists in the stub; presumably only reached when asserts are compiled out.
+    return nullptr;
+}
+
void StatePropagatorDataGpu::copyForcesFromGpu(gmx::ArrayRef<gmx::RVec> /* h_f */,
AtomLocality /* atomLocality */)
{ // Stub variant: both arguments are intentionally unnamed; the body only trips the assertion.
GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation.");
}
+void StatePropagatorDataGpu::waitForcesReadyOnHost(AtomLocality /* atomLocality */)
+{ // Stub variant: nothing is waited on; the locality is ignored and the assertion always fires.
+ GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation.");
+}
+
+
void* StatePropagatorDataGpu::getUpdateStream()
{
GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation.");
void copyVelocitiesToGpu(gmx::ArrayRef<const gmx::RVec> h_v,
AtomLocality atomLocality);
+ /*! \brief Get the event synchronizer for the H2D velocities copy.
+ *
+ * \param[in] atomLocality Locality of the particles to wait for.
+ *
+ * \returns The event to synchronize the stream that consumes velocities on device.
+ */
+ GpuEventSynchronizer* getVelocitiesReadyOnDeviceEvent(AtomLocality atomLocality);
+
/*! \brief Copy velocities from the GPU memory.
*
* \param[in] h_v Velocities buffer in the host memory.
void copyVelocitiesFromGpu(gmx::ArrayRef<gmx::RVec> h_v,
AtomLocality atomLocality);
+ /*! \brief Wait until velocities are available on the host.
+ *
+ * \param[in] atomLocality Locality of the particles to wait for.
+ */
+ void waitVelocitiesReadyOnHost(AtomLocality atomLocality);
+
/*! \brief Get the force buffer on the GPU.
*
void copyForcesToGpu(gmx::ArrayRef<const gmx::RVec> h_f,
AtomLocality atomLocality);
+ /*! \brief Get the event synchronizer for the H2D forces copy.
+ *
+ * \param[in] atomLocality Locality of the particles to wait for.
+ *
+ * \returns The event to synchronize the stream that consumes forces on device.
+ */
+ GpuEventSynchronizer* getForcesReadyOnDeviceEvent(AtomLocality atomLocality);
+
/*! \brief Copy forces from the GPU memory.
*
* \param[in] h_f Forces buffer in the host memory.
void copyForcesFromGpu(gmx::ArrayRef<gmx::RVec> h_f,
AtomLocality atomLocality);
+ /*! \brief Wait until forces are available on the host.
+ *
+ * \param[in] atomLocality Locality of the particles to wait for.
+ */
+ void waitForcesReadyOnHost(AtomLocality atomLocality);
+
/*! \brief Getter for the update stream.
*
* \todo This is temporary here, until the management of this stream is taken over.
//! GPU Update-constraints stream.
CommandStream updateStream_ = nullptr;
- // Streams to use for coordinates H2S and D2H copies (one event for each atom locality)
+ // Streams to use for coordinates H2D and D2H copies (one stream for each atom locality)
EnumerationArray<AtomLocality, CommandStream> xCopyStreams_ = {{nullptr}};
+ // Streams to use for velocities H2D and D2H copies (one stream for each atom locality)
+ EnumerationArray<AtomLocality, CommandStream> vCopyStreams_ = {{nullptr}};
+ // Streams to use for forces H2D and D2H copies (one stream for each atom locality)
+ EnumerationArray<AtomLocality, CommandStream> fCopyStreams_ = {{nullptr}};
//! An array of events that indicate H2D copy of coordinates is complete (one event for each atom locality)
EnumerationArray<AtomLocality, GpuEventSynchronizer> xReadyOnDevice_;
- //! An array of events that indicate D2H copy is complete (one event for each atom locality)
+ //! An array of events that indicate D2H copy of coordinates is complete (one event for each atom locality)
EnumerationArray<AtomLocality, GpuEventSynchronizer> xReadyOnHost_;
+ //! An array of events that indicate H2D copy of velocities is complete (one event for each atom locality)
+ EnumerationArray<AtomLocality, GpuEventSynchronizer> vReadyOnDevice_;
+ //! An array of events that indicate D2H copy of velocities is complete (one event for each atom locality)
+ EnumerationArray<AtomLocality, GpuEventSynchronizer> vReadyOnHost_;
+
+ //! An array of events that indicate H2D copy of forces is complete (one event for each atom locality)
+ EnumerationArray<AtomLocality, GpuEventSynchronizer> fReadyOnDevice_;
+ //! An array of events that indicate D2H copy of forces is complete (one event for each atom locality)
+ EnumerationArray<AtomLocality, GpuEventSynchronizer> fReadyOnHost_;
+
/*! \brief GPU context (for OpenCL builds)
* \todo Make a Context class usable in CPU code
*/
GMX_UNUSED_VALUE(deviceContext);
}
- // Map the atom locality to the stream that will be used for coordinates transfer.
- // Same streams are used for H2D and D2H copies
+ // Map the atom locality to the stream that will be used for coordinates,
+ // velocities and forces transfers. Same streams are used for H2D and D2H copies.
+ // Note that a nullptr stream is used here to indicate that the copy is not supported.
xCopyStreams_[AtomLocality::Local] = updateStream_;
xCopyStreams_[AtomLocality::NonLocal] = nonLocalStream_;
xCopyStreams_[AtomLocality::All] = updateStream_;
+
+ vCopyStreams_[AtomLocality::Local] = updateStream_;
+ vCopyStreams_[AtomLocality::NonLocal] = nullptr;
+ vCopyStreams_[AtomLocality::All] = updateStream_;
+
+ fCopyStreams_[AtomLocality::Local] = localStream_;
+ fCopyStreams_[AtomLocality::NonLocal] = nonLocalStream_;
+ fCopyStreams_[AtomLocality::All] = nullptr;
}
StatePropagatorDataGpu::Impl::~Impl()
void StatePropagatorDataGpu::Impl::copyVelocitiesToGpu(const gmx::ArrayRef<const gmx::RVec> h_v,
AtomLocality atomLocality)
{
- // TODO: Use the correct stream
- copyToDevice(d_v_, h_v, d_vSize_, atomLocality, nullptr);
+ GMX_ASSERT(atomLocality < AtomLocality::Count, "Wrong atom locality."); // guards the per-locality array lookups below
+ CommandStream commandStream = vCopyStreams_[atomLocality]; // stream assigned to velocity copies for this locality
+ GMX_ASSERT(commandStream != nullptr, "No stream is valid for copying velocities with given atom locality."); // a nullptr table entry marks an unsupported locality
+
+ copyToDevice(d_v_, h_v, d_vSize_, atomLocality, commandStream);
+ // TODO: Remove when event-based synchronization is introduced
+ gpuStreamSynchronize(commandStream);
+ vReadyOnDevice_[atomLocality].markEvent(commandStream); // same event that getVelocitiesReadyOnDeviceEvent() hands out
+}
+
+GpuEventSynchronizer* StatePropagatorDataGpu::Impl::getVelocitiesReadyOnDeviceEvent(AtomLocality atomLocality)
+{
+    // Hand out the address of the per-locality event that copyVelocitiesToGpu() marks.
+    GpuEventSynchronizer& velocitiesOnDevice = vReadyOnDevice_[atomLocality];
+    return &velocitiesOnDevice;
}
+
void StatePropagatorDataGpu::Impl::copyVelocitiesFromGpu(gmx::ArrayRef<gmx::RVec> h_v,
AtomLocality atomLocality)
{
- // TODO: Use the correct stream
- copyFromDevice(h_v, d_v_, d_vSize_, atomLocality, nullptr);
+ GMX_ASSERT(atomLocality < AtomLocality::Count, "Wrong atom locality."); // guards the per-locality array lookups below
+ CommandStream commandStream = vCopyStreams_[atomLocality]; // stream assigned to velocity copies for this locality
+ GMX_ASSERT(commandStream != nullptr, "No stream is valid for copying velocities with given atom locality."); // a nullptr table entry marks an unsupported locality
+
+ copyFromDevice(h_v, d_v_, d_vSize_, atomLocality, commandStream);
+ // TODO: Remove when event-based synchronization is introduced
+ gpuStreamSynchronize(commandStream);
+ vReadyOnHost_[atomLocality].markEvent(commandStream); // same event that waitVelocitiesReadyOnHost() waits on
+}
+
+void StatePropagatorDataGpu::Impl::waitVelocitiesReadyOnHost(AtomLocality atomLocality)
+{
+    // Wait on the per-locality event that copyVelocitiesFromGpu() marks.
+    GpuEventSynchronizer& velocitiesOnHost = vReadyOnHost_[atomLocality];
+    velocitiesOnHost.waitForEvent();
}
void StatePropagatorDataGpu::Impl::copyForcesToGpu(const gmx::ArrayRef<const gmx::RVec> h_f,
AtomLocality atomLocality)
{
- // TODO: Use the correct stream
- copyToDevice(d_f_, h_f, d_fSize_, atomLocality, nullptr);
+ GMX_ASSERT(atomLocality < AtomLocality::Count, "Wrong atom locality."); // guards the per-locality array lookups below
+ CommandStream commandStream = fCopyStreams_[atomLocality]; // stream assigned to force copies for this locality
+ GMX_ASSERT(commandStream != nullptr, "No stream is valid for copying forces with given atom locality."); // a nullptr table entry marks an unsupported locality
+
+ copyToDevice(d_f_, h_f, d_fSize_, atomLocality, commandStream);
+ // TODO: Remove when event-based synchronization is introduced
+ gpuStreamSynchronize(commandStream);
+ fReadyOnDevice_[atomLocality].markEvent(commandStream); // same event that getForcesReadyOnDeviceEvent() hands out
}
+GpuEventSynchronizer* StatePropagatorDataGpu::Impl::getForcesReadyOnDeviceEvent(AtomLocality atomLocality)
+{
+    // Hand out the address of the per-locality event that copyForcesToGpu() marks.
+    GpuEventSynchronizer& forcesOnDevice = fReadyOnDevice_[atomLocality];
+    return &forcesOnDevice;
+}
+
+
void StatePropagatorDataGpu::Impl::copyForcesFromGpu(gmx::ArrayRef<gmx::RVec> h_f,
AtomLocality atomLocality)
{
- // TODO: Use the correct stream
- copyFromDevice(h_f, d_f_, d_fSize_, atomLocality, nullptr);
+ GMX_ASSERT(atomLocality < AtomLocality::Count, "Wrong atom locality."); // guards the per-locality array lookups below
+ CommandStream commandStream = fCopyStreams_[atomLocality]; // stream assigned to force copies for this locality
+ GMX_ASSERT(commandStream != nullptr, "No stream is valid for copying forces with given atom locality."); // a nullptr table entry marks an unsupported locality
+
+ copyFromDevice(h_f, d_f_, d_fSize_, atomLocality, commandStream);
+ // TODO: Remove when event-based synchronization is introduced
+ gpuStreamSynchronize(commandStream);
+ fReadyOnHost_[atomLocality].markEvent(commandStream); // same event that waitForcesReadyOnHost() waits on
+}
+
+void StatePropagatorDataGpu::Impl::waitForcesReadyOnHost(AtomLocality atomLocality)
+{
+    // Wait on the per-locality event that copyForcesFromGpu() marks.
+    GpuEventSynchronizer& forcesOnHost = fReadyOnHost_[atomLocality];
+    forcesOnHost.waitForEvent();
}
void* StatePropagatorDataGpu::Impl::getUpdateStream()
return impl_->copyVelocitiesToGpu(h_v, atomLocality);
}
+GpuEventSynchronizer* StatePropagatorDataGpu::getVelocitiesReadyOnDeviceEvent(AtomLocality atomLocality)
+{
+    // Thin forwarder: the event pointer comes straight from impl_.
+    GpuEventSynchronizer* velocitiesReadyEvent = impl_->getVelocitiesReadyOnDeviceEvent(atomLocality);
+    return velocitiesReadyEvent;
+}
+
void StatePropagatorDataGpu::copyVelocitiesFromGpu(gmx::ArrayRef<RVec> h_v,
AtomLocality atomLocality)
{ // Thin forwarder: both arguments are passed unchanged to the same-named method on impl_.
return impl_->copyVelocitiesFromGpu(h_v, atomLocality);
}
+void StatePropagatorDataGpu::waitVelocitiesReadyOnHost(AtomLocality atomLocality)
+{
+    // Thin forwarder to impl_; there is no result to pass back.
+    impl_->waitVelocitiesReadyOnHost(atomLocality);
+}
+
DeviceBuffer<float> StatePropagatorDataGpu::getForces()
{
return impl_->copyForcesToGpu(h_f, atomLocality);
}
+GpuEventSynchronizer* StatePropagatorDataGpu::getForcesReadyOnDeviceEvent(AtomLocality atomLocality)
+{
+    // Thin forwarder: the event pointer comes straight from impl_.
+    GpuEventSynchronizer* forcesReadyEvent = impl_->getForcesReadyOnDeviceEvent(atomLocality);
+    return forcesReadyEvent;
+}
+
void StatePropagatorDataGpu::copyForcesFromGpu(gmx::ArrayRef<RVec> h_f,
AtomLocality atomLocality)
{ // Thin forwarder: both arguments are passed unchanged to the same-named method on impl_.
return impl_->copyForcesFromGpu(h_f, atomLocality);
}
+void StatePropagatorDataGpu::waitForcesReadyOnHost(AtomLocality atomLocality)
+{
+    // Thin forwarder to impl_; there is no result to pass back.
+    impl_->waitForcesReadyOnHost(atomLocality);
+}
+
+
void* StatePropagatorDataGpu::getUpdateStream()
{
return impl_->getUpdateStream();