From: Artem Zhmurov Date: Fri, 4 Oct 2019 15:48:34 +0000 (+0200) Subject: Add management for velocities and forces copy events to StatePropagatorDataGpu X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=754dfec1b9cf5c0a81d90e6a6cce66212d6decd1;p=alexxy%2Fgromacs.git Add management for velocities and forces copy events to StatePropagatorDataGpu All H2D and D2H copies of velocities and forces now record an event, methods to synchronize on those events are added to the class. Change-Id: I910c5834d83f317f12c1fe0cd71ced168f412386 --- diff --git a/src/gromacs/mdtypes/state_propagator_data_gpu.h b/src/gromacs/mdtypes/state_propagator_data_gpu.h index a061917ca7..5c20b44793 100644 --- a/src/gromacs/mdtypes/state_propagator_data_gpu.h +++ b/src/gromacs/mdtypes/state_propagator_data_gpu.h @@ -173,7 +173,7 @@ class StatePropagatorDataGpu void copyCoordinatesToGpu(gmx::ArrayRef h_x, AtomLocality atomLocality); - /*! \brief Get the event synchronizer on the H2D coordinates copy. + /*! \brief Get the event synchronizer for the H2D coordinates copy. * * \param[in] atomLocality Locality of the particles to wait for. * @@ -210,6 +210,14 @@ class StatePropagatorDataGpu void copyVelocitiesToGpu(gmx::ArrayRef h_v, AtomLocality atomLocality); + /*! \brief Get the event synchronizer for the H2D velocities copy. + * + * \param[in] atomLocality Locality of the particles to wait for. + * + * \returns The event to synchronize the stream that consumes velocities on device. + */ + GpuEventSynchronizer* getVelocitiesReadyOnDeviceEvent(AtomLocality atomLocality); + /*! \brief Copy velocities from the GPU memory. * * \param[in] h_v Velocities buffer in the host memory. @@ -218,6 +226,12 @@ class StatePropagatorDataGpu void copyVelocitiesFromGpu(gmx::ArrayRef h_v, AtomLocality atomLocality); + /*! \brief Wait until velocities are available on the host. + * + * \param[in] atomLocality Locality of the particles to wait for. 
+ */ + void waitVelocitiesReadyOnHost(AtomLocality atomLocality); + /*! \brief Get the force buffer on the GPU. * @@ -233,6 +247,14 @@ class StatePropagatorDataGpu void copyForcesToGpu(gmx::ArrayRef h_f, AtomLocality atomLocality); + /*! \brief Get the event synchronizer for the H2D forces copy. + * + * \param[in] atomLocality Locality of the particles to wait for. + * + * \returns The event to synchronize the stream that consumes forces on device. + */ + GpuEventSynchronizer* getForcesReadyOnDeviceEvent(AtomLocality atomLocality); + /*! \brief Copy forces from the GPU memory. * * \param[in] h_f Forces buffer in the host memory. @@ -241,6 +263,12 @@ class StatePropagatorDataGpu void copyForcesFromGpu(gmx::ArrayRef h_f, AtomLocality atomLocality); + /*! \brief Wait until forces are available on the host. + * + * \param[in] atomLocality Locality of the particles to wait for. + */ + void waitForcesReadyOnHost(AtomLocality atomLocality); + /*! \brief Getter for the update stream. * * \todo This is temporary here, until the management of this stream is taken over. 
diff --git a/src/gromacs/mdtypes/state_propagator_data_gpu_impl.cpp b/src/gromacs/mdtypes/state_propagator_data_gpu_impl.cpp index 7e2072f7da..6160bdf5a0 100644 --- a/src/gromacs/mdtypes/state_propagator_data_gpu_impl.cpp +++ b/src/gromacs/mdtypes/state_propagator_data_gpu_impl.cpp @@ -124,12 +124,23 @@ void StatePropagatorDataGpu::copyVelocitiesToGpu(const gmx::ArrayRef /* h_v */, AtomLocality /* atomLocality */) { GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation."); } +void StatePropagatorDataGpu::waitVelocitiesReadyOnHost(AtomLocality /* atomLocality */) +{ + GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation."); +} + DeviceBuffer StatePropagatorDataGpu::getForces() { @@ -143,12 +154,24 @@ void StatePropagatorDataGpu::copyForcesToGpu(const gmx::ArrayRef /* h_f */, AtomLocality /* atomLocality */) { GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation."); } +void StatePropagatorDataGpu::waitForcesReadyOnHost(AtomLocality /* atomLocality */) +{ + GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation."); +} + + void* StatePropagatorDataGpu::getUpdateStream() { GMX_ASSERT(false, "A CPU stub method from GPU state propagator data was called instead of one from GPU implementation."); diff --git a/src/gromacs/mdtypes/state_propagator_data_gpu_impl.h b/src/gromacs/mdtypes/state_propagator_data_gpu_impl.h index 037eeadedf..cb15236738 100644 --- a/src/gromacs/mdtypes/state_propagator_data_gpu_impl.h +++ b/src/gromacs/mdtypes/state_propagator_data_gpu_impl.h @@ -197,6 +197,14 @@ class StatePropagatorDataGpu::Impl void copyVelocitiesToGpu(gmx::ArrayRef h_v, AtomLocality atomLocality); + /*! \brief Get the event synchronizer on the H2D velocities copy. 
+ * + * \param[in] atomLocality Locality of the particles to wait for. + * + * \returns The event to synchronize the stream that consumes velocities on device. + */ + GpuEventSynchronizer* getVelocitiesReadyOnDeviceEvent(AtomLocality atomLocality); + /*! \brief Copy velocities from the GPU memory. * * \param[in] h_v Velocities buffer in the host memory. @@ -205,6 +213,12 @@ class StatePropagatorDataGpu::Impl void copyVelocitiesFromGpu(gmx::ArrayRef h_v, AtomLocality atomLocality); + /*! \brief Wait until velocities are available on the host. + * + * \param[in] atomLocality Locality of the particles to wait for. + */ + void waitVelocitiesReadyOnHost(AtomLocality atomLocality); + /*! \brief Get the force buffer on the GPU. * @@ -220,6 +234,14 @@ class StatePropagatorDataGpu::Impl void copyForcesToGpu(gmx::ArrayRef h_f, AtomLocality atomLocality); + /*! \brief Get the event synchronizer for the H2D forces copy. + * + * \param[in] atomLocality Locality of the particles to wait for. + * + * \returns The event to synchronize the stream that consumes forces on device. + */ + GpuEventSynchronizer* getForcesReadyOnDeviceEvent(AtomLocality atomLocality); + /*! \brief Copy forces from the GPU memory. * * \param[in] h_f Forces buffer in the host memory. @@ -228,6 +250,12 @@ class StatePropagatorDataGpu::Impl void copyForcesFromGpu(gmx::ArrayRef h_f, AtomLocality atomLocality); + /*! \brief Wait until forces are available on the host. + * + * \param[in] atomLocality Locality of the particles to wait for. + */ + void waitForcesReadyOnHost(AtomLocality atomLocality); + /*! \brief Getter for the update stream. * * \todo This is temporary here, until the management of this stream is taken over. @@ -259,14 +287,28 @@ class StatePropagatorDataGpu::Impl //! GPU Update-constraints stream. 
CommandStream updateStream_ = nullptr; - // Streams to use for coordinates H2S and D2H copies (one event for each atom locality) + // Streams to use for coordinates H2D and D2H copies (one stream for each atom locality) EnumerationArray xCopyStreams_ = {{nullptr}}; + // Streams to use for velocities H2D and D2H copies (one stream for each atom locality) + EnumerationArray vCopyStreams_ = {{nullptr}}; + // Streams to use for forces H2D and D2H copies (one stream for each atom locality) + EnumerationArray fCopyStreams_ = {{nullptr}}; //! An array of events that indicate H2D copy is complete (one event for each atom locality) EnumerationArray xReadyOnDevice_; - //! An array of events that indicate D2H copy is complete (one event for each atom locality) + //! An array of events that indicate D2H copy of coordinates is complete (one event for each atom locality) EnumerationArray xReadyOnHost_; + //! An array of events that indicate H2D copy of velocities is complete (one event for each atom locality) + EnumerationArray vReadyOnDevice_; + //! An array of events that indicate D2H copy of velocities is complete (one event for each atom locality) + EnumerationArray vReadyOnHost_; + + //! An array of events that indicate H2D copy of forces is complete (one event for each atom locality) + EnumerationArray fReadyOnDevice_; + //! An array of events that indicate D2H copy of forces is complete (one event for each atom locality) + EnumerationArray fReadyOnHost_; + /*! 
\brief GPU context (for OpenCL builds) * \todo Make a Context class usable in CPU code */ diff --git a/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp b/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp index ba6850db45..7070891d6c 100644 --- a/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp +++ b/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp @@ -112,11 +112,20 @@ StatePropagatorDataGpu::Impl::Impl(const void *pmeStream, GMX_UNUSED_VALUE(deviceContext); } - // Map the atom locality to the stream that will be used for coordinates transfer. - // Same streams are used for H2D and D2H copies + // Map the atom locality to the stream that will be used for coordinates, + // velocities and forces transfers. Same streams are used for H2D and D2H copies. + // Note, that nullptr stream is used here to indicate that the copy is not supported. xCopyStreams_[AtomLocality::Local] = updateStream_; xCopyStreams_[AtomLocality::NonLocal] = nonLocalStream_; xCopyStreams_[AtomLocality::All] = updateStream_; + + vCopyStreams_[AtomLocality::Local] = updateStream_; + vCopyStreams_[AtomLocality::NonLocal] = nullptr; + vCopyStreams_[AtomLocality::All] = updateStream_; + + fCopyStreams_[AtomLocality::Local] = localStream_; + fCopyStreams_[AtomLocality::NonLocal] = nonLocalStream_; + fCopyStreams_[AtomLocality::All] = nullptr; } StatePropagatorDataGpu::Impl::~Impl() @@ -293,15 +302,38 @@ DeviceBuffer StatePropagatorDataGpu::Impl::getVelocities() void StatePropagatorDataGpu::Impl::copyVelocitiesToGpu(const gmx::ArrayRef h_v, AtomLocality atomLocality) { - // TODO: Use the correct stream - copyToDevice(d_v_, h_v, d_vSize_, atomLocality, nullptr); + GMX_ASSERT(atomLocality < AtomLocality::Count, "Wrong atom locality."); + CommandStream commandStream = vCopyStreams_[atomLocality]; + GMX_ASSERT(commandStream != nullptr, "No stream is valid for copying velocities with given atom locality."); + + copyToDevice(d_v_, h_v, d_vSize_, atomLocality, 
commandStream); + // TODO: Remove When event-based synchronization is introduced + gpuStreamSynchronize(commandStream); + vReadyOnDevice_[atomLocality].markEvent(commandStream); +} + +GpuEventSynchronizer* StatePropagatorDataGpu::Impl::getVelocitiesReadyOnDeviceEvent(AtomLocality atomLocality) +{ + return &vReadyOnDevice_[atomLocality]; } + void StatePropagatorDataGpu::Impl::copyVelocitiesFromGpu(gmx::ArrayRef h_v, AtomLocality atomLocality) { - // TODO: Use the correct stream - copyFromDevice(h_v, d_v_, d_vSize_, atomLocality, nullptr); + GMX_ASSERT(atomLocality < AtomLocality::Count, "Wrong atom locality."); + CommandStream commandStream = vCopyStreams_[atomLocality]; + GMX_ASSERT(commandStream != nullptr, "No stream is valid for copying velocities with given atom locality."); + + copyFromDevice(h_v, d_v_, d_vSize_, atomLocality, commandStream); + // TODO: Remove When event-based synchronization is introduced + gpuStreamSynchronize(commandStream); + vReadyOnHost_[atomLocality].markEvent(commandStream); +} + +void StatePropagatorDataGpu::Impl::waitVelocitiesReadyOnHost(AtomLocality atomLocality) +{ + vReadyOnHost_[atomLocality].waitForEvent(); } @@ -313,15 +345,38 @@ DeviceBuffer StatePropagatorDataGpu::Impl::getForces() void StatePropagatorDataGpu::Impl::copyForcesToGpu(const gmx::ArrayRef h_f, AtomLocality atomLocality) { - // TODO: Use the correct stream - copyToDevice(d_f_, h_f, d_fSize_, atomLocality, nullptr); + GMX_ASSERT(atomLocality < AtomLocality::Count, "Wrong atom locality."); + CommandStream commandStream = fCopyStreams_[atomLocality]; + GMX_ASSERT(commandStream != nullptr, "No stream is valid for copying forces with given atom locality."); + + copyToDevice(d_f_, h_f, d_fSize_, atomLocality, commandStream); + // TODO: Remove When event-based synchronization is introduced + gpuStreamSynchronize(commandStream); + fReadyOnDevice_[atomLocality].markEvent(commandStream); } +GpuEventSynchronizer* 
StatePropagatorDataGpu::Impl::getForcesReadyOnDeviceEvent(AtomLocality atomLocality) +{ + return &fReadyOnDevice_[atomLocality]; +} + + void StatePropagatorDataGpu::Impl::copyForcesFromGpu(gmx::ArrayRef h_f, AtomLocality atomLocality) { - // TODO: Use the correct stream - copyFromDevice(h_f, d_f_, d_fSize_, atomLocality, nullptr); + GMX_ASSERT(atomLocality < AtomLocality::Count, "Wrong atom locality."); + CommandStream commandStream = fCopyStreams_[atomLocality]; + GMX_ASSERT(commandStream != nullptr, "No stream is valid for copying forces with given atom locality."); + + copyFromDevice(h_f, d_f_, d_fSize_, atomLocality, commandStream); + // TODO: Remove When event-based synchronization is introduced + gpuStreamSynchronize(commandStream); + fReadyOnHost_[atomLocality].markEvent(commandStream); +} + +void StatePropagatorDataGpu::Impl::waitForcesReadyOnHost(AtomLocality atomLocality) +{ + fReadyOnHost_[atomLocality].waitForEvent(); } void* StatePropagatorDataGpu::Impl::getUpdateStream() @@ -413,12 +468,22 @@ void StatePropagatorDataGpu::copyVelocitiesToGpu(const gmx::ArrayRefcopyVelocitiesToGpu(h_v, atomLocality); } +GpuEventSynchronizer* StatePropagatorDataGpu::getVelocitiesReadyOnDeviceEvent(AtomLocality atomLocality) +{ + return impl_->getVelocitiesReadyOnDeviceEvent(atomLocality); +} + void StatePropagatorDataGpu::copyVelocitiesFromGpu(gmx::ArrayRef h_v, AtomLocality atomLocality) { return impl_->copyVelocitiesFromGpu(h_v, atomLocality); } +void StatePropagatorDataGpu::waitVelocitiesReadyOnHost(AtomLocality atomLocality) +{ + return impl_->waitVelocitiesReadyOnHost(atomLocality); +} + DeviceBuffer StatePropagatorDataGpu::getForces() { @@ -431,12 +496,23 @@ void StatePropagatorDataGpu::copyForcesToGpu(const gmx::ArrayRefcopyForcesToGpu(h_f, atomLocality); } +GpuEventSynchronizer* StatePropagatorDataGpu::getForcesReadyOnDeviceEvent(AtomLocality atomLocality) +{ + return impl_->getForcesReadyOnDeviceEvent(atomLocality); +} + void 
StatePropagatorDataGpu::copyForcesFromGpu(gmx::ArrayRef h_f, AtomLocality atomLocality) { return impl_->copyForcesFromGpu(h_f, atomLocality); } +void StatePropagatorDataGpu::waitForcesReadyOnHost(AtomLocality atomLocality) +{ + return impl_->waitForcesReadyOnHost(atomLocality); +} + + void* StatePropagatorDataGpu::getUpdateStream() { return impl_->getUpdateStream();