From: Artem Zhmurov Date: Thu, 10 Oct 2019 16:48:30 +0000 (+0200) Subject: Add separate constructor to StatePropagatorDataGpu for PME-only rank / PME tests X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=a7759162cd5e4735bf4639eba061b418f7ab461a;p=alexxy%2Fgromacs.git Add separate constructor to StatePropagatorDataGpu for PME-only rank / PME tests A separate constructor is added to the StatePropagatorDataGpu to use in the separate PME rank and in PME tests. These use the provided stream to copy coordinates for atom with Local or All localities. Copy of coordinates for non-local particles as well as copy operations for the forces and velocities are not allowed by assertions. Refs. #3126. Change-Id: I66aeeaea54931398b1a4a30b920b092f7d40ae16 --- diff --git a/src/gromacs/ewald/pme_only.cpp b/src/gromacs/ewald/pme_only.cpp index 6cefcb8f32..10ceb2ce3b 100644 --- a/src/gromacs/ewald/pme_only.cpp +++ b/src/gromacs/ewald/pme_only.cpp @@ -559,10 +559,9 @@ int gmx_pmeonly(struct gmx_pme_t *pme, std::unique_ptr stateGpu; if (useGpuForPme) { - // TODO: The local and non-local nonbonded streams are passed as nullptrs, since they will be not used for the GPU buffer - // management in PME only ranks. Make the constructor safer. - stateGpu = std::make_unique(commandStream, nullptr, nullptr, - deviceContext, GpuApiCallBehavior::Async, paddingSize); + // TODO: Special PME-only constructor is used here. There is no mechanism to prevent from using the other constructor here. + // This should be made safer. + stateGpu = std::make_unique(commandStream, deviceContext, GpuApiCallBehavior::Async, paddingSize); } diff --git a/src/gromacs/ewald/tests/pmetestcommon.cpp b/src/gromacs/ewald/tests/pmetestcommon.cpp index c82c0c685d..2153e2c139 100644 --- a/src/gromacs/ewald/tests/pmetestcommon.cpp +++ b/src/gromacs/ewald/tests/pmetestcommon.cpp @@ -172,7 +172,9 @@ std::unique_ptr makeStatePropagatorDataGpu(const gmx_pme_t &pme) { // TODO: Pin the host buffer and use async memory copies - return std::make_unique(pme_gpu_get_device_stream(&pme), nullptr, nullptr, + // TODO: Special constructor for PME-only rank / PME-tests is used here. There should be a mechanism to + // restrict one from using other constructor here. + return std::make_unique(pme_gpu_get_device_stream(&pme), pme_gpu_get_device_context(&pme), GpuApiCallBehavior::Sync, pme_gpu_get_padding_size(&pme)); diff --git a/src/gromacs/mdtypes/state_propagator_data_gpu.h b/src/gromacs/mdtypes/state_propagator_data_gpu.h index 086c43fd4e..96066d0ac3 100644 --- a/src/gromacs/mdtypes/state_propagator_data_gpu.h +++ b/src/gromacs/mdtypes/state_propagator_data_gpu.h @@ -127,6 +127,27 @@ class StatePropagatorDataGpu const void *deviceContext, GpuApiCallBehavior transferKind, int paddingSize); + + /*! \brief Constructor to use in PME-only rank and in tests. + * + * This constructor should be used if only a coordinate device buffer should be managed + * using a single stream. Any operation on force or velocity buffer as well as copy of + * non-local coordinates will exit with assertion failure. Note, that the pmeStream can + * not be a nullptr and the constructor will exit with an assertion failure. + * + * \todo Currently, unsupported copy operations are blocked by assertion that the stream + * not nullptr. This should be improved. + * + * \param[in] pmeStream Device PME stream, nullptr is not allowed. + * \param[in] deviceContext Device context, nullptr allowed for non-OpenCL builds. + * \param[in] transferKind H2D/D2H transfer call behavior (synchronous or not). + * \param[in] paddingSize Padding size for coordinates buffer. + */ + StatePropagatorDataGpu(const void *pmeStream, + const void *deviceContext, + GpuApiCallBehavior transferKind, + int paddingSize); + //! Move constructor StatePropagatorDataGpu(StatePropagatorDataGpu &&other) noexcept; //! Move assignment diff --git a/src/gromacs/mdtypes/state_propagator_data_gpu_impl.cpp b/src/gromacs/mdtypes/state_propagator_data_gpu_impl.cpp index 68d16ccd3b..417d488a80 100644 --- a/src/gromacs/mdtypes/state_propagator_data_gpu_impl.cpp +++ b/src/gromacs/mdtypes/state_propagator_data_gpu_impl.cpp @@ -64,6 +64,14 @@ StatePropagatorDataGpu::StatePropagatorDataGpu(const void * /* pmeStream { } +StatePropagatorDataGpu::StatePropagatorDataGpu(const void * /* pmeStream */, + const void * /* deviceContext */, + GpuApiCallBehavior /* transferKind */, + int /* paddingSize */) + : impl_(nullptr) +{ +} + StatePropagatorDataGpu::StatePropagatorDataGpu(StatePropagatorDataGpu && /* other */) noexcept = default; StatePropagatorDataGpu &StatePropagatorDataGpu::operator=(StatePropagatorDataGpu && /* other */) noexcept = default; diff --git a/src/gromacs/mdtypes/state_propagator_data_gpu_impl.h b/src/gromacs/mdtypes/state_propagator_data_gpu_impl.h index bd8c770d04..1b687c022e 100644 --- a/src/gromacs/mdtypes/state_propagator_data_gpu_impl.h +++ b/src/gromacs/mdtypes/state_propagator_data_gpu_impl.h @@ -117,6 +117,26 @@ class StatePropagatorDataGpu::Impl GpuApiCallBehavior transferKind, int paddingSize); + /*! \brief Constructor to use in PME-only rank and in tests. + * + * This constructor should be used if only a coordinate device buffer should be managed + * using a single stream. Any operation on force or velocity buffer as well as copy of + * non-local coordinates will exit with assertion failure. Note, that the pmeStream can + * not be a nullptr and the constructor will exit with an assertion failure. + * + * \todo Currently, unsupported copy operations are blocked by assertion that the stream + * not nullptr. This should be improved. + * + * \param[in] pmeStream Device PME stream, nullptr is not allowed. + * \param[in] deviceContext Device context, nullptr allowed for non-OpenCL builds. + * \param[in] transferKind H2D/D2H transfer call behavior (synchronous or not). + * \param[in] paddingSize Padding size for coordinates buffer. + */ + Impl(const void *pmeStream, + const void *deviceContext, + GpuApiCallBehavior transferKind, + int paddingSize); + ~Impl(); diff --git a/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp b/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp index 5aa64f6e25..70c73a1657 100644 --- a/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp +++ b/src/gromacs/mdtypes/state_propagator_data_gpu_impl_gpu.cpp @@ -128,6 +128,45 @@ StatePropagatorDataGpu::Impl::Impl(const void *pmeStream, fCopyStreams_[AtomLocality::All] = nullptr; } +StatePropagatorDataGpu::Impl::Impl(const void *pmeStream, + const void *deviceContext, + GpuApiCallBehavior transferKind, + int paddingSize) : + transferKind_(transferKind), + paddingSize_(paddingSize) +{ + static_assert(GMX_GPU != GMX_GPU_NONE, "This object should only be constructed on the GPU code-paths."); + GMX_RELEASE_ASSERT(getenv("GMX_USE_GPU_BUFFER_OPS") == nullptr, "GPU buffer ops are not supported in this build."); + + if (GMX_GPU == GMX_GPU_OPENCL) + { + GMX_ASSERT(deviceContext != nullptr, "GPU context should be set in OpenCL builds."); + deviceContext_ = *static_cast(deviceContext); + } + + GMX_ASSERT(pmeStream != nullptr, "GPU PME stream should be set."); + pmeStream_ = *static_cast(pmeStream); + + localStream_ = nullptr; + nonLocalStream_ = nullptr; + updateStream_ = nullptr; + + + // Only local/all coordinates are allowed to be copied in PME-only rank/ PME tests. + // This it temporary measure to make it safe to use this class in those cases. + xCopyStreams_[AtomLocality::Local] = pmeStream_; + xCopyStreams_[AtomLocality::NonLocal] = nullptr; + xCopyStreams_[AtomLocality::All] = pmeStream_; + + vCopyStreams_[AtomLocality::Local] = nullptr; + vCopyStreams_[AtomLocality::NonLocal] = nullptr; + vCopyStreams_[AtomLocality::All] = nullptr; + + fCopyStreams_[AtomLocality::Local] = nullptr; + fCopyStreams_[AtomLocality::NonLocal] = nullptr; + fCopyStreams_[AtomLocality::All] = nullptr; +} + StatePropagatorDataGpu::Impl::~Impl() { } @@ -440,6 +479,17 @@ StatePropagatorDataGpu::StatePropagatorDataGpu(const void *pmeStream, { } +StatePropagatorDataGpu::StatePropagatorDataGpu(const void *pmeStream, + const void *deviceContext, + GpuApiCallBehavior transferKind, + int paddingSize) + : impl_(new Impl(pmeStream, + deviceContext, + transferKind, + paddingSize)) +{ +} + StatePropagatorDataGpu::StatePropagatorDataGpu(StatePropagatorDataGpu && /* other */) noexcept = default; StatePropagatorDataGpu &StatePropagatorDataGpu::operator=(StatePropagatorDataGpu && /* other */) noexcept = default;