From b7aaefb2908e2d73815b7ad19c639e0e50fbff9b Mon Sep 17 00:00:00 2001 From: Andrey Alekseenko Date: Tue, 16 Mar 2021 11:03:24 +0100 Subject: [PATCH] Use DeviceBuffer in GPU force reduction and PME code ... instead of raw device pointers. Preparation for #3932. PME change is incidental, the main focus is GpuForceReduction. --- src/gromacs/ewald/pme.h | 6 +++--- src/gromacs/ewald/pme_force_sender_gpu.h | 3 ++- src/gromacs/ewald/pme_force_sender_gpu_impl.cpp | 3 ++- src/gromacs/ewald/pme_force_sender_gpu_impl.cu | 4 ++-- src/gromacs/ewald/pme_force_sender_gpu_impl.h | 7 +++++-- src/gromacs/ewald/pme_gpu.cpp | 4 ++-- src/gromacs/ewald/pme_gpu_internal.cpp | 4 ++-- src/gromacs/ewald/pme_gpu_internal.h | 4 ++-- src/gromacs/ewald/pme_only.cpp | 3 +-- src/gromacs/ewald/pme_pp_comm_gpu.h | 3 ++- src/gromacs/ewald/pme_pp_comm_gpu_impl.cpp | 4 ++-- src/gromacs/ewald/pme_pp_comm_gpu_impl.cu | 10 ++++++---- src/gromacs/ewald/pme_pp_comm_gpu_impl.h | 6 ++++-- src/gromacs/mdlib/gpuforcereduction.h | 2 +- src/gromacs/mdlib/gpuforcereduction_impl.cpp | 2 +- src/gromacs/mdlib/gpuforcereduction_impl.cu | 13 ++++++++----- ...ereduction_impl.cuh => gpuforcereduction_impl.h} | 12 ++++++------ src/gromacs/mdlib/sim_util.cpp | 7 ++++--- src/gromacs/mdlib/update_constrain_gpu_impl.cu | 1 + src/gromacs/mdlib/update_constrain_gpu_impl.h | 3 ++- 20 files changed, 58 insertions(+), 43 deletions(-) rename src/gromacs/mdlib/{gpuforcereduction_impl.cuh => gpuforcereduction_impl.h} (95%) diff --git a/src/gromacs/ewald/pme.h b/src/gromacs/ewald/pme.h index c9f1a6e280..d897c0fb0c 100644 --- a/src/gromacs/ewald/pme.h +++ b/src/gromacs/ewald/pme.h @@ -467,12 +467,12 @@ GPU_FUNC_QUALIFIER void pme_gpu_set_device_x(const gmx_pme_t* GPU_FUNC_AR * \param[in] pme The PME data structure. * \returns Pointer to force data */ -GPU_FUNC_QUALIFIER void* pme_gpu_get_device_f(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme)) - GPU_FUNC_TERM_WITH_RETURN(nullptr); +GPU_FUNC_QUALIFIER DeviceBuffer pme_gpu_get_device_f(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme)) + GPU_FUNC_TERM_WITH_RETURN(DeviceBuffer{}); /*! \brief Get pointer to the device synchronizer object that allows syncing on PME force calculation completion * \param[in] pme The PME data structure. - * \returns Pointer to sychronizer + * \returns Pointer to synchronizer */ GPU_FUNC_QUALIFIER GpuEventSynchronizer* pme_gpu_get_f_ready_synchronizer(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme)) GPU_FUNC_TERM_WITH_RETURN(nullptr); diff --git a/src/gromacs/ewald/pme_force_sender_gpu.h b/src/gromacs/ewald/pme_force_sender_gpu.h index 081ba454e6..2612609779 100644 --- a/src/gromacs/ewald/pme_force_sender_gpu.h +++ b/src/gromacs/ewald/pme_force_sender_gpu.h @@ -45,6 +45,7 @@ #include #include "gromacs/math/vectypes.h" +#include "gromacs/gpu_utils/devicebuffer_datatype.h" #include "gromacs/utility/gmxmpi.h" class GpuEventSynchronizer; @@ -83,7 +84,7 @@ public: * Initialization of GPU PME Force sender * \param[in] d_f force buffer in GPU memory */ - void sendForceBufferAddressToPpRanks(rvec* d_f); + void sendForceBufferAddressToPpRanks(DeviceBuffer d_f); /*! \brief * Send force synchronizer to PP rank diff --git a/src/gromacs/ewald/pme_force_sender_gpu_impl.cpp b/src/gromacs/ewald/pme_force_sender_gpu_impl.cpp index 8d8b97f5c5..69974b3119 100644 --- a/src/gromacs/ewald/pme_force_sender_gpu_impl.cpp +++ b/src/gromacs/ewald/pme_force_sender_gpu_impl.cpp @@ -48,6 +48,7 @@ #include "config.h" #include "gromacs/ewald/pme_force_sender_gpu.h" +#include "gromacs/gpu_utils/devicebuffer_datatype.h" #include "gromacs/utility/arrayref.h" #include "gromacs/utility/gmxassert.h" @@ -75,7 +76,7 @@ PmeForceSenderGpu::PmeForceSenderGpu(GpuEventSynchronizer* /*pmeForcesReady */, PmeForceSenderGpu::~PmeForceSenderGpu() = default; /*!\brief init PME-PP GPU communication stub */ -void PmeForceSenderGpu::sendForceBufferAddressToPpRanks(rvec* /* d_f */) +void PmeForceSenderGpu::sendForceBufferAddressToPpRanks(DeviceBuffer /* d_f */) { GMX_ASSERT(!impl_, "A CPU stub for PME-PP GPU communication initialization was called instead of the " diff --git a/src/gromacs/ewald/pme_force_sender_gpu_impl.cu b/src/gromacs/ewald/pme_force_sender_gpu_impl.cu index 44a2e30de3..509a624d04 100644 --- a/src/gromacs/ewald/pme_force_sender_gpu_impl.cu +++ b/src/gromacs/ewald/pme_force_sender_gpu_impl.cu @@ -70,7 +70,7 @@ PmeForceSenderGpu::Impl::Impl(GpuEventSynchronizer* pmeForcesReady, PmeForceSenderGpu::Impl::~Impl() = default; /*! \brief sends force buffer address to PP ranks */ -void PmeForceSenderGpu::Impl::sendForceBufferAddressToPpRanks(rvec* d_f) +void PmeForceSenderGpu::Impl::sendForceBufferAddressToPpRanks(DeviceBuffer d_f) { int ind_start = 0; int ind_end = 0; @@ -113,7 +113,7 @@ PmeForceSenderGpu::PmeForceSenderGpu(GpuEventSynchronizer* pmeForcesReady, PmeForceSenderGpu::~PmeForceSenderGpu() = default; -void PmeForceSenderGpu::sendForceBufferAddressToPpRanks(rvec* d_f) +void PmeForceSenderGpu::sendForceBufferAddressToPpRanks(DeviceBuffer d_f) { impl_->sendForceBufferAddressToPpRanks(d_f); } diff --git a/src/gromacs/ewald/pme_force_sender_gpu_impl.h b/src/gromacs/ewald/pme_force_sender_gpu_impl.h index ad9718c468..c7d4c0d76c 100644 --- a/src/gromacs/ewald/pme_force_sender_gpu_impl.h +++ b/src/gromacs/ewald/pme_force_sender_gpu_impl.h @@ -44,9 +44,12 @@ #define GMX_PMEFORCESENDERGPU_IMPL_H #include "gromacs/ewald/pme_force_sender_gpu.h" -#include "gromacs/gpu_utils/gpueventsynchronizer.cuh" +#include "gromacs/gpu_utils/devicebuffer_datatype.h" +#include "gromacs/gpu_utils/gputraits.h" #include "gromacs/utility/arrayref.h" +class GpuEventSynchronizer; + namespace gmx { @@ -68,7 +71,7 @@ public: * sends force buffer address to PP rank * \param[in] d_f force buffer in GPU memory */ - void sendForceBufferAddressToPpRanks(rvec* d_f); + void sendForceBufferAddressToPpRanks(DeviceBuffer d_f); /*! \brief * Send force synchronizer to PP rank diff --git a/src/gromacs/ewald/pme_gpu.cpp b/src/gromacs/ewald/pme_gpu.cpp index e0c32e207d..225fb1050a 100644 --- a/src/gromacs/ewald/pme_gpu.cpp +++ b/src/gromacs/ewald/pme_gpu.cpp @@ -440,11 +440,11 @@ void pme_gpu_reinit_computation(const gmx_pme_t* pme, gmx_wallcycle* wcycle) wallcycle_stop(wcycle, ewcLAUNCH_GPU); } -void* pme_gpu_get_device_f(const gmx_pme_t* pme) +DeviceBuffer pme_gpu_get_device_f(const gmx_pme_t* pme) { if (!pme || !pme_gpu_active(pme)) { - return nullptr; + return DeviceBuffer{}; } return pme_gpu_get_kernelparam_forces(pme->gpu); } diff --git a/src/gromacs/ewald/pme_gpu_internal.cpp b/src/gromacs/ewald/pme_gpu_internal.cpp index 8eea806ac1..798a9be9ef 100644 --- a/src/gromacs/ewald/pme_gpu_internal.cpp +++ b/src/gromacs/ewald/pme_gpu_internal.cpp @@ -1701,7 +1701,7 @@ void pme_gpu_gather(PmeGpu* pmeGpu, real** h_grids, const float lambda) } } -void* pme_gpu_get_kernelparam_forces(const PmeGpu* pmeGpu) +DeviceBuffer pme_gpu_get_kernelparam_forces(const PmeGpu* pmeGpu) { if (pmeGpu && pmeGpu->kernelParams) { @@ -1709,7 +1709,7 @@ void* pme_gpu_get_kernelparam_forces(const PmeGpu* pmeGpu) } else { - return nullptr; + return DeviceBuffer{}; } } diff --git a/src/gromacs/ewald/pme_gpu_internal.h b/src/gromacs/ewald/pme_gpu_internal.h index 41b912e2b6..7baa6bd347 100644 --- a/src/gromacs/ewald/pme_gpu_internal.h +++ b/src/gromacs/ewald/pme_gpu_internal.h @@ -405,8 +405,8 @@ GPU_FUNC_QUALIFIER void pme_gpu_set_kernelparam_coordinates(const PmeGpu* GPU_FU * \param[in] pmeGpu The PME GPU structure. * \returns Pointer to force data */ -GPU_FUNC_QUALIFIER void* pme_gpu_get_kernelparam_forces(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu)) - GPU_FUNC_TERM_WITH_RETURN(nullptr); +GPU_FUNC_QUALIFIER DeviceBuffer pme_gpu_get_kernelparam_forces(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu)) + GPU_FUNC_TERM_WITH_RETURN(DeviceBuffer{}); /*! \brief Return pointer to the sync object triggered after the PME force calculation completion * \param[in] pmeGpu The PME GPU structure. diff --git a/src/gromacs/ewald/pme_only.cpp b/src/gromacs/ewald/pme_only.cpp index 9ba22e2620..138711440c 100644 --- a/src/gromacs/ewald/pme_only.cpp +++ b/src/gromacs/ewald/pme_only.cpp @@ -445,8 +445,7 @@ static int gmx_pme_recv_coeffs_coords(struct gmx_pme_t* pme, // This rank will have its data accessed directly by PP rank, so needs to send the remote addresses. pme_pp->pmeCoordinateReceiverGpu->sendCoordinateBufferAddressToPpRanks( stateGpu->getCoordinates()); - pme_pp->pmeForceSenderGpu->sendForceBufferAddressToPpRanks( - reinterpret_cast(pme_gpu_get_device_f(pme))); + pme_pp->pmeForceSenderGpu->sendForceBufferAddressToPpRanks(pme_gpu_get_device_f(pme)); } } diff --git a/src/gromacs/ewald/pme_pp_comm_gpu.h b/src/gromacs/ewald/pme_pp_comm_gpu.h index 3e56da9af3..886e0c221b 100644 --- a/src/gromacs/ewald/pme_pp_comm_gpu.h +++ b/src/gromacs/ewald/pme_pp_comm_gpu.h @@ -44,6 +44,7 @@ #include +#include "gromacs/gpu_utils/devicebuffer_datatype.h" #include "gromacs/utility/gmxmpi.h" class DeviceContext; @@ -99,7 +100,7 @@ public: /*! \brief * Return pointer to buffer used for staging PME force on GPU */ - void* getGpuForceStagingPtr(); + DeviceBuffer getGpuForceStagingPtr(); /*! \brief * Return pointer to event recorded when forces are ready diff --git a/src/gromacs/ewald/pme_pp_comm_gpu_impl.cpp b/src/gromacs/ewald/pme_pp_comm_gpu_impl.cpp index c5f92aa53f..d31b976c10 100644 --- a/src/gromacs/ewald/pme_pp_comm_gpu_impl.cpp +++ b/src/gromacs/ewald/pme_pp_comm_gpu_impl.cpp @@ -102,12 +102,12 @@ void PmePpCommGpu::sendCoordinatesToPmeCudaDirect(void* /* sendPtr */, "implementation."); } -void* PmePpCommGpu::getGpuForceStagingPtr() +DeviceBuffer PmePpCommGpu::getGpuForceStagingPtr() { GMX_ASSERT(!impl_, "A CPU stub for PME-PP GPU communication was called instead of the correct " "implementation."); - return nullptr; + return DeviceBuffer{}; } GpuEventSynchronizer* PmePpCommGpu::getForcesReadySynchronizer() diff --git a/src/gromacs/ewald/pme_pp_comm_gpu_impl.cu b/src/gromacs/ewald/pme_pp_comm_gpu_impl.cu index 0ecf028133..cb9e787c44 100644 --- a/src/gromacs/ewald/pme_pp_comm_gpu_impl.cu +++ b/src/gromacs/ewald/pme_pp_comm_gpu_impl.cu @@ -64,7 +64,8 @@ PmePpCommGpu::Impl::Impl(MPI_Comm comm, deviceContext_(deviceContext), pmePpCommStream_(deviceStream), comm_(comm), - pmeRank_(pmeRank) + pmeRank_(pmeRank), + d_pmeForces_(nullptr) { GMX_RELEASE_ASSERT( GMX_THREAD_MPI, @@ -155,9 +156,10 @@ void PmePpCommGpu::Impl::sendCoordinatesToPmeCudaDirect(void* sendPtr, GMX_UNUSED_VALUE(coordinatesReadyOnDeviceEvent); #endif } -void* PmePpCommGpu::Impl::getGpuForceStagingPtr() + +DeviceBuffer PmePpCommGpu::Impl::getGpuForceStagingPtr() { - return static_cast(d_pmeForces_); + return d_pmeForces_; } GpuEventSynchronizer* PmePpCommGpu::Impl::getForcesReadySynchronizer() @@ -194,7 +196,7 @@ void PmePpCommGpu::sendCoordinatesToPmeCudaDirect(void* sendPtr, sendPtr, sendSize, sendPmeCoordinatesFromGpu, coordinatesReadyOnDeviceEvent); } -void* PmePpCommGpu::getGpuForceStagingPtr() +DeviceBuffer PmePpCommGpu::getGpuForceStagingPtr() { return impl_->getGpuForceStagingPtr(); } diff --git a/src/gromacs/ewald/pme_pp_comm_gpu_impl.h b/src/gromacs/ewald/pme_pp_comm_gpu_impl.h index 042891a04d..70ef8f937c 100644 --- a/src/gromacs/ewald/pme_pp_comm_gpu_impl.h +++ b/src/gromacs/ewald/pme_pp_comm_gpu_impl.h @@ -44,7 +44,9 @@ #define GMX_PME_PP_COMM_GPU_IMPL_H #include "gromacs/ewald/pme_pp_comm_gpu.h" +#include "gromacs/gpu_utils/devicebuffer_datatype.h" #include "gromacs/gpu_utils/gpueventsynchronizer.cuh" +#include "gromacs/gpu_utils/gputraits.h" #include "gromacs/math/vectypes.h" #include "gromacs/utility/gmxmpi.h" @@ -110,7 +112,7 @@ public: /*! \brief * Return pointer to buffer used for staging PME force on GPU */ - void* getGpuForceStagingPtr(); + DeviceBuffer getGpuForceStagingPtr(); /*! \brief * Return pointer to event recorded when forces are ready @@ -131,7 +133,7 @@ private: //! Rank of PME task int pmeRank_ = -1; //! Buffer for staging PME force on GPU - rvec* d_pmeForces_ = nullptr; + DeviceBuffer d_pmeForces_; //! number of atoms in PME force staging array int d_pmeForcesSize_ = -1; //! number of atoms allocated in recvbuf array diff --git a/src/gromacs/mdlib/gpuforcereduction.h b/src/gromacs/mdlib/gpuforcereduction.h index b23df660ed..2955dd60c2 100644 --- a/src/gromacs/mdlib/gpuforcereduction.h +++ b/src/gromacs/mdlib/gpuforcereduction.h @@ -92,7 +92,7 @@ public: * * \param [in] forcePtr Pointer to force to be reduced */ - void registerRvecForce(void* forcePtr); + void registerRvecForce(DeviceBuffer forcePtr); /*! \brief Add a dependency for this force reduction * diff --git a/src/gromacs/mdlib/gpuforcereduction_impl.cpp b/src/gromacs/mdlib/gpuforcereduction_impl.cpp index b431fbad49..6d826d6675 100644 --- a/src/gromacs/mdlib/gpuforcereduction_impl.cpp +++ b/src/gromacs/mdlib/gpuforcereduction_impl.cpp @@ -82,7 +82,7 @@ void GpuForceReduction::registerNbnxmForce(DeviceBuffer /* forcePtr */) } // NOLINTNEXTLINE readability-convert-member-functions-to-static -void GpuForceReduction::registerRvecForce(void* /* forcePtr */) +void GpuForceReduction::registerRvecForce(DeviceBuffer /* forcePtr */) { GMX_ASSERT(false, "A CPU stub has been called instead of the correct implementation."); } diff --git a/src/gromacs/mdlib/gpuforcereduction_impl.cu b/src/gromacs/mdlib/gpuforcereduction_impl.cu index dab7d4da0c..f62ec44d46 100644 --- a/src/gromacs/mdlib/gpuforcereduction_impl.cu +++ b/src/gromacs/mdlib/gpuforcereduction_impl.cu @@ -43,7 +43,7 @@ #include "gmxpre.h" -#include "gpuforcereduction_impl.cuh" +#include "gpuforcereduction_impl.h" #include @@ -108,11 +108,14 @@ static __global__ void reduceKernel(const float3* __restrict__ gm_nbnxmForce, GpuForceReduction::Impl::Impl(const DeviceContext& deviceContext, const DeviceStream& deviceStream, gmx_wallcycle* wcycle) : + baseForce_(nullptr), deviceContext_(deviceContext), deviceStream_(deviceStream), + nbnxmForceToAdd_(nullptr), + rvecForceToAdd_(nullptr), wcycle_(wcycle){}; -void GpuForceReduction::Impl::reinit(float3* baseForcePtr, +void GpuForceReduction::Impl::reinit(DeviceBuffer baseForcePtr, const int numAtoms, ArrayRef cell, const int atomStart, @@ -223,9 +226,9 @@ void GpuForceReduction::registerNbnxmForce(DeviceBuffer forcePtr) impl_->registerNbnxmForce(forcePtr); } -void GpuForceReduction::registerRvecForce(void* forcePtr) +void GpuForceReduction::registerRvecForce(DeviceBuffer forcePtr) { - impl_->registerRvecForce(reinterpret_cast>(forcePtr)); + impl_->registerRvecForce(forcePtr); } void GpuForceReduction::addDependency(GpuEventSynchronizer* const dependency) @@ -240,7 +243,7 @@ void GpuForceReduction::reinit(DeviceBuffer baseForcePtr, const bool accumulate, GpuEventSynchronizer* completionMarker) { - impl_->reinit(asFloat3(baseForcePtr), numAtoms, cell, atomStart, accumulate, completionMarker); + impl_->reinit(baseForcePtr, numAtoms, cell, atomStart, accumulate, completionMarker); } void GpuForceReduction::execute() { diff --git a/src/gromacs/mdlib/gpuforcereduction_impl.cuh b/src/gromacs/mdlib/gpuforcereduction_impl.h similarity index 95% rename from src/gromacs/mdlib/gpuforcereduction_impl.cuh rename to src/gromacs/mdlib/gpuforcereduction_impl.h index c7d9493c82..98c8ca2c7b 100644 --- a/src/gromacs/mdlib/gpuforcereduction_impl.cuh +++ b/src/gromacs/mdlib/gpuforcereduction_impl.h @@ -59,7 +59,7 @@ struct cellInfo //! cell index mapping for any nbat-format forces const int* cell = nullptr; //! device copy of cell index mapping for any nbat-format forces - int* d_cell = nullptr; + DeviceBuffer d_cell; //! number of atoms in cell array int cellSize = -1; //! number of atoms allocated in cell array @@ -76,7 +76,7 @@ public: * \param [in] deviceContext GPU device context * \param [in] wcycle The wallclock counter */ - Impl(const DeviceContext& deviceContext, const DeviceStream& deviceStreami, gmx_wallcycle* wcycle); + Impl(const DeviceContext& deviceContext, const DeviceStream& deviceStream, gmx_wallcycle* wcycle); ~Impl(); /*! \brief Register a nbnxm-format force to be reduced @@ -106,7 +106,7 @@ public: * \param [in] accumulate Whether reduction should be accumulated * \param [in] completionMarker Event to be marked when launch of reduction is complete */ - void reinit(float3* baseForcePtr, + void reinit(DeviceBuffer baseForcePtr, const int numAtoms, ArrayRef cell, const int atomStart, @@ -118,7 +118,7 @@ public: private: //! force to be used as a base for this reduction - float3* baseForce_ = nullptr; + DeviceBuffer baseForce_; //! starting atom int atomStart_ = 0; //! number of atoms @@ -134,9 +134,9 @@ private: //! stream to be used for this reduction const DeviceStream& deviceStream_; //! Nbnxm force to be added in this reduction - DeviceBuffer nbnxmForceToAdd_ = nullptr; + DeviceBuffer nbnxmForceToAdd_; //! Rvec-format force to be added in this reduction - DeviceBuffer rvecForceToAdd_ = nullptr; + DeviceBuffer rvecForceToAdd_; //! event to be marked when redcution launch has been completed GpuEventSynchronizer* completionMarker_ = nullptr; //! The wallclock counter diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp index 99dc4758c4..ec10bb27a0 100644 --- a/src/gromacs/mdlib/sim_util.cpp +++ b/src/gromacs/mdlib/sim_util.cpp @@ -1123,9 +1123,10 @@ static void setupGpuForceReductions(gmx::MdrunScheduleWorkload* runScheduleWork, if (runScheduleWork->simulationWork.useGpuPme && (thisRankHasDuty(cr, DUTY_PME) || runScheduleWork->simulationWork.useGpuPmePpCommunication)) { - void* forcePtr = thisRankHasDuty(cr, DUTY_PME) ? pme_gpu_get_device_f(fr->pmedata) - : // PME force buffer on same GPU - fr->pmePpCommGpu->getGpuForceStagingPtr(); // buffer received from other GPU + DeviceBuffer forcePtr = + thisRankHasDuty(cr, DUTY_PME) ? pme_gpu_get_device_f(fr->pmedata) + : // PME force buffer on same GPU + fr->pmePpCommGpu->getGpuForceStagingPtr(); // buffer received from other GPU fr->gpuForceReduction[gmx::AtomLocality::Local]->registerRvecForce(forcePtr); GpuEventSynchronizer* const pmeSynchronizer = diff --git a/src/gromacs/mdlib/update_constrain_gpu_impl.cu b/src/gromacs/mdlib/update_constrain_gpu_impl.cu index b561856511..3b428a183b 100644 --- a/src/gromacs/mdlib/update_constrain_gpu_impl.cu +++ b/src/gromacs/mdlib/update_constrain_gpu_impl.cu @@ -60,6 +60,7 @@ #include "gromacs/gpu_utils/device_context.h" #include "gromacs/gpu_utils/device_stream.h" #include "gromacs/gpu_utils/devicebuffer.h" +#include "gromacs/gpu_utils/gpueventsynchronizer.cuh" #include "gromacs/gpu_utils/gputraits.cuh" #include "gromacs/gpu_utils/vectype_ops.cuh" #include "gromacs/mdlib/leapfrog_gpu.h" diff --git a/src/gromacs/mdlib/update_constrain_gpu_impl.h b/src/gromacs/mdlib/update_constrain_gpu_impl.h index 76e41398e7..1d1a8fd0de 100644 --- a/src/gromacs/mdlib/update_constrain_gpu_impl.h +++ b/src/gromacs/mdlib/update_constrain_gpu_impl.h @@ -48,13 +48,14 @@ #include "gmxpre.h" -#include "gromacs/gpu_utils/gpueventsynchronizer.cuh" #include "gromacs/mdlib/leapfrog_gpu.h" #include "gromacs/mdlib/lincs_gpu.cuh" #include "gromacs/mdlib/settle_gpu.cuh" #include "gromacs/mdlib/update_constrain_gpu.h" #include "gromacs/mdtypes/inputrec.h" +class GpuEventSynchronizer; + namespace gmx { -- 2.22.0