... instead of raw device pointers.
Preparation for #3932. The PME change is incidental; the main focus is
GpuForceReduction.
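
For reviewers unfamiliar with the type: DeviceBuffer<T> is GROMACS's typed
handle to device memory. The sketch below is a simplified illustration of
the idea, not the real definition in devicebuffer_datatype.h; the point is
that a value-initialized DeviceBuffer<T>{} takes over the role nullptr
played for the raw pointers removed here.

    // Simplified illustration only; the real alias/wrapper lives in
    // gromacs/gpu_utils/devicebuffer_datatype.h (exact config macro
    // differs between versions).
    #if GMX_GPU_CUDA
    template<typename ValueType>
    using DeviceBuffer = ValueType*; // in CUDA builds, just a typed pointer
    #else
    // In OpenCL builds it is instead a thin class wrapping cl_mem.
    #endif

    // Hence DeviceBuffer<gmx::RVec>{} value-initializes to an empty handle,
    // the typed replacement for returning nullptr.
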
* \param[in] pme The PME data structure.
- * \returns Pointer to force data
+ * \returns Device buffer with force data
*/
-GPU_FUNC_QUALIFIER void* pme_gpu_get_device_f(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme))
- GPU_FUNC_TERM_WITH_RETURN(nullptr);
+GPU_FUNC_QUALIFIER DeviceBuffer<gmx::RVec> pme_gpu_get_device_f(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme))
+ GPU_FUNC_TERM_WITH_RETURN(DeviceBuffer<gmx::RVec>{});
/*! \brief Get pointer to the device synchronizer object that allows syncing on PME force calculation completion
* \param[in] pme The PME data structure.
- * \returns Pointer to sychronizer
+ * \returns Pointer to synchronizer
*/
GPU_FUNC_QUALIFIER GpuEventSynchronizer* pme_gpu_get_f_ready_synchronizer(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme))
GPU_FUNC_TERM_WITH_RETURN(nullptr);
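
A note on the GPU_FUNC_* macros for anyone reviewing the stub changes: in
GPU builds the declaration is ordinary, while in CPU-only builds the macros
expand to an inline stub whose body returns the GPU_FUNC_TERM_WITH_RETURN
argument. Roughly (simplified from gpu_utils/gpu_macros.h):

    // CPU-only expansion, simplified: the macro argument becomes the stub's
    // return value, which is why it must change from nullptr to a
    // value-initialized DeviceBuffer along with the return type.
    static DeviceBuffer<gmx::RVec> pme_gpu_get_device_f(const gmx_pme_t* /* pme */)
    {
        return DeviceBuffer<gmx::RVec>{};
    }
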
#include <memory>
#include "gromacs/math/vectypes.h"
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/utility/gmxmpi.h"
class GpuEventSynchronizer;
* Initialization of GPU PME Force sender
* \param[in] d_f force buffer in GPU memory
*/
- void sendForceBufferAddressToPpRanks(rvec* d_f);
+ void sendForceBufferAddressToPpRanks(DeviceBuffer<RVec> d_f);
/*! \brief
* Send force synchronizer to PP rank
#include "config.h"
#include "gromacs/ewald/pme_force_sender_gpu.h"
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/gmxassert.h"
PmeForceSenderGpu::~PmeForceSenderGpu() = default;
-/*!\brief init PME-PP GPU communication stub */
+/*! \brief Stub for sending force buffer address to PP ranks */
-void PmeForceSenderGpu::sendForceBufferAddressToPpRanks(rvec* /* d_f */)
+void PmeForceSenderGpu::sendForceBufferAddressToPpRanks(DeviceBuffer<RVec> /* d_f */)
{
GMX_ASSERT(!impl_,
"A CPU stub for PME-PP GPU communication initialization was called instead of the "
PmeForceSenderGpu::Impl::~Impl() = default;
/*! \brief sends force buffer address to PP ranks */
-void PmeForceSenderGpu::Impl::sendForceBufferAddressToPpRanks(rvec* d_f)
+void PmeForceSenderGpu::Impl::sendForceBufferAddressToPpRanks(DeviceBuffer<Float3> d_f)
{
int ind_start = 0;
int ind_end = 0;
PmeForceSenderGpu::~PmeForceSenderGpu() = default;
-void PmeForceSenderGpu::sendForceBufferAddressToPpRanks(rvec* d_f)
+void PmeForceSenderGpu::sendForceBufferAddressToPpRanks(DeviceBuffer<RVec> d_f)
{
impl_->sendForceBufferAddressToPpRanks(d_f);
}
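
For context on what the CUDA implementation does with the typed argument, a
condensed sketch (loop and MPI details paraphrased, not part of this hunk):
under thread-MPI all ranks share one address space, and since
DeviceBuffer<Float3> is a plain Float3* in CUDA builds, slicing the buffer
and sending each slice's address work exactly as before.

    // Condensed sketch of the CUDA-side loop; ppRanks_ and comm_ are
    // members of the Impl.
    int offset = 0;
    for (const auto& receiver : ppRanks_)
    {
        // DeviceBuffer<Float3> decays to Float3* in CUDA builds, so the
        // per-rank slice address can travel through thread-MPI unchanged.
        Float3* sendBuf = &d_f[offset];
        MPI_Send(&sendBuf, sizeof(Float3*), MPI_BYTE, receiver.rankId, 0, comm_);
        offset += receiver.numAtoms;
    }
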
#define GMX_PMEFORCESENDERGPU_IMPL_H
#include "gromacs/ewald/pme_force_sender_gpu.h"
-#include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
+#include "gromacs/gpu_utils/gputraits.h"
#include "gromacs/utility/arrayref.h"
+class GpuEventSynchronizer;
+
namespace gmx
{
* sends force buffer address to PP rank
* \param[in] d_f force buffer in GPU memory
*/
- void sendForceBufferAddressToPpRanks(rvec* d_f);
+ void sendForceBufferAddressToPpRanks(DeviceBuffer<Float3> d_f);
/*! \brief
* Send force synchronizer to PP rank
wallcycle_stop(wcycle, ewcLAUNCH_GPU);
}
-void* pme_gpu_get_device_f(const gmx_pme_t* pme)
+DeviceBuffer<gmx::RVec> pme_gpu_get_device_f(const gmx_pme_t* pme)
{
if (!pme || !pme_gpu_active(pme))
{
- return nullptr;
+ return DeviceBuffer<gmx::RVec>{};
}
return pme_gpu_get_kernelparam_forces(pme->gpu);
}
}
}
-void* pme_gpu_get_kernelparam_forces(const PmeGpu* pmeGpu)
+DeviceBuffer<gmx::RVec> pme_gpu_get_kernelparam_forces(const PmeGpu* pmeGpu)
{
if (pmeGpu && pmeGpu->kernelParams)
{
}
else
{
- return nullptr;
+ return DeviceBuffer<gmx::RVec>{};
}
}
* \param[in] pmeGpu The PME GPU structure.
- * \returns Pointer to force data
+ * \returns Device buffer with force data
*/
-GPU_FUNC_QUALIFIER void* pme_gpu_get_kernelparam_forces(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu))
- GPU_FUNC_TERM_WITH_RETURN(nullptr);
+GPU_FUNC_QUALIFIER DeviceBuffer<gmx::RVec> pme_gpu_get_kernelparam_forces(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu))
+ GPU_FUNC_TERM_WITH_RETURN(DeviceBuffer<gmx::RVec>{});
/*! \brief Return pointer to the sync object triggered after the PME force calculation completion
* \param[in] pmeGpu The PME GPU structure.
// This rank will have its data accessed directly by PP rank, so needs to send the remote addresses.
pme_pp->pmeCoordinateReceiverGpu->sendCoordinateBufferAddressToPpRanks(
stateGpu->getCoordinates());
- pme_pp->pmeForceSenderGpu->sendForceBufferAddressToPpRanks(
- reinterpret_cast<rvec*>(pme_gpu_get_device_f(pme)));
+ pme_pp->pmeForceSenderGpu->sendForceBufferAddressToPpRanks(pme_gpu_get_device_f(pme));
}
}
#include <memory>
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/utility/gmxmpi.h"
class DeviceContext;
/*! \brief
- * Return pointer to buffer used for staging PME force on GPU
+ * Return device buffer used for staging PME forces on GPU
*/
- void* getGpuForceStagingPtr();
+ DeviceBuffer<gmx::RVec> getGpuForceStagingPtr();
/*! \brief
* Return pointer to event recorded when forces are ready
"implementation.");
}
-void* PmePpCommGpu::getGpuForceStagingPtr()
+DeviceBuffer<gmx::RVec> PmePpCommGpu::getGpuForceStagingPtr()
{
GMX_ASSERT(!impl_,
"A CPU stub for PME-PP GPU communication was called instead of the correct "
"implementation.");
- return nullptr;
+ return DeviceBuffer<gmx::RVec>{};
}
GpuEventSynchronizer* PmePpCommGpu::getForcesReadySynchronizer()
deviceContext_(deviceContext),
pmePpCommStream_(deviceStream),
comm_(comm),
- pmeRank_(pmeRank)
+ pmeRank_(pmeRank),
+ d_pmeForces_(nullptr)
{
GMX_RELEASE_ASSERT(
GMX_THREAD_MPI,
GMX_UNUSED_VALUE(coordinatesReadyOnDeviceEvent);
#endif
}
-void* PmePpCommGpu::Impl::getGpuForceStagingPtr()
+
+DeviceBuffer<Float3> PmePpCommGpu::Impl::getGpuForceStagingPtr()
{
- return static_cast<void*>(d_pmeForces_);
+ return d_pmeForces_;
}
GpuEventSynchronizer* PmePpCommGpu::Impl::getForcesReadySynchronizer()
sendPtr, sendSize, sendPmeCoordinatesFromGpu, coordinatesReadyOnDeviceEvent);
}
-void* PmePpCommGpu::getGpuForceStagingPtr()
+DeviceBuffer<gmx::RVec> PmePpCommGpu::getGpuForceStagingPtr()
{
return impl_->getGpuForceStagingPtr();
}
#define GMX_PME_PP_COMM_GPU_IMPL_H
#include "gromacs/ewald/pme_pp_comm_gpu.h"
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
+#include "gromacs/gpu_utils/gputraits.h"
#include "gromacs/math/vectypes.h"
#include "gromacs/utility/gmxmpi.h"
/*! \brief
- * Return pointer to buffer used for staging PME force on GPU
+ * Return device buffer used for staging PME forces on GPU
*/
- void* getGpuForceStagingPtr();
+ DeviceBuffer<Float3> getGpuForceStagingPtr();
/*! \brief
* Return pointer to event recorded when forces are ready
//! Rank of PME task
int pmeRank_ = -1;
//! Buffer for staging PME force on GPU
- rvec* d_pmeForces_ = nullptr;
+ DeviceBuffer<gmx::RVec> d_pmeForces_;
//! number of atoms in PME force staging array
int d_pmeForcesSize_ = -1;
//! number of atoms allocated in recvbuf array
*
- * \param [in] forcePtr Pointer to force to be reduced
+ * \param [in] forcePtr Device buffer of forces to be reduced
*/
- void registerRvecForce(void* forcePtr);
+ void registerRvecForce(DeviceBuffer<gmx::RVec> forcePtr);
/*! \brief Add a dependency for this force reduction
*
}
// NOLINTNEXTLINE readability-convert-member-functions-to-static
-void GpuForceReduction::registerRvecForce(void* /* forcePtr */)
+void GpuForceReduction::registerRvecForce(DeviceBuffer<gmx::RVec> /* forcePtr */)
{
GMX_ASSERT(false, "A CPU stub has been called instead of the correct implementation.");
}
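
Since this class is the main focus, a quick lifecycle sketch of the API as
it stands after this change (buffers and event objects are assumed to
exist; illustrative, not a snippet from the tree):

    // Wire up and launch a reduction into d_baseForce.
    gpuForceReduction->reinit(d_baseForce,               // DeviceBuffer<RVec> base/output
                              numAtoms, cell, atomStart,
                              /* accumulate = */ true, completionMarker);
    gpuForceReduction->registerNbnxmForce(d_nbnxmForce); // nbat-format input
    gpuForceReduction->registerRvecForce(d_pmeForce);    // rvec-format input, now typed
    gpuForceReduction->addDependency(pmeForcesReady);    // event to wait on
    gpuForceReduction->execute();                        // enqueue the reduction
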
#include "gmxpre.h"
-#include "gpuforcereduction_impl.cuh"
+#include "gpuforcereduction_impl.h"
#include <stdio.h>
GpuForceReduction::Impl::Impl(const DeviceContext& deviceContext,
const DeviceStream& deviceStream,
gmx_wallcycle* wcycle) :
+ baseForce_(nullptr),
deviceContext_(deviceContext),
deviceStream_(deviceStream),
+ nbnxmForceToAdd_(nullptr),
+ rvecForceToAdd_(nullptr),
wcycle_(wcycle){};
-void GpuForceReduction::Impl::reinit(float3* baseForcePtr,
+void GpuForceReduction::Impl::reinit(DeviceBuffer<Float3> baseForcePtr,
const int numAtoms,
ArrayRef<const int> cell,
const int atomStart,
impl_->registerNbnxmForce(forcePtr);
}
-void GpuForceReduction::registerRvecForce(void* forcePtr)
+void GpuForceReduction::registerRvecForce(DeviceBuffer<gmx::RVec> forcePtr)
{
- impl_->registerRvecForce(reinterpret_cast<DeviceBuffer<RVec>>(forcePtr));
+ impl_->registerRvecForce(forcePtr);
}
void GpuForceReduction::addDependency(GpuEventSynchronizer* const dependency)
const bool accumulate,
GpuEventSynchronizer* completionMarker)
{
- impl_->reinit(asFloat3(baseForcePtr), numAtoms, cell, atomStart, accumulate, completionMarker);
+ impl_->reinit(baseForcePtr, numAtoms, cell, atomStart, accumulate, completionMarker);
}
void GpuForceReduction::execute()
{
//! cell index mapping for any nbat-format forces
const int* cell = nullptr;
//! device copy of cell index mapping for any nbat-format forces
- int* d_cell = nullptr;
+ DeviceBuffer<int> d_cell;
//! number of atoms in cell array
int cellSize = -1;
//! number of atoms allocated in cell array
* \param [in] deviceContext GPU device context
* \param [in] wcycle The wallclock counter
*/
- Impl(const DeviceContext& deviceContext, const DeviceStream& deviceStreami, gmx_wallcycle* wcycle);
+ Impl(const DeviceContext& deviceContext, const DeviceStream& deviceStream, gmx_wallcycle* wcycle);
~Impl();
/*! \brief Register a nbnxm-format force to be reduced
* \param [in] accumulate Whether reduction should be accumulated
* \param [in] completionMarker Event to be marked when launch of reduction is complete
*/
- void reinit(float3* baseForcePtr,
+ void reinit(DeviceBuffer<Float3> baseForcePtr,
const int numAtoms,
ArrayRef<const int> cell,
const int atomStart,
private:
//! force to be used as a base for this reduction
- float3* baseForce_ = nullptr;
+ DeviceBuffer<Float3> baseForce_;
//! starting atom
int atomStart_ = 0;
//! number of atoms
//! stream to be used for this reduction
const DeviceStream& deviceStream_;
//! Nbnxm force to be added in this reduction
- DeviceBuffer<RVec> nbnxmForceToAdd_ = nullptr;
+ DeviceBuffer<RVec> nbnxmForceToAdd_;
//! Rvec-format force to be added in this reduction
- DeviceBuffer<RVec> rvecForceToAdd_ = nullptr;
+ DeviceBuffer<RVec> rvecForceToAdd_;
- //! event to be marked when redcution launch has been completed
+ //! event to be marked when reduction launch has been completed
GpuEventSynchronizer* completionMarker_ = nullptr;
//! The wallclock counter
if (runScheduleWork->simulationWork.useGpuPme
&& (thisRankHasDuty(cr, DUTY_PME) || runScheduleWork->simulationWork.useGpuPmePpCommunication))
{
- void* forcePtr = thisRankHasDuty(cr, DUTY_PME) ? pme_gpu_get_device_f(fr->pmedata)
- : // PME force buffer on same GPU
- fr->pmePpCommGpu->getGpuForceStagingPtr(); // buffer received from other GPU
+ DeviceBuffer<gmx::RVec> forcePtr =
+ thisRankHasDuty(cr, DUTY_PME) ? pme_gpu_get_device_f(fr->pmedata)
+ : // PME force buffer on same GPU
+ fr->pmePpCommGpu->getGpuForceStagingPtr(); // buffer received from other GPU
fr->gpuForceReduction[gmx::AtomLocality::Local]->registerRvecForce(forcePtr);
GpuEventSynchronizer* const pmeSynchronizer =
#include "gromacs/gpu_utils/device_context.h"
#include "gromacs/gpu_utils/device_stream.h"
#include "gromacs/gpu_utils/devicebuffer.h"
+#include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
#include "gromacs/gpu_utils/gputraits.cuh"
#include "gromacs/gpu_utils/vectype_ops.cuh"
#include "gromacs/mdlib/leapfrog_gpu.h"
#include "gmxpre.h"
-#include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
#include "gromacs/mdlib/leapfrog_gpu.h"
#include "gromacs/mdlib/lincs_gpu.cuh"
#include "gromacs/mdlib/settle_gpu.cuh"
#include "gromacs/mdlib/update_constrain_gpu.h"
#include "gromacs/mdtypes/inputrec.h"
+class GpuEventSynchronizer;
+
namespace gmx
{
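
Finally, the include shuffle above follows the usual pattern for keeping
headers device-agnostic: a header that only stores or passes
GpuEventSynchronizer* needs just the forward declaration, and the
CUDA-specific gpueventsynchronizer.cuh include moves into the .cu
translation units that actually call its methods. Generic illustration
(names hypothetical):

    // some_header.h: pointer use only, so a forward declaration suffices
    class GpuEventSynchronizer;
    struct Task { GpuEventSynchronizer* done = nullptr; };

    // some_impl.cu: the full definition is needed to call member functions
    // #include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
    // task.done->markEvent(deviceStream_);
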