* \param[in] pme The PME data structure.
* \returns Pointer to force data
*/
-GPU_FUNC_QUALIFIER DeviceBuffer<gmx::RVec> pme_gpu_get_device_f(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme))
- GPU_FUNC_TERM_WITH_RETURN(DeviceBuffer<gmx::RVec>{});
+GPU_FUNC_QUALIFIER void* pme_gpu_get_device_f(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme))
+ GPU_FUNC_TERM_WITH_RETURN(nullptr);
/*! \brief Get pointer to the device synchronizer object that allows syncing on PME force calculation completion
* \param[in] pme The PME data structure.
 * \returns Pointer to synchronizer
*/
GPU_FUNC_QUALIFIER GpuEventSynchronizer* pme_gpu_get_f_ready_synchronizer(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme))
GPU_FUNC_TERM_WITH_RETURN(nullptr);
#include <memory>
#include "gromacs/math/vectypes.h"
-#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/utility/gmxmpi.h"
class DeviceStream;
* Initialization of GPU PME Force sender
* \param[in] d_f force buffer in GPU memory
*/
- void sendForceBufferAddressToPpRanks(DeviceBuffer<RVec> d_f);
+ void sendForceBufferAddressToPpRanks(rvec* d_f);
/*! \brief
* Send PP data to PP rank
PmeForceSenderGpu::~PmeForceSenderGpu() = default;
/*! \brief Init PME-PP GPU communication stub */
-void PmeForceSenderGpu::sendForceBufferAddressToPpRanks(DeviceBuffer<gmx::RVec> /* d_f */)
+void PmeForceSenderGpu::sendForceBufferAddressToPpRanks(rvec* /* d_f */)
{
GMX_ASSERT(!impl_,
"A CPU stub for PME-PP GPU communication initialization was called instead of the "
#include "config.h"
#include "gromacs/gpu_utils/cudautils.cuh"
-#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
#include "gromacs/utility/gmxmpi.h"
PmeForceSenderGpu::Impl::~Impl() = default;
/*! \brief sends force buffer address to PP ranks */
-void PmeForceSenderGpu::Impl::sendForceBufferAddressToPpRanks(DeviceBuffer<RVec> d_f)
+void PmeForceSenderGpu::Impl::sendForceBufferAddressToPpRanks(rvec* d_f)
{
int ind_start = 0;
int ind_end = 0;
PmeForceSenderGpu::~PmeForceSenderGpu() = default;
-void PmeForceSenderGpu::sendForceBufferAddressToPpRanks(DeviceBuffer<RVec> d_f)
+void PmeForceSenderGpu::sendForceBufferAddressToPpRanks(rvec* d_f)
{
impl_->sendForceBufferAddressToPpRanks(d_f);
}
#define GMX_PMEFORCESENDERGPU_IMPL_H
#include "gromacs/ewald/pme_force_sender_gpu.h"
-#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
#include "gromacs/utility/arrayref.h"
* sends force buffer address to PP rank
* \param[in] d_f force buffer in GPU memory
*/
- void sendForceBufferAddressToPpRanks(DeviceBuffer<RVec> d_f);
+ void sendForceBufferAddressToPpRanks(rvec* d_f);
/*! \brief
* Send PP data to PP rank
const float* __restrict__ gm_coefficientsB = kernelParams.atoms.d_coefficients[1];
const float* __restrict__ gm_gridA = kernelParams.grid.d_realGrid[0];
const float* __restrict__ gm_gridB = kernelParams.grid.d_realGrid[1];
- float* __restrict__ gm_forces = reinterpret_cast<float*>(kernelParams.atoms.d_forces);
+ float* __restrict__ gm_forces = kernelParams.atoms.d_forces;
/* Global memory pointers for readGlobal */
const float* __restrict__ gm_theta = kernelParams.atoms.d_theta;
wallcycle_stop(wcycle, ewcLAUNCH_GPU);
}
-DeviceBuffer<gmx::RVec> pme_gpu_get_device_f(const gmx_pme_t* pme)
+void* pme_gpu_get_device_f(const gmx_pme_t* pme)
{
if (!pme || !pme_gpu_active(pme))
{
- return DeviceBuffer<gmx::RVec>{};
+ return nullptr;
}
return pme_gpu_get_kernelparam_forces(pme->gpu);
}
void pme_gpu_realloc_forces(PmeGpu* pmeGpu)
{
- const size_t newForcesSize = pmeGpu->nAtomsAlloc;
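+ // d_forces is now a flat float buffer, so allocate DIM (= 3) float components per atom.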
+ const size_t newForcesSize = pmeGpu->nAtomsAlloc * DIM;
GMX_ASSERT(newForcesSize > 0, "Bad number of atoms in PME GPU");
reallocateDeviceBuffer(&pmeGpu->kernelParams->atoms.d_forces,
newForcesSize,
void pme_gpu_copy_input_forces(PmeGpu* pmeGpu)
{
GMX_ASSERT(pmeGpu->kernelParams->atoms.nAtoms > 0, "Bad number of atoms in PME GPU");
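+ // The host staging buffer stores gmx::RVec; reinterpret it as packed floats for the raw copy.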
+ float* h_forcesFloat = reinterpret_cast<float*>(pmeGpu->staging.h_forces.data());
copyToDeviceBuffer(&pmeGpu->kernelParams->atoms.d_forces,
- pmeGpu->staging.h_forces.data(),
+ h_forcesFloat,
0,
- pmeGpu->kernelParams->atoms.nAtoms,
+ DIM * pmeGpu->kernelParams->atoms.nAtoms,
pmeGpu->archSpecific->pmeStream_,
pmeGpu->settings.transferKind,
nullptr);
void pme_gpu_copy_output_forces(PmeGpu* pmeGpu)
{
GMX_ASSERT(pmeGpu->kernelParams->atoms.nAtoms > 0, "Bad number of atoms in PME GPU");
- copyFromDeviceBuffer(pmeGpu->staging.h_forces.data(),
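+ // As in the H2D copy above, view the RVec staging buffer as packed floats.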
+ float* h_forcesFloat = reinterpret_cast<float*>(pmeGpu->staging.h_forces.data());
+ copyFromDeviceBuffer(h_forcesFloat,
&pmeGpu->kernelParams->atoms.d_forces,
0,
- pmeGpu->kernelParams->atoms.nAtoms,
+ DIM * pmeGpu->kernelParams->atoms.nAtoms,
pmeGpu->archSpecific->pmeStream_,
pmeGpu->settings.transferKind,
nullptr);
}
}
-DeviceBuffer<gmx::RVec> pme_gpu_get_kernelparam_forces(const PmeGpu* pmeGpu)
+void* pme_gpu_get_kernelparam_forces(const PmeGpu* pmeGpu)
{
if (pmeGpu && pmeGpu->kernelParams)
{
return pmeGpu->kernelParams->atoms.d_forces;
}
else
{
- return DeviceBuffer<gmx::RVec>{};
+ return nullptr;
}
}
* \param[in] pmeGpu The PME GPU structure.
* \returns Pointer to force data
*/
-GPU_FUNC_QUALIFIER DeviceBuffer<gmx::RVec> pme_gpu_get_kernelparam_forces(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu))
- GPU_FUNC_TERM_WITH_RETURN(DeviceBuffer<gmx::RVec>{});
+GPU_FUNC_QUALIFIER void* pme_gpu_get_kernelparam_forces(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu))
+ GPU_FUNC_TERM_WITH_RETURN(nullptr);
/*! \brief Return pointer to the sync object triggered after the PME force calculation completion
* \param[in] pmeGpu The PME GPU structure.
* The forces change and need to be copied from (and possibly to) the GPU for every PME
* computation, but reallocation happens only at DD.
*/
- HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<gmx::RVec>) d_forces;
+ HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_forces;
/*! \brief Global GPU memory array handle with ivec atom gridline indices.
* Computed on GPU in the spline calculation part.
*/
// This rank will have its data accessed directly by PP rank, so needs to send the remote addresses.
pme_pp->pmeCoordinateReceiverGpu->sendCoordinateBufferAddressToPpRanks(
stateGpu->getCoordinates());
- pme_pp->pmeForceSenderGpu->sendForceBufferAddressToPpRanks(pme_gpu_get_device_f(pme));
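+ // pme_gpu_get_device_f() now returns an opaque void*; cast it to the rvec* the sender expects.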
+ pme_pp->pmeForceSenderGpu->sendForceBufferAddressToPpRanks(
+ reinterpret_cast<rvec*>(pme_gpu_get_device_f(pme)));
}
}
if (pme_pp->useGpuDirectComm)
{
// Data will be transferred directly from GPU.
- DeviceBuffer<gmx::RVec> gmx_unused d_f = pme_gpu_get_device_f(&pme);
-# if GMX_GPU_CUDA
- // OpenCL does not allow host-side pointer arithmetic on buffers. Neither does SYCL.
- sendbuf = reinterpret_cast<void*>(&d_f[ind_start]);
-# else
- GMX_RELEASE_ASSERT(false, "Can only use GPU Direct Communications with CUDA");
-# endif
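+ // A raw CUDA device pointer permits host-side pointer arithmetic, which OpenCL and SYCL buffers do not.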
+ rvec* d_f = reinterpret_cast<rvec*>(pme_gpu_get_device_f(&pme));
+ sendbuf = reinterpret_cast<void*>(&d_f[ind_start]);
}
sendFToPP(sendbuf, receiver, pme_pp, &messages);
}
#include <memory>
-#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/utility/gmxmpi.h"
class DeviceContext;
/*! \brief
* Return pointer to buffer used for staging PME force on GPU
*/
- DeviceBuffer<gmx::RVec> getGpuForceStagingPtr();
+ void* getGpuForceStagingPtr();
/*! \brief
* Return pointer to event recorded when forces are ready
"implementation.");
}
-DeviceBuffer<gmx::RVec> PmePpCommGpu::getGpuForceStagingPtr()
+void* PmePpCommGpu::getGpuForceStagingPtr()
{
GMX_ASSERT(!impl_,
"A CPU stub for PME-PP GPU communication was called instead of the correct "
"implementation.");
- return DeviceBuffer<gmx::RVec>{};
+ return nullptr;
}
GpuEventSynchronizer* PmePpCommGpu::getForcesReadySynchronizer()
GMX_UNUSED_VALUE(coordinatesReadyOnDeviceEvent);
#endif
}
-DeviceBuffer<gmx::RVec> PmePpCommGpu::Impl::getGpuForceStagingPtr()
+void* PmePpCommGpu::Impl::getGpuForceStagingPtr()
{
- return d_pmeForces_;
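+ // d_pmeForces_ is rvec*; hand it out as an opaque pointer for the public interface.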
+ return static_cast<void*>(d_pmeForces_);
}
GpuEventSynchronizer* PmePpCommGpu::Impl::getForcesReadySynchronizer()
sendPtr, sendSize, sendPmeCoordinatesFromGpu, coordinatesReadyOnDeviceEvent);
}
-DeviceBuffer<gmx::RVec> PmePpCommGpu::getGpuForceStagingPtr()
+void* PmePpCommGpu::getGpuForceStagingPtr()
{
return impl_->getGpuForceStagingPtr();
}
#define GMX_PME_PP_COMM_GPU_IMPL_H
#include "gromacs/ewald/pme_pp_comm_gpu.h"
-#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
#include "gromacs/math/vectypes.h"
#include "gromacs/utility/gmxmpi.h"
/*! \brief
* Return pointer to buffer used for staging PME force on GPU
*/
- DeviceBuffer<gmx::RVec> getGpuForceStagingPtr();
+ void* getGpuForceStagingPtr();
/*! \brief
* Return pointer to event recorded when forces are ready
//! Handle for CUDA stream used for the communication operations in this class
const DeviceStream& pmePpCommStream_;
//! Remote location of PME coordinate data buffer
- DeviceBuffer<gmx::RVec> remotePmeXBuffer_ = nullptr;
+ void* remotePmeXBuffer_ = nullptr;
//! Remote location of PME force data buffer
- DeviceBuffer<gmx::RVec> remotePmeFBuffer_ = nullptr;
+ void* remotePmeFBuffer_ = nullptr;
//! communicator for simulation
MPI_Comm comm_;
//! Rank of PME task
int pmeRank_ = -1;
//! Buffer for staging PME force on GPU
- DeviceBuffer<gmx::RVec> d_pmeForces_ = nullptr;
+ rvec* d_pmeForces_ = nullptr;
//! number of atoms in PME force staging array
int d_pmeForcesSize_ = -1;
//! number of atoms allocated in recvbuf array
*
* \param [in] forcePtr Pointer to force to be reduced
*/
- void registerNbnxmForce(DeviceBuffer<RVec> forcePtr);
+ void registerNbnxmForce(void* forcePtr);
/*! \brief Register a rvec-format force to be reduced
*
* \param [in] forcePtr Pointer to force to be reduced
*/
- void registerRvecForce(DeviceBuffer<RVec> forcePtr);
+ void registerRvecForce(void* forcePtr);
/*! \brief Add a dependency for this force reduction
*
/*! \brief Reinitialize the GPU force reduction
*
- * \param [in] baseForce Pointer to force to be used as a base
+ * \param [in] baseForcePtr Pointer to force to be used as a base
* \param [in] numAtoms The number of atoms
* \param [in] cell Pointer to the cell array
* \param [in] atomStart The start atom for the reduction
* \param [in] accumulate Whether reduction should be accumulated
* \param [in] completionMarker Event to be marked when launch of reduction is complete
*/
- void reinit(DeviceBuffer<RVec> baseForce,
+ void reinit(DeviceBuffer<RVec> baseForcePtr,
int numAtoms,
ArrayRef<const int> cell,
int atomStart,
}
// NOLINTNEXTLINE readability-convert-member-functions-to-static
-void GpuForceReduction::registerNbnxmForce(DeviceBuffer<RVec> /* forcePtr */)
+void GpuForceReduction::registerNbnxmForce(void* /* forcePtr */)
{
GMX_ASSERT(false, "A CPU stub has been called instead of the correct implementation.");
}
// NOLINTNEXTLINE readability-convert-member-functions-to-static
-void GpuForceReduction::registerRvecForce(DeviceBuffer<RVec> /* forcePtr */)
+void GpuForceReduction::registerRvecForce(void* /* forcePtr */)
{
GMX_ASSERT(false, "A CPU stub has been called instead of the correct implementation.");
}
#include "gmxpre.h"
-#include "gpuforcereduction_impl.h"
+#include "gpuforcereduction_impl.cuh"
#include <stdio.h>
deviceStream_(deviceStream),
wcycle_(wcycle){};
-void GpuForceReduction::Impl::reinit(DeviceBuffer<gmx::RVec> baseForce,
- const int numAtoms,
- ArrayRef<const int> cell,
- const int atomStart,
- const bool accumulate,
- GpuEventSynchronizer* completionMarker)
+void GpuForceReduction::Impl::reinit(float3* baseForcePtr,
+ const int numAtoms,
+ ArrayRef<const int> cell,
+ const int atomStart,
+ const bool accumulate,
+ GpuEventSynchronizer* completionMarker)
{
- GMX_ASSERT((baseForce != nullptr), "Input base force for reduction has no data");
- baseForce_ = baseForce;
+ GMX_ASSERT((baseForcePtr != nullptr), "Input base force for reduction has no data");
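+ // Apply the atomStart offset once here, so the reduction kernel can use baseForce_ directly.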
+ baseForce_ = &(baseForcePtr[atomStart]);
numAtoms_ = numAtoms;
atomStart_ = atomStart;
accumulate_ = accumulate;
void GpuForceReduction::Impl::registerNbnxmForce(DeviceBuffer<RVec> forcePtr)
{
- GMX_ASSERT((forcePtr), "Input force for reduction has no data");
+ GMX_ASSERT((forcePtr != nullptr), "Input force for reduction has no data");
nbnxmForceToAdd_ = forcePtr;
};
void GpuForceReduction::Impl::registerRvecForce(DeviceBuffer<RVec> forcePtr)
{
- GMX_ASSERT((forcePtr), "Input force for reduction has no data");
+ GMX_ASSERT((forcePtr != nullptr), "Input force for reduction has no data");
rvecForceToAdd_ = forcePtr;
};
GMX_ASSERT((nbnxmForceToAdd_ != nullptr), "Nbnxm force for reduction has no data");
// Enqueue wait on all dependencies passed
- for (const auto& synchronizer : dependencyList_)
+ for (auto const synchronizer : dependencyList_)
{
synchronizer->enqueueWaitEvent(deviceStream_);
}
- float3* d_baseForce = &(asFloat3(baseForce_)[atomStart_]);
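+ // baseForce_ already carries the atomStart_ offset (applied in reinit), so no local offset is needed.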
float3* d_nbnxmForce = asFloat3(nbnxmForceToAdd_);
float3* d_rvecForceToAdd = &(asFloat3(rvecForceToAdd_)[atomStart_]);
: (accumulate_ ? reduceKernel<false, true> : reduceKernel<false, false>);
const auto kernelArgs = prepareGpuKernelArguments(
- kernelFn, config, &d_nbnxmForce, &d_rvecForceToAdd, &d_baseForce, &cellInfo_.d_cell, &numAtoms_);
+ kernelFn, config, &d_nbnxmForce, &d_rvecForceToAdd, &baseForce_, &cellInfo_.d_cell, &numAtoms_);
launchGpuKernel(kernelFn, config, deviceStream_, nullptr, "Force Reduction", kernelArgs);
{
}
-void GpuForceReduction::registerNbnxmForce(DeviceBuffer<RVec> forcePtr)
+void GpuForceReduction::registerNbnxmForce(void* forcePtr)
{
- impl_->registerNbnxmForce(std::move(forcePtr));
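+ // In the CUDA build DeviceBuffer<RVec> is a raw device pointer, so this cast from void* is well-defined.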
+ impl_->registerNbnxmForce(reinterpret_cast<DeviceBuffer<RVec>>(forcePtr));
}
-void GpuForceReduction::registerRvecForce(DeviceBuffer<RVec> forcePtr)
+void GpuForceReduction::registerRvecForce(void* forcePtr)
{
- impl_->registerRvecForce(std::move(forcePtr));
+ impl_->registerRvecForce(reinterpret_cast<DeviceBuffer<RVec>>(forcePtr));
}
void GpuForceReduction::addDependency(GpuEventSynchronizer* const dependency)
impl_->addDependency(dependency);
}
-void GpuForceReduction::reinit(DeviceBuffer<RVec> baseForce,
+void GpuForceReduction::reinit(DeviceBuffer<RVec> baseForcePtr,
const int numAtoms,
ArrayRef<const int> cell,
const int atomStart,
const bool accumulate,
GpuEventSynchronizer* completionMarker)
{
- impl_->reinit(baseForce, numAtoms, cell, atomStart, accumulate, completionMarker);
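+ // asFloat3 converts the public DeviceBuffer<RVec> handle to the raw float3* used by the implementation.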
+ impl_->reinit(asFloat3(baseForcePtr), numAtoms, cell, atomStart, accumulate, completionMarker);
}
void GpuForceReduction::execute()
{
//! cell index mapping for any nbat-format forces
const int* cell = nullptr;
//! device copy of cell index mapping for any nbat-format forces
- DeviceBuffer<int> d_cell;
+ int* d_cell = nullptr;
//! number of atoms in cell array
int cellSize = -1;
//! number of atoms allocated in cell array
* \param [in] deviceContext GPU device context
* \param [in] wcycle The wallclock counter
*/
 Impl(const DeviceContext& deviceContext, const DeviceStream& deviceStream, gmx_wallcycle* wcycle);
~Impl();
/*! \brief Register a nbnxm-format force to be reduced
/*! \brief Reinitialize the GPU force reduction
*
- * \param [in] baseForce Pointer to force to be used as a base
+ * \param [in] baseForcePtr Pointer to force to be used as a base
* \param [in] numAtoms The number of atoms
* \param [in] cell Pointer to the cell array
* \param [in] atomStart The start atom for the reduction
* \param [in] accumulate Whether reduction should be accumulated
* \param [in] completionMarker Event to be marked when launch of reduction is complete
*/
- void reinit(DeviceBuffer<RVec> baseForce,
+ void reinit(float3* baseForcePtr,
const int numAtoms,
ArrayRef<const int> cell,
const int atomStart,
private:
//! force to be used as a base for this reduction
- DeviceBuffer<RVec> baseForce_;
+ float3* baseForce_ = nullptr;
//! starting atom
int atomStart_ = 0;
//! number of atoms
int numAtoms_ = 0;
//! whether reduction is accumulated into base force buffer
 bool accumulate_ = true;
//! cell information for any nbat-format forces
struct cellInfo cellInfo_;
//! GPU context object
//! stream to be used for this reduction
const DeviceStream& deviceStream_;
//! Nbnxm force to be added in this reduction
- DeviceBuffer<RVec> nbnxmForceToAdd_;
+ DeviceBuffer<RVec> nbnxmForceToAdd_ = nullptr;
//! Rvec-format force to be added in this reduction
- DeviceBuffer<RVec> rvecForceToAdd_;
+ DeviceBuffer<RVec> rvecForceToAdd_ = nullptr;
//! event to be marked when reduction launch has been completed
GpuEventSynchronizer* completionMarker_ = nullptr;
//! The wallclock counter
if (runScheduleWork->simulationWork.useGpuPme
&& (thisRankHasDuty(cr, DUTY_PME) || runScheduleWork->simulationWork.useGpuPmePpCommunication))
{
- DeviceBuffer<gmx::RVec> forcePtr =
- thisRankHasDuty(cr, DUTY_PME) ? pme_gpu_get_device_f(fr->pmedata)
- : // PME force buffer on same GPU
- fr->pmePpCommGpu->getGpuForceStagingPtr(); // buffer received from other GPU
+ void* forcePtr = thisRankHasDuty(cr, DUTY_PME) ? pme_gpu_get_device_f(fr->pmedata)
+ : // PME force buffer on same GPU
+ fr->pmePpCommGpu->getGpuForceStagingPtr(); // buffer received from other GPU
fr->gpuForceReduction[gmx::AtomLocality::Local]->registerRvecForce(forcePtr);
GpuEventSynchronizer* const pmeSynchronizer =
#include "gromacs/gpu_utils/device_context.h"
#include "gromacs/gpu_utils/device_stream.h"
#include "gromacs/gpu_utils/devicebuffer.h"
-#include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
#include "gromacs/gpu_utils/gputraits.cuh"
#include "gromacs/gpu_utils/vectype_ops.cuh"
#include "gromacs/mdlib/leapfrog_gpu.h"
#ifndef GMX_MDLIB_UPDATE_CONSTRAIN_GPU_IMPL_H
#define GMX_MDLIB_UPDATE_CONSTRAIN_GPU_IMPL_H
-#include "gromacs/gpu_utils/devicebuffer_datatype.h"
+#include "gmxpre.h"
+
+#include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
+#include "gromacs/mdlib/leapfrog_gpu.h"
+#include "gromacs/mdlib/lincs_gpu.cuh"
+#include "gromacs/mdlib/settle_gpu.cuh"
#include "gromacs/mdlib/update_constrain_gpu.h"
#include "gromacs/mdtypes/inputrec.h"
-#include "gromacs/pbcutil/pbc_aiuc.h"
-
-#if GMX_GPU_CUDA
-# include "gromacs/gpu_utils/gputraits.cuh"
-#endif
-
-class GpuEventSynchronizer;
-namespace gmx
-{
-class LincsGpu;
-class SettleGpu;
-class LeapFrogGpu;
-} // namespace gmx
namespace gmx
{
int numAtoms_;
//! Local copy of the pointer to the device positions buffer
- DeviceBuffer<float3> d_x_;
+ float3* d_x_;
//! Local copy of the pointer to the device velocities buffer
- DeviceBuffer<float3> d_v_;
+ float3* d_v_;
//! Local copy of the pointer to the device forces buffer
- DeviceBuffer<float3> d_f_;
+ float3* d_f_;
//! Device buffer for intermediate positions (maintained internally)
- DeviceBuffer<float3> d_xp_;
+ float3* d_xp_;
//! Number of elements in shifted coordinates buffer
int numXp_ = -1;
//! Allocation size for the shifted coordinates buffer
//! 1/mass for all atoms (GPU)
- DeviceBuffer<real> d_inverseMasses_;
+ real* d_inverseMasses_;
//! Number of elements in reciprocal masses buffer
int numInverseMasses_ = -1;
//! Allocation size for the reciprocal masses buffer
nbnxnInsertNonlocalGpuDependency(nb, interactionLoc);
}
-DeviceBuffer<gmx::RVec> getGpuForces(NbnxmGpu* nb)
+void* getGpuForces(NbnxmGpu* nb)
{
return nb->atdat->f;
}
return numAtoms;
}
-DeviceBuffer<gmx::RVec> nonbonded_verlet_t::getGpuForces() const
+void* nonbonded_verlet_t::getGpuForces() const
{
return Nbnxm::getGpuForces(gpu_nbv);
}
*
* \returns A pointer to the force buffer in GPU memory
*/
- DeviceBuffer<gmx::RVec> getGpuForces() const;
+ void* getGpuForces() const;
//! Return the kernel setup
const Nbnxm::KernelSetup& kernelSetup() const { return kernelSetup_; }
* \returns A pointer to the force buffer in GPU memory
*/
CUDA_FUNC_QUALIFIER
-DeviceBuffer<gmx::RVec> getGpuForces(NbnxmGpu gmx_unused* nb)
- CUDA_FUNC_TERM_WITH_RETURN(DeviceBuffer<gmx::RVec>{});
+void* getGpuForces(NbnxmGpu gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
} // namespace Nbnxm
#endif