// flag to specify whether forces should be accumulated in the force buffer
// ops. For the non-local part, this depends only on whether CPU forces are present.
bool accumulateForce = (useGpuFBufOps == BufferOpsUseGpu::True) && haveCpuForces;
- nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::NonLocal,
- forceWithShiftForces.force(), pme_gpu_get_device_f(fr->pmedata),
- pme_gpu_get_f_ready_synchronizer(fr->pmedata),
- useGpuFBufOps, useGpuFPmeReduction, accumulateForce);
+
if (useGpuFBufOps == BufferOpsUseGpu::True)
{
+ nbv->atomdata_add_nbat_f_to_f_gpu(Nbnxm::AtomLocality::NonLocal,
+ nbv->getDeviceForces(),
+ pme_gpu_get_device_f(fr->pmedata),
+ pme_gpu_get_f_ready_synchronizer(fr->pmedata),
+ useGpuFPmeReduction, accumulateForce);
nbv->launch_copy_f_from_gpu(f, Nbnxm::AtomLocality::NonLocal);
}
+ else
+ {
+ nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::NonLocal,
+ forceWithShiftForces.force());
+ }
+
if (fr->nbv->emulateGpu() && forceFlags.computeVirial)
{
// non-local part).
bool accumulateForce = (useGpuFBufOps == BufferOpsUseGpu::True) &&
(haveCpuForces || DOMAINDECOMP(cr));
- nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::Local,
- force, pme_gpu_get_device_f(fr->pmedata),
- pme_gpu_get_f_ready_synchronizer(fr->pmedata),
- useGpuFBufOps, useGpuFPmeReduction, accumulateForce);
+
if (useGpuFBufOps == BufferOpsUseGpu::True)
{
+ nbv->atomdata_add_nbat_f_to_f_gpu(Nbnxm::AtomLocality::Local,
+ nbv->getDeviceForces(),
+ pme_gpu_get_device_f(fr->pmedata),
+ pme_gpu_get_f_ready_synchronizer(fr->pmedata),
+ useGpuFPmeReduction, accumulateForce);
nbv->launch_copy_f_from_gpu(f, Nbnxm::AtomLocality::Local);
nbv->wait_for_gpu_force_reduction(Nbnxm::AtomLocality::Local);
}
+ else
+ {
+ nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::Local, force);
+ }
+
}
launchGpuEndOfStepTasks(nbv, fr->gpuBonded, fr->pmedata, enerd,
/* Add the force array(s) from nbnxn_atomdata_t to f */
-template <bool useGpu>
void reduceForces(nbnxn_atomdata_t *nbat,
const Nbnxm::AtomLocality locality,
const Nbnxm::GridSet &gridSet,
- rvec *f,
- void *pmeFDeviceBuffer,
- GpuEventSynchronizer *pmeForcesReady,
- gmx_nbnxn_gpu_t *gpu_nbv,
- bool useGpuFPmeReduction,
- bool accumulateForce)
+ rvec *f)
{
int a0 = 0;
int na = 0;
return;
}
- if (useGpu)
- {
- Nbnxm::nbnxn_gpu_add_nbat_f_to_f(locality,
- gpu_nbv,
- pmeFDeviceBuffer,
- pmeForcesReady,
- a0, na,
- useGpuFPmeReduction,
- accumulateForce);
- }
- else
- {
- int nth = gmx_omp_nthreads_get(emntNonbonded);
+ int nth = gmx_omp_nthreads_get(emntNonbonded);
- if (nbat->out.size() > 1)
+ if (nbat->out.size() > 1)
+ {
+ if (locality != Nbnxm::AtomLocality::All)
{
- if (locality != Nbnxm::AtomLocality::All)
- {
- gmx_incons("add_f_to_f called with nout>1 and locality!=eatAll");
- }
+ gmx_incons("add_f_to_f called with nout>1 and locality!=eatAll");
+ }
- /* Reduce the force thread output buffers into buffer 0, before adding
- * them to the, differently ordered, "real" force buffer.
- */
- if (nbat->bUseTreeReduce)
- {
- nbnxn_atomdata_add_nbat_f_to_f_treereduce(nbat, nth);
- }
- else
- {
- nbnxn_atomdata_add_nbat_f_to_f_stdreduce(nbat, nth);
- }
+ /* Reduce the force thread output buffers into buffer 0, before adding
+ * them to the differently ordered "real" force buffer.
+ */
+ if (nbat->bUseTreeReduce)
+ {
+ nbnxn_atomdata_add_nbat_f_to_f_treereduce(nbat, nth);
}
+ else
+ {
+ nbnxn_atomdata_add_nbat_f_to_f_stdreduce(nbat, nth);
+ }
+ }
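+ /* Each thread reduces a contiguous slice of the atom range: thread th
+ * handles atoms [a0 + th*na/nth, a0 + (th+1)*na/nth). */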
#pragma omp parallel for num_threads(nth) schedule(static)
- for (int th = 0; th < nth; th++)
+ for (int th = 0; th < nth; th++)
+ {
+ try
{
- try
- {
- nbnxn_atomdata_add_nbat_f_to_f_part(gridSet, *nbat,
- nbat->out[0],
- a0 + ((th + 0)*na)/nth,
- a0 + ((th + 1)*na)/nth,
- f);
- }
- GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
+ nbnxn_atomdata_add_nbat_f_to_f_part(gridSet, *nbat,
+ nbat->out[0],
+ a0 + ((th + 0)*na)/nth,
+ a0 + ((th + 1)*na)/nth,
+ f);
}
+ GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
+ }
+}
+
+/* Add the force array(s) from nbnxn_atomdata_t to the total force buffer on the GPU */
+void reduceForcesGpu(const Nbnxm::AtomLocality locality,
+ DeviceBuffer<float> totalForcesDevice,
+ const Nbnxm::GridSet &gridSet,
+ void *pmeForcesDevice,
+ GpuEventSynchronizer *pmeForcesReady,
+ gmx_nbnxn_gpu_t *gpu_nbv,
+ bool useGpuFPmeReduction,
+ bool accumulateForce)
+{
+ int atomsStart = 0;
+ int numAtoms = 0;
+
+ nbnxn_get_atom_range(locality, gridSet, &atomsStart, &numAtoms);
+
+ if (numAtoms == 0)
+ {
+ /* There are no atoms for this reduction; avoid some overhead */
+ return;
}
+
+ Nbnxm::nbnxn_gpu_add_nbat_f_to_f(locality,
+ totalForcesDevice,
+ gpu_nbv,
+ pmeForcesDevice,
+ pmeForcesReady,
+ atomsStart, numAtoms,
+ useGpuFPmeReduction,
+ accumulateForce);
+}
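+
+/* Example usage, as a minimal sketch: the PME arguments are assumed to be
+ * obtained from pme_gpu_get_device_f() and pme_gpu_get_f_ready_synchronizer(),
+ * as in the mdrun call path above:
+ *
+ *   reduceForcesGpu(Nbnxm::AtomLocality::Local,
+ *                   nbnxn_atomdata_get_f_gpu(gpu_nbv),
+ *                   gridSet,
+ *                   pme_gpu_get_device_f(pmedata),
+ *                   pme_gpu_get_f_ready_synchronizer(pmedata),
+ *                   gpu_nbv,
+ *                   useGpuFPmeReduction,
+ *                   accumulateForce);
+ */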
+
+DeviceBuffer<float> nbnxn_atomdata_get_f_gpu(gmx_nbnxn_gpu_t *gpu_nbv)
+{
+ return Nbnxm::nbnxn_gpu_get_f_gpu(gpu_nbv);
}
-template
-void reduceForces<true>(nbnxn_atomdata_t *nbat,
- const Nbnxm::AtomLocality locality,
- const Nbnxm::GridSet &gridSet,
- rvec *f,
- void *fpme,
- GpuEventSynchronizer *pmeForcesReady,
- gmx_nbnxn_gpu_t *gpu_nbv,
- bool useGpuFPmeReduction,
- bool accumulateForce);
-
-template
-void reduceForces<false>(nbnxn_atomdata_t *nbat,
- const Nbnxm::AtomLocality locality,
- const Nbnxm::GridSet &gridSet,
- rvec *f,
- void *fpme,
- GpuEventSynchronizer *pmeForcesReady,
- gmx_nbnxn_gpu_t *gpu_nbv,
- bool useGpuFPmeReduction,
- bool accumulateForce);
void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t &nbat,
gmx::ArrayRef<gmx::RVec> fshift)
gmx_nbnxn_gpu_t *gpu_nbv,
DeviceBuffer<float> coordinatesDevice);
-//! Add the computed forces to \p f, an internal reduction might be performed as well
-template <bool useGpu>
+/*! \brief Add the computed forces to \p totalForce; an internal reduction may be performed as well.
+ *
+ * \param[in] nbat Atom data in NBNXM format.
+ * \param[in] locality Whether the reduction is performed on local or non-local atoms.
+ * \param[in] gridSet The grids data.
+ * \param[out] totalForce Buffer to accumulate the resulting forces into.
+ */
void reduceForces(nbnxn_atomdata_t *nbat,
Nbnxm::AtomLocality locality,
const Nbnxm::GridSet &gridSet,
- rvec *f,
- void *pmeFDeviceBuffer,
- GpuEventSynchronizer *pmeForcesReady,
- gmx_nbnxn_gpu_t *gpu_nbv,
- bool useGpuFPmeReduction,
- bool accumulateForce);
-
-
-extern template
-void reduceForces<true>(nbnxn_atomdata_t *nbat,
- const Nbnxm::AtomLocality locality,
- const Nbnxm::GridSet &gridSet,
- rvec *f,
- void *pmeFDeviceBuffer,
- GpuEventSynchronizer *pmeForcesReady,
- gmx_nbnxn_gpu_t *gpu_nbv,
- bool useGpuFPmeReduction,
- bool accumulateForce);
-
-extern template
-void reduceForces<false>(nbnxn_atomdata_t *nbat,
- const Nbnxm::AtomLocality locality,
- const Nbnxm::GridSet &gridSet,
- rvec *f,
- void *pmeFDeviceBuffer,
- GpuEventSynchronizer *pmeForcesReady,
- gmx_nbnxn_gpu_t *gpu_nbv,
- bool useGpuFPmeReduction,
- bool accumulateForce);
+ rvec *totalForce);
+
+/*! \brief Reduce forces on the GPU
+ *
+ * \param[in] locality Whether the reduction is performed on local or non-local atoms.
+ * \param[out] totalForcesDevice Device buffer to accumulate resulting force.
+ * \param[in] gridSet The grids data.
+ * \param[in] pmeForcesDevice Device buffer with PME forces.
+ * \param[in] pmeForcesReady Event that signals when the PME forces are ready for the reduction.
+ * \param[in] gpu_nbv The NBNXM GPU data structure.
+ * \param[in] useGpuFPmeReduction Whether PME forces should be added.
+ * \param[in] accumulateForce Whether there is useful data already in the total force buffer.
+ */
+void reduceForcesGpu(Nbnxm::AtomLocality locality,
+ DeviceBuffer<float> totalForcesDevice,
+ const Nbnxm::GridSet &gridSet,
+ void *pmeForcesDevice,
+ GpuEventSynchronizer *pmeForcesReady,
+ gmx_nbnxn_gpu_t *gpu_nbv,
+ bool useGpuFPmeReduction,
+ bool accumulateForce);
+
+/*! \brief Getter for the GPU force buffer.
+ *
+ * \todo Will be removed when the buffer management is lifted out of the NBNXM module.
+ *
+ * \param[in] gpu_nbv The NBNXM GPU data structure.
+ *
+ * \returns Device force buffer.
+ */
+DeviceBuffer<float> nbnxn_atomdata_get_f_gpu(gmx_nbnxn_gpu_t *gpu_nbv);
/* Add the fshift force stored in nbat to fshift */
void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t &nbat,
}
-/*! \brief CUDA kernel to add part of the force array(s) from nbnxn_atomdata_t to f
+/*! \brief CUDA kernel to sum up the force components
*
- * \param[in] fnb Force in nbat format
- * \param[in] fPmeDeviceBuffer PME force
- * \param[in,out] f Force buffer to be reduced into
- * \param[in] cell Cell index mapping
- * \param[in] atomStart Start atom index
- * \param[in] nAtoms Number of Atoms
+ * \tparam accumulateForce Whether the initial content of \p d_fTotal should be kept and accumulated into, rather than overwritten.
+ * \tparam addPmeForce Whether the PME force should be added to the total.
+ *
+ * \param[in] d_fNB Non-bonded forces in nbat format.
+ * \param[in] d_fPme PME forces.
+ * \param[in,out] d_fTotal Force buffer to be reduced into.
+ * \param[in] d_cell Cell index mapping.
+ * \param[in] atomStart Start atom index.
+ * \param[in] numAtoms Number of atoms.
*/
-template <bool accumulateForce, bool addPmeF>
+template <bool accumulateForce, bool addPmeForce>
__global__ void
-nbnxn_gpu_add_nbat_f_to_f_kernel(const float3 *__restrict__ fnb,
- const float3 *__restrict__ fPmeDeviceBuffer,
- float3 * f,
- const int *__restrict__ cell,
- const int atomStart,
- const int nAtoms);
-template <bool accumulateForce, bool addPmeF>
+nbnxn_gpu_add_nbat_f_to_f_kernel(const float3 *__restrict__ d_fNB,
+ const float3 *__restrict__ d_fPme,
+ float3 *d_fTotal,
+ const int *__restrict__ d_cell,
+ const int atomStart,
+ const int numAtoms);
+template <bool accumulateForce, bool addPmeForce>
__global__ void
-nbnxn_gpu_add_nbat_f_to_f_kernel(const float3 *__restrict__ fnb,
- const float3 *__restrict__ fPmeDeviceBuffer,
- float3 * f,
- const int *__restrict__ cell,
- const int atomStart,
- const int nAtoms)
+nbnxn_gpu_add_nbat_f_to_f_kernel(const float3 *__restrict__ d_fNB,
+ const float3 *__restrict__ d_fPme,
+ float3 *d_fTotal,
+ const int *__restrict__ d_cell,
+ const int atomStart,
+ const int numAtoms)
{
/* map particle-level parallelism to 1D CUDA thread and block index */
int threadIndex = blockIdx.x*blockDim.x+threadIdx.x;
/* perform addition for each particle */
- if (threadIndex < nAtoms)
+ if (threadIndex < numAtoms)
{
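+ /* d_cell maps the rvec-ordered atom index to its slot in the
+ * nbat-ordered non-bonded force array */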
- int i = cell[atomStart+threadIndex];
- float3 *fDest = (float3 *)&f[atomStart+threadIndex];
+ int i = d_cell[atomStart+threadIndex];
+ float3 *fDest = &d_fTotal[atomStart+threadIndex];
float3 temp;
if (accumulateForce)
{
temp = *fDest;
- temp += fnb[i];
+ temp += d_fNB[i];
}
else
{
- temp = fnb[i];
+ temp = d_fNB[i];
}
- if (addPmeF)
+ if (addPmeForce)
{
- temp += fPmeDeviceBuffer[atomStart+threadIndex];
+ temp += d_fPme[atomStart+threadIndex];
}
*fDest = temp;
/* F buffer operations on GPU: performs force summations and conversion from nb to rvec format. */
void nbnxn_gpu_add_nbat_f_to_f(const AtomLocality atomLocality,
+ DeviceBuffer<float> totalForcesDevice,
gmx_nbnxn_gpu_t *nb,
- void *fPmeDevicePtr,
+ void *pmeForcesDevice,
GpuEventSynchronizer *pmeForcesReady,
int atomStart,
- int nAtoms,
+ int numAtoms,
bool useGpuFPmeReduction,
bool accumulateForce)
{
GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
- const InteractionLocality iLocality = gpuAtomToInteractionLocality(atomLocality);
- cudaStream_t stream = nb->stream[iLocality];
- cu_atomdata_t *adat = nb->atdat;
- bool addPmeF = useGpuFPmeReduction;
+ const InteractionLocality iLocality = gpuAtomToInteractionLocality(atomLocality);
+ cudaStream_t stream = nb->stream[iLocality];
+ cu_atomdata_t *adat = nb->atdat;
- if (addPmeF)
+ if (useGpuFPmeReduction)
{
// Stream must wait for PME force completion
pmeForcesReady->enqueueWaitEvent(stream);
config.blockSize[0] = c_bufOpsThreadsPerBlock;
config.blockSize[1] = 1;
config.blockSize[2] = 1;
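+ /* one thread per atom, with the grid size rounded up to whole blocks */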
- config.gridSize[0] = ((nAtoms+1)+c_bufOpsThreadsPerBlock-1)/c_bufOpsThreadsPerBlock;
+ config.gridSize[0] = ((numAtoms+1)+c_bufOpsThreadsPerBlock-1)/c_bufOpsThreadsPerBlock;
config.gridSize[1] = 1;
config.gridSize[2] = 1;
config.sharedMemorySize = 0;
nbnxn_gpu_add_nbat_f_to_f_kernel<true, false> :
nbnxn_gpu_add_nbat_f_to_f_kernel<false, false>;
- if (addPmeF)
+ if (useGpuFPmeReduction)
{
kernelFn = accumulateForce ?
nbnxn_gpu_add_nbat_f_to_f_kernel<true, true> :
nbnxn_gpu_add_nbat_f_to_f_kernel<false, true>;
}
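+ /* The dispatch above picks one of four <accumulateForce, addPmeForce>
+ * kernel instantiations, so both flags are resolved at compile time. */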
- const float3 *d_f = adat->f;
- float3 *d_fNB = (float3*) nb->frvec;
- const float3 *d_fPme = (float3*) fPmeDevicePtr;
- const int *d_cell = nb->cell;
+ const float3 *d_fNB = adat->f;
+ const float3 *d_fPme = (float3*) pmeForcesDevice;
+ float3 *d_fTotal = (float3*) totalForcesDevice;
+ const int *d_cell = nb->cell;
const auto kernelArgs = prepareGpuKernelArguments(kernelFn, config,
- &d_f,
- &d_fPme,
&d_fNB,
+ &d_fPme,
+ &d_fTotal,
&d_cell,
&atomStart,
- &nAtoms);
+ &numAtoms);
launchGpuKernel(kernelFn, config, nullptr, "FbufferOps", kernelArgs);
}
+DeviceBuffer<float> nbnxn_gpu_get_f_gpu(gmx_nbnxn_gpu_t *nb)
+{
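+ // nb->frvec holds the device forces in rvec layout; expose it as a flat float buffer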
+ return reinterpret_cast< DeviceBuffer<float> >(nb->frvec);
+}
+
void nbnxn_launch_copy_f_to_gpu(const AtomLocality atomLocality,
const Nbnxm::GridSet &gridSet,
gmx_nbnxn_gpu_t *nb,
wallcycle_start(wcycle_, ewcNB_XF_BUF_OPS);
wallcycle_sub_start(wcycle_, ewcsNB_F_BUF_OPS);
- reduceForces<false>(nbat.get(), locality, pairSearch_->gridSet(), as_rvec_array(force.data()), nullptr, nullptr, gpu_nbv, false, false);
+ reduceForces(nbat.get(), locality, pairSearch_->gridSet(), as_rvec_array(force.data()));
wallcycle_sub_stop(wcycle_, ewcsNB_F_BUF_OPS);
wallcycle_stop(wcycle_, ewcNB_XF_BUF_OPS);
}
void
-nonbonded_verlet_t::atomdata_add_nbat_f_to_f(const Nbnxm::AtomLocality locality,
- gmx::ArrayRef<gmx::RVec> force,
- void *fPmeDeviceBuffer,
- GpuEventSynchronizer *pmeForcesReady,
- BufferOpsUseGpu useGpu,
- bool useGpuFPmeReduction,
- bool accumulateForce)
+nonbonded_verlet_t::atomdata_add_nbat_f_to_f_gpu(const Nbnxm::AtomLocality locality,
+ DeviceBuffer<float> totalForcesDevice,
+ void *forcesPmeDevice,
+ GpuEventSynchronizer *pmeForcesReady,
+ bool useGpuFPmeReduction,
+ bool accumulateForce)
{
- GMX_ASSERT(!((useGpu == BufferOpsUseGpu::False) && accumulateForce),
- "Accumulatation of force is only valid when GPU buffer ops are active");
-
- GMX_ASSERT((useGpuFPmeReduction == (fPmeDeviceBuffer != nullptr)),
+ GMX_ASSERT((useGpuFPmeReduction == (forcesPmeDevice != nullptr)),
"GPU PME force reduction is only valid when a non-null GPU PME force pointer is available");
/* Skip the reduction if there was no short-range GPU work to do
wallcycle_start(wcycle_, ewcNB_XF_BUF_OPS);
wallcycle_sub_start(wcycle_, ewcsNB_F_BUF_OPS);
- auto fn = useGpu == BufferOpsUseGpu::True ? reduceForces<true> : reduceForces<false>;
- fn(nbat.get(), locality, pairSearch_->gridSet(), as_rvec_array(force.data()), fPmeDeviceBuffer, pmeForcesReady, gpu_nbv, useGpuFPmeReduction, accumulateForce);
+ reduceForcesGpu(locality, totalForcesDevice, pairSearch_->gridSet(), forcesPmeDevice, pmeForcesReady, gpu_nbv, useGpuFPmeReduction, accumulateForce);
wallcycle_sub_stop(wcycle_, ewcsNB_F_BUF_OPS);
wallcycle_stop(wcycle_, ewcNB_XF_BUF_OPS);
}
+DeviceBuffer<float> nonbonded_verlet_t::getDeviceForces()
+{
+ return nbnxn_atomdata_get_f_gpu(gpu_nbv);
+}
+
real nonbonded_verlet_t::pairlistInnerRadius() const
{
return pairlistSets_->params().rlistInner;
void atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality locality,
gmx::ArrayRef<gmx::RVec> force);
- /*! \brief Add the forces stored in nbat to f, allowing for possibility that GPU buffer ops are active
- * \param [in] locality Local or non-local
- * \param [inout] force Force to be added to
- * \param [in] fPme Force from PME calculation
- * \param [in] pmeForcesReady Event triggered when PME force calculation has completed
- * \param [in] useGpu Whether GPU buffer ops are active
- * \param [in] useGpuFPmeReduction Whether PME force reduction is on GPU
- * \param [in] accumulateForce Whether force should be accumulated or stored
+ /*! \brief Add the forces stored in nbat to the total force using GPU buffer ops
+ *
+ * \param [in] locality Local or non-local
+ * \param [in,out] totalForcesDevice Force to be added to
+ * \param [in] forcesPmeDevice Device buffer with PME forces
+ * \param [in] pmeForcesReady Event triggered when PME force calculation has completed
+ * \param [in] useGpuFPmeReduction Whether PME forces should be added
+ * \param [in] accumulateForce Whether the total force buffer already contains data to accumulate into
*/
- void atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality locality,
- gmx::ArrayRef<gmx::RVec> force,
- void *fPme,
- GpuEventSynchronizer *pmeForcesReady,
- BufferOpsUseGpu useGpu,
- bool useGpuFPmeReduction,
- bool accumulateForce);
+ void atomdata_add_nbat_f_to_f_gpu(Nbnxm::AtomLocality locality,
+ DeviceBuffer<float> totalForcesDevice,
+ void *forcesPmeDevice,
+ GpuEventSynchronizer *pmeForcesReady,
+ bool useGpuFPmeReduction,
+ bool accumulateForce);
+
+ /*! \brief Getter for the GPU force buffer.
+ *
+ * \todo This function will be removed in future patches as the management of the device buffers
+ * is moved to a separate object.
+ *
+ * \returns The force buffer in plain rvec format.
+ */
+ DeviceBuffer<float> getDeviceForces();
/*! \brief Outer body of function to perform initialization for F buffer operations on GPU. */
void atomdata_init_add_nbat_f_to_f_gpu();
gmx_nbnxn_gpu_t gmx_unused *gpu_nbv,
int gmx_unused natoms_total) CUDA_FUNC_TERM;
-/*! \brief F buffer operations on GPU: adds nb format force to rvec format. */
+/*! \brief Force buffer operations on GPU.
+ *
+ * Transforms the non-bonded forces into plain rvec format and adds all the force components to the
+ * total force buffer.
+ *
+ * \param[in] atomLocality Whether the reduction is performed on local or non-local atoms.
+ * \param[in] totalForcesDevice Device buffer to accumulate resulting force.
+ * \param[in] gpu_nbv The NBNXM GPU data structure.
+ * \param[in] pmeForcesDevice Device buffer with PME forces.
+ * \param[in] pmeForcesReady Event that signals when the PME forces are ready for the reduction.
+ * \param[in] atomStart Index of the first atom to reduce forces for.
+ * \param[in] numAtoms Number of atoms to reduce forces for.
+ * \param[in] useGpuFPmeReduction Whether PME forces should be added.
+ * \param[in] accumulateForce Whether there is useful data already in the total force buffer.
+ *
+ */
CUDA_FUNC_QUALIFIER
void nbnxn_gpu_add_nbat_f_to_f(AtomLocality gmx_unused atomLocality,
+ DeviceBuffer<float> gmx_unused totalForcesDevice,
gmx_nbnxn_gpu_t gmx_unused *gpu_nbv,
- void gmx_unused *fPmeDevicePtr,
+ void gmx_unused *pmeForcesDevice,
GpuEventSynchronizer gmx_unused *pmeForcesReady,
int gmx_unused atomStart,
- int gmx_unused nAtoms,
+ int gmx_unused numAtoms,
bool gmx_unused useGpuFPmeReduction,
bool gmx_unused accumulateForce) CUDA_FUNC_TERM;
+/*! \brief Getter for the device force buffer.
+ *
+ * \todo This will be removed as the management of the buffers is taken out of the NBNXM module.
+ *
+ * \param[in] gpu_nbv The nonbonded data GPU structure.
+ *
+ * \returns Device force buffer in plain rvec format.
+ */
+CUDA_FUNC_QUALIFIER
+DeviceBuffer<float> nbnxn_gpu_get_f_gpu(gmx_nbnxn_gpu_t gmx_unused *gpu_nbv) CUDA_FUNC_TERM_WITH_RETURN(DeviceBuffer<float> {});
+
/*! \brief Copy force buffer from CPU to GPU */
CUDA_FUNC_QUALIFIER
void nbnxn_launch_copy_f_to_gpu(AtomLocality gmx_unused atomLocality,