* \param[in] pme The PME data structure.
* \returns Pointer to coordinate data
*/
-GPU_FUNC_QUALIFIER void *pme_gpu_get_device_x(const gmx_pme_t *GPU_FUNC_ARGUMENT(pme)) GPU_FUNC_TERM_WITH_RETURN(nullptr);
+GPU_FUNC_QUALIFIER DeviceBuffer<float> pme_gpu_get_device_x(const gmx_pme_t *GPU_FUNC_ARGUMENT(pme)) GPU_FUNC_TERM_WITH_RETURN(DeviceBuffer<float> {});
/*! \brief Get pointer to device copy of force data.
* \param[in] pme The PME data structure.
wallcycle_stop(wcycle, ewcLAUNCH_GPU);
}
-void *pme_gpu_get_device_x(const gmx_pme_t *pme)
+DeviceBuffer<float> pme_gpu_get_device_x(const gmx_pme_t *pme)
{
- if (!pme || !pme_gpu_active(pme))
- {
- return nullptr;
- }
+ GMX_ASSERT((pme && pme_gpu_active(pme)), "PME GPU coordinates buffer was requested from uninitialized PME module");
return pme_gpu_get_kernelparam_coordinates(pme->gpu);
}
}
}
-void * pme_gpu_get_kernelparam_coordinates(const PmeGpu *pmeGpu)
+DeviceBuffer<float> pme_gpu_get_kernelparam_coordinates(const PmeGpu *pmeGpu)
{
- if (pmeGpu && pmeGpu->kernelParams)
- {
- return pmeGpu->kernelParams->atoms.d_coordinates;
- }
- else
- {
- return nullptr;
- }
+ GMX_ASSERT(pmeGpu && pmeGpu->kernelParams, "PME GPU device buffer was requested in non-GPU build or before the GPU PME was initialized.");
+
+ return pmeGpu->kernelParams->atoms.d_coordinates;
}
void * pme_gpu_get_kernelparam_forces(const PmeGpu *pmeGpu)
* \param[in] pmeGpu The PME GPU structure.
* \returns Pointer to coordinate data
*/
-GPU_FUNC_QUALIFIER void * pme_gpu_get_kernelparam_coordinates(const PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM_WITH_RETURN(nullptr);
+GPU_FUNC_QUALIFIER DeviceBuffer<float> pme_gpu_get_kernelparam_coordinates(const PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM_WITH_RETURN(DeviceBuffer<float> {});
/*! \brief Return pointer to device copy of force data.
* \param[in] pmeGpu The PME GPU structure.
}
else
{
- nbv->setCoordinates(Nbnxm::AtomLocality::Local, false,
- x.unpaddedArrayRef(), useGpuXBufOps, pme_gpu_get_device_x(fr->pmedata));
+ if (useGpuXBufOps == BufferOpsUseGpu::True)
+ {
+ // The condition here was (pme != nullptr && pme_gpu_get_device_x(fr->pmedata) != nullptr)
+ if (!useGpuPme)
+ {
+ nbv->copyCoordinatesToGpu(Nbnxm::AtomLocality::Local, false,
+ x.unpaddedArrayRef());
+ }
+ nbv->convertCoordinatesGpu(Nbnxm::AtomLocality::Local, false,
+ useGpuPme ? pme_gpu_get_device_x(fr->pmedata) : nbv->getDeviceCoordinates());
+ }
+ else
+ {
+ nbv->convertCoordinates(Nbnxm::AtomLocality::Local, false,
+ x.unpaddedArrayRef());
+ }
}
if (bUseGPU)
{
dd_move_x(cr->dd, box, x.unpaddedArrayRef(), wcycle);
- nbv->setCoordinates(Nbnxm::AtomLocality::NonLocal, false,
- x.unpaddedArrayRef(), useGpuXBufOps, pme_gpu_get_device_x(fr->pmedata));
+ if (useGpuXBufOps == BufferOpsUseGpu::True)
+ {
+ // The condition here was (pme != nullptr && pme_gpu_get_device_x(fr->pmedata) != nullptr)
+ if (!useGpuPme)
+ {
+ nbv->copyCoordinatesToGpu(Nbnxm::AtomLocality::NonLocal, false,
+ x.unpaddedArrayRef());
+ }
+ nbv->convertCoordinatesGpu(Nbnxm::AtomLocality::NonLocal, false,
+ useGpuPme ? pme_gpu_get_device_x(fr->pmedata) : nbv->getDeviceCoordinates());
+ }
+ else
+ {
+ nbv->convertCoordinates(Nbnxm::AtomLocality::NonLocal, false,
+ x.unpaddedArrayRef());
+ }
}
}
}
-/* Copies (and reorders) the coordinates to nbnxn_atomdata_t */
-template <bool useGpu>
-void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet &gridSet,
- const Nbnxm::AtomLocality locality,
- gmx_bool FillLocal,
- const rvec *x,
- nbnxn_atomdata_t *nbat,
- gmx_nbnxn_gpu_t *gpu_nbv,
- void *xPmeDevicePtr)
+// This is slightly different from nbnxn_get_atom_range(...) at the end of the file
+// TODO: Combine if possible
+static void getAtomRanges(const Nbnxm::GridSet &gridSet,
+ const Nbnxm::AtomLocality locality,
+ int *gridBegin,
+ int *gridEnd)
{
- int gridBegin = 0;
- int gridEnd = 0;
-
switch (locality)
{
case Nbnxm::AtomLocality::All:
- gridBegin = 0;
- gridEnd = gridSet.grids().size();
+ *gridBegin = 0;
+ *gridEnd = gridSet.grids().size();
break;
case Nbnxm::AtomLocality::Local:
- gridBegin = 0;
- gridEnd = 1;
+ *gridBegin = 0;
+ *gridEnd = 1;
break;
case Nbnxm::AtomLocality::NonLocal:
- gridBegin = 1;
- gridEnd = gridSet.grids().size();
+ *gridBegin = 1;
+ *gridEnd = gridSet.grids().size();
break;
case Nbnxm::AtomLocality::Count:
GMX_ASSERT(false, "Count is invalid locality specifier");
break;
}
+}
- if (FillLocal)
+/* Copies (and reorders) the coordinates to nbnxn_atomdata_t */
+void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet &gridSet,
+ const Nbnxm::AtomLocality locality,
+ bool fillLocal,
+ const rvec *coordinates,
+ nbnxn_atomdata_t *nbat)
+{
+
+ int gridBegin = 0;
+ int gridEnd = 0;
+ getAtomRanges(gridSet, locality, &gridBegin, &gridEnd);
+
+ if (fillLocal)
{
nbat->natoms_local = gridSet.grids()[0].atomIndexEnd();
}
- if (useGpu)
- {
- for (int g = gridBegin; g < gridEnd; g++)
- {
- nbnxn_gpu_x_to_nbat_x(gridSet.grids()[g],
- FillLocal && g == 0,
- gpu_nbv,
- xPmeDevicePtr,
- locality,
- x, g, gridSet.numColumnsMax());
- }
- }
- else
- {
- const int nth = gmx_omp_nthreads_get(emntPairsearch);
+ const int nth = gmx_omp_nthreads_get(emntPairsearch);
#pragma omp parallel for num_threads(nth) schedule(static)
- for (int th = 0; th < nth; th++)
+ for (int th = 0; th < nth; th++)
+ {
+ try
{
- try
+ for (int g = gridBegin; g < gridEnd; g++)
{
- for (int g = gridBegin; g < gridEnd; g++)
- {
- const Nbnxm::Grid &grid = gridSet.grids()[g];
- const int numCellsXY = grid.numColumns();
+ const Nbnxm::Grid &grid = gridSet.grids()[g];
+ const int numCellsXY = grid.numColumns();
- const int cxy0 = (numCellsXY* th + nth - 1)/nth;
- const int cxy1 = (numCellsXY*(th + 1) + nth - 1)/nth;
+ const int cxy0 = (numCellsXY* th + nth - 1)/nth;
+ const int cxy1 = (numCellsXY*(th + 1) + nth - 1)/nth;
- for (int cxy = cxy0; cxy < cxy1; cxy++)
- {
- const int na = grid.numAtomsInColumn(cxy);
- const int ash = grid.firstAtomInColumn(cxy);
+ for (int cxy = cxy0; cxy < cxy1; cxy++)
+ {
+ const int na = grid.numAtomsInColumn(cxy);
+ const int ash = grid.firstAtomInColumn(cxy);
- int na_fill;
- if (g == 0 && FillLocal)
- {
- na_fill = grid.paddedNumAtomsInColumn(cxy);
- }
- else
- {
- /* We fill only the real particle locations.
- * We assume the filling entries at the end have been
- * properly set before during pair-list generation.
- */
- na_fill = na;
- }
- copy_rvec_to_nbat_real(gridSet.atomIndices().data() + ash,
- na, na_fill, x,
- nbat->XFormat, nbat->x().data(), ash);
+ int na_fill;
+ if (g == 0 && fillLocal)
+ {
+ na_fill = grid.paddedNumAtomsInColumn(cxy);
+ }
+ else
+ {
+ /* We fill only the real particle locations.
+ * We assume the filling entries at the end have been
+ * properly set before during pair-list generation.
+ */
+ na_fill = na;
}
+ copy_rvec_to_nbat_real(gridSet.atomIndices().data() + ash,
+ na, na_fill, coordinates,
+ nbat->XFormat, nbat->x().data(), ash);
}
}
- GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
}
+ GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
}
}
-template
-void nbnxn_atomdata_copy_x_to_nbat_x<true>(const Nbnxm::GridSet &,
- const Nbnxm::AtomLocality,
- gmx_bool,
- const rvec*,
- nbnxn_atomdata_t *,
- gmx_nbnxn_gpu_t*,
- void *);
-template
-void nbnxn_atomdata_copy_x_to_nbat_x<false>(const Nbnxm::GridSet &,
- const Nbnxm::AtomLocality,
- gmx_bool,
- const rvec*,
- nbnxn_atomdata_t *,
- gmx_nbnxn_gpu_t*,
- void *);
+void nbnxn_atomdata_copy_x_to_gpu(const Nbnxm::GridSet &gridSet,
+ const Nbnxm::AtomLocality locality,
+ bool fillLocal,
+ nbnxn_atomdata_t *nbat,
+ gmx_nbnxn_gpu_t *gpu_nbv,
+ const rvec *coordinatesHost)
+{
+ int gridBegin = 0;
+ int gridEnd = 0;
+ getAtomRanges(gridSet, locality, &gridBegin, &gridEnd);
+
+ if (fillLocal)
+ {
+ nbat->natoms_local = gridSet.grids()[0].atomIndexEnd();
+ }
+
+ for (int g = gridBegin; g < gridEnd; g++)
+ {
+ nbnxn_gpu_copy_x_to_gpu(gridSet.grids()[g],
+ fillLocal && g == 0,
+ gpu_nbv,
+ locality,
+ coordinatesHost,
+ g,
+ gridSet.numColumnsMax());
+ }
+}
+
+DeviceBuffer<float> nbnxn_atomdata_get_x_gpu(gmx_nbnxn_gpu_t *gpu_nbv)
+{
+ return Nbnxm::nbnxn_gpu_get_x_gpu(gpu_nbv);
+}
+
+/* Copies (and reorders) the coordinates to nbnxn_atomdata_t on the GPU*/
+void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet &gridSet,
+ const Nbnxm::AtomLocality locality,
+ bool fillLocal,
+ gmx_nbnxn_gpu_t *gpu_nbv,
+ DeviceBuffer<float> coordinatesDevice)
+{
+
+ int gridBegin = 0;
+ int gridEnd = 0;
+ getAtomRanges(gridSet, locality, &gridBegin, &gridEnd);
+
+ for (int g = gridBegin; g < gridEnd; g++)
+ {
+ nbnxn_gpu_x_to_nbat_x(gridSet.grids()[g],
+ fillLocal && g == 0,
+ gpu_nbv,
+ coordinatesDevice,
+ locality,
+ g,
+ gridSet.numColumnsMax());
+ }
+}
static void
nbnxn_atomdata_clear_reals(gmx::ArrayRef<real> dest,
#include <cstdio>
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/gpu_utils/hostallocator.h"
#include "gromacs/math/vectypes.h"
#include "gromacs/utility/basedefinitions.h"
rvec *shift_vec,
nbnxn_atomdata_t *nbat);
-/* Copy x to nbat->x.
- * FillLocal tells if the local filler particle coordinates should be zeroed.
+/*! \brief Transform coordinates to xbat layout
+ *
+ * Creates a copy of the coordinates buffer using short-range ordering.
+ *
+ * \param[in] gridSet The grids data.
+ * \param[in] locality If the transformation should be applied to local or non local coordinates.
+ * \param[in] fillLocal Tells if the local filler particle coordinates should be zeroed.
+ * \param[in] coordinates Coordinates in plain rvec format.
+ * \param[in,out] nbat Data in NBNXM format, used for mapping formats and to locate the output buffer.
*/
-template <bool useGpu>
void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet &gridSet,
Nbnxm::AtomLocality locality,
- gmx_bool FillLocal,
- const rvec *x,
- nbnxn_atomdata_t *nbat,
- gmx_nbnxn_gpu_t *gpu_nbv,
- void *xPmeDevicePtr);
+ bool fillLocal,
+ const rvec *coordinates,
+ nbnxn_atomdata_t *nbat);
-extern template
-void nbnxn_atomdata_copy_x_to_nbat_x<true>(const Nbnxm::GridSet &,
- const Nbnxm::AtomLocality,
- gmx_bool,
- const rvec*,
- nbnxn_atomdata_t *,
- gmx_nbnxn_gpu_t*,
- void *);
-extern template
-void nbnxn_atomdata_copy_x_to_nbat_x<false>(const Nbnxm::GridSet &,
- const Nbnxm::AtomLocality,
- gmx_bool,
- const rvec*,
- nbnxn_atomdata_t *,
- gmx_nbnxn_gpu_t*,
- void *);
+/*! \brief Copies the coordinates to the GPU (in plain rvec format)
+ *
+ * This function copied data to the gpu so that the transformation to the NBNXM format can be done on the GPU.
+ *
+ * \param[in] gridSet The grids data.
+ * \param[in] locality If local or non local coordinates should be copied.
+ * \param[in] fillLocal If the local filler particle coordinates should be zeroed.
+ * \param[in] nbat Data in NBNXM format, used to zero coordinates of filler particles.
+ * \param[in] gpu_nbv The NBNXM GPU data structure.
+ * \param[in] coordinatesHost Coordinates to be copied (in plain rvec format).
+ */
+void nbnxn_atomdata_copy_x_to_gpu(const Nbnxm::GridSet &gridSet,
+ Nbnxm::AtomLocality locality,
+ bool fillLocal,
+ nbnxn_atomdata_t *nbat,
+ gmx_nbnxn_gpu_t *gpu_nbv,
+ const rvec *coordinatesHost);
+
+/*!\brief Getter for the GPU coordinates buffer
+ *
+ * \param[in] gpu_nbv The NBNXM GPU data structure.
+ */
+DeviceBuffer<float> nbnxn_atomdata_get_x_gpu(gmx_nbnxn_gpu_t *gpu_nbv);
+
+/*! \brief Transform coordinates to xbat layout on GPU
+ *
+ * Creates a GPU copy of the coordinates buffer using short-range ordering.
+ * As input, uses coordinates in plain rvec format in GPU memory.
+ *
+ * \param[in] gridSet The grids data.
+ * \param[in] locality If the transformation should be applied to local or non local coordinates.
+ * \param[in] fillLocal Tells if the local filler particle coordinates should be zeroed.
+ * \param[in,out] gpu_nbv The NBNXM GPU data structure.
+ * \param[in] coordinatesDevice Coordinates to be copied (in plain rvec format).
+ */
+void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet &gridSet,
+ Nbnxm::AtomLocality locality,
+ bool fillLocal,
+ gmx_nbnxn_gpu_t *gpu_nbv,
+ DeviceBuffer<float> coordinatesDevice);
//! Add the computed forces to \p f, an internal reduction might be performed as well
template <bool useGpu>
}
}
-/* X buffer operations on GPU: performs conversion from rvec to nb format. */
-void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid &grid,
- bool setFillerCoords,
- gmx_nbnxn_gpu_t *nb,
- void *xPmeDevicePtr,
- const Nbnxm::AtomLocality locality,
- const rvec *x,
- int gridId,
- int numColumnsMax)
+/* X buffer operations on GPU: copies coordinates to the GPU in rvec format. */
+void nbnxn_gpu_copy_x_to_gpu(const Nbnxm::Grid &grid,
+ bool setFillerCoords,
+ gmx_nbnxn_gpu_t *nb,
+ const Nbnxm::AtomLocality locality,
+ const rvec *coordinatesHost,
+ int gridId,
+ int numColumnsMax)
{
GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
- GMX_ASSERT(x, "Need a valid x pointer");
+ GMX_ASSERT(coordinatesHost, "Need a valid host pointer");
- cu_atomdata_t *adat = nb->atdat;
bool bDoTime = nb->bDoTime;
const int numColumns = grid.numColumns();
return;
}
- const rvec *d_x;
+ if (bDoTime)
+ {
+ nb->timers->xf[locality].nb_h2d.openTimingRegion(stream);
+ }
- // copy of coordinates will be required if null pointer has been
- // passed to function
- // TODO improve this mechanism
- bool copyCoord = (xPmeDevicePtr == nullptr);
+ rvec *devicePtrDest = reinterpret_cast<rvec *> (nb->xrvec[copyAtomStart]);
+ const rvec *devicePtrSrc = reinterpret_cast<const rvec *> (coordinatesHost[copyAtomStart]);
+ copyToDeviceBuffer(&devicePtrDest, devicePtrSrc, 0, nCopyAtoms,
+ stream, GpuApiCallBehavior::Async, nullptr);
- // copy X-coordinate data to device
- if (copyCoord)
+ if (bDoTime)
{
- if (bDoTime)
- {
- nb->timers->xf[locality].nb_h2d.openTimingRegion(stream);
- }
+ nb->timers->xf[locality].nb_h2d.closeTimingRegion(stream);
+ }
+}
- rvec *devicePtrDest = reinterpret_cast<rvec *> (nb->xrvec[copyAtomStart]);
- const rvec *devicePtrSrc = reinterpret_cast<const rvec *> (x[copyAtomStart]);
- copyToDeviceBuffer(&devicePtrDest, devicePtrSrc, 0, nCopyAtoms,
- stream, GpuApiCallBehavior::Async, nullptr);
+DeviceBuffer<float> nbnxn_gpu_get_x_gpu(gmx_nbnxn_gpu_t *nb)
+{
+ return reinterpret_cast< DeviceBuffer<float> >(nb->xrvec);
+}
- if (bDoTime)
- {
- nb->timers->xf[locality].nb_h2d.closeTimingRegion(stream);
- }
+/* X buffer operations on GPU: performs conversion from rvec to nb format. */
+void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid &grid,
+ bool setFillerCoords,
+ gmx_nbnxn_gpu_t *nb,
+ DeviceBuffer<float> coordinatesDevice,
+ const Nbnxm::AtomLocality locality,
+ int gridId,
+ int numColumnsMax)
+{
+ GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
- d_x = nb->xrvec;
- }
- else //coordinates have already been copied by PME stream
- {
- d_x = (rvec*) xPmeDevicePtr;
- }
- GMX_ASSERT(d_x, "Need a valid d_x pointer");
+ cu_atomdata_t *adat = nb->atdat;
+
+ const int numColumns = grid.numColumns();
+ const int cellOffset = grid.cellOffset();
+ const int numAtomsPerCell = grid.numAtomsPerCell();
+ Nbnxm::InteractionLocality interactionLoc = gpuAtomToInteractionLocality(locality);
+ int nCopyAtoms = grid.srcAtomEnd() - grid.srcAtomBegin();
+ int copyAtomStart = grid.srcAtomBegin();
+
+ cudaStream_t stream = nb->stream[interactionLoc];
+
+ // TODO: This will only work with CUDA
+ GMX_ASSERT(coordinatesDevice, "Need a valid device pointer");
/* launch kernel on GPU */
&numColumns,
&xqPtr,
&setFillerCoords,
- &d_x,
+ &coordinatesDevice,
&d_atomIndices,
&d_cxy_na,
&d_cxy_ind,
nbnxn_atomdata_set(nbat.get(), pairSearch_->gridSet(), &mdatoms, atomInfo.data());
}
-void nonbonded_verlet_t::setCoordinates(const Nbnxm::AtomLocality locality,
- const bool fillLocal,
- gmx::ArrayRef<const gmx::RVec> x,
- BufferOpsUseGpu useGpu,
- void *xPmeDevicePtr)
+void nonbonded_verlet_t::convertCoordinates(const Nbnxm::AtomLocality locality,
+ const bool fillLocal,
+ gmx::ArrayRef<const gmx::RVec> coordinates)
{
wallcycle_start(wcycle_, ewcNB_XF_BUF_OPS);
wallcycle_sub_start(wcycle_, ewcsNB_X_BUF_OPS);
- auto fnPtr = (useGpu == BufferOpsUseGpu::True) ?
- nbnxn_atomdata_copy_x_to_nbat_x<true> :
- nbnxn_atomdata_copy_x_to_nbat_x<false>;
+ nbnxn_atomdata_copy_x_to_nbat_x(pairSearch_->gridSet(), locality, fillLocal,
+ as_rvec_array(coordinates.data()),
+ nbat.get());
- fnPtr(pairSearch_->gridSet(), locality, fillLocal,
- as_rvec_array(x.data()),
- nbat.get(), gpu_nbv, xPmeDevicePtr);
+ wallcycle_sub_stop(wcycle_, ewcsNB_X_BUF_OPS);
+ wallcycle_stop(wcycle_, ewcNB_XF_BUF_OPS);
+}
+
+
+void nonbonded_verlet_t::copyCoordinatesToGpu(const Nbnxm::AtomLocality locality,
+ const bool fillLocal,
+ gmx::ArrayRef<const gmx::RVec> coordinatesHost)
+{
+ wallcycle_start(wcycle_, ewcNB_XF_BUF_OPS);
+ wallcycle_sub_start(wcycle_, ewcsNB_X_BUF_OPS);
+
+ nbnxn_atomdata_copy_x_to_gpu(pairSearch_->gridSet(), locality, fillLocal,
+ nbat.get(), gpu_nbv,
+ as_rvec_array(coordinatesHost.data()));
+
+ wallcycle_sub_stop(wcycle_, ewcsNB_X_BUF_OPS);
+ wallcycle_stop(wcycle_, ewcNB_XF_BUF_OPS);
+}
+
+DeviceBuffer<float> nonbonded_verlet_t::getDeviceCoordinates()
+{
+ return nbnxn_atomdata_get_x_gpu(gpu_nbv);
+}
+
+void nonbonded_verlet_t::convertCoordinatesGpu(const Nbnxm::AtomLocality locality,
+ const bool fillLocal,
+ DeviceBuffer<float> coordinatesDevice)
+{
+ wallcycle_start(wcycle_, ewcNB_XF_BUF_OPS);
+ wallcycle_sub_start(wcycle_, ewcsNB_X_BUF_OPS);
+
+ nbnxn_atomdata_x_to_nbat_x_gpu(pairSearch_->gridSet(), locality, fillLocal,
+ gpu_nbv,
+ coordinatesDevice);
wallcycle_sub_stop(wcycle_, ewcsNB_X_BUF_OPS);
wallcycle_stop(wcycle_, ewcNB_XF_BUF_OPS);
#include <memory>
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/math/vectypes.h"
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/enumerationhelpers.h"
void setAtomProperties(const t_mdatoms &mdatoms,
gmx::ArrayRef<const int> atomInfo);
- //! Updates the coordinates in Nbnxm for the given locality
- void setCoordinates(Nbnxm::AtomLocality locality,
- bool fillLocal,
- gmx::ArrayRef<const gmx::RVec> x,
- BufferOpsUseGpu useGpu,
- void *xPmeDevicePtr);
+ /*!\brief Convert the coordinates to NBNXM format for the given locality.
+ *
+ * The API function for the transformation of the coordinates from one layout to another.
+ *
+ * \param[in] locality Whether coordinates for local or non-local atoms should be transformed.
+ * \param[in] fillLocal If the coordinates for filler particles should be zeroed.
+ * \param[in] coordinates Coordinates in plain rvec format to be transformed.
+ */
+ void convertCoordinates(Nbnxm::AtomLocality locality,
+ bool fillLocal,
+ gmx::ArrayRef<const gmx::RVec> coordinates);
+
+ /*!\brief Copy coordinates to the GPU memory.
+ *
+ * This function uses the internal NBNXM GPU pointer to copy coordinates in the plain rvec format
+ * into the GPU memory.
+ *
+ * \todo This function will be removed in future patches as the management of the device buffers
+ * is moved to a separate object.
+ *
+ * \param[in] locality Whether coordinates for local or non-local atoms should be transformed.
+ * \param[in] fillLocal If the coordinates for filler particles should be zeroed.
+ * \param[in] coordinatesHost Coordinates in plain rvec format to be transformed.
+ */
+ void copyCoordinatesToGpu(Nbnxm::AtomLocality locality,
+ bool fillLocal,
+ gmx::ArrayRef<const gmx::RVec> coordinatesHost);
+
+ /*!\brief Getter for the GPU coordinates buffer.
+ *
+ * \todo This function will be removed in future patches as the management of the device buffers
+ * is moved to a separate object.
+ *
+ * \returns The coordinates buffer in plain rvec format.
+ */
+ DeviceBuffer<float> getDeviceCoordinates();
+
+ /*!\brief Convert the coordinates to NBNXM format on the GPU for the given locality
+ *
+ * The API function for the transformation of the coordinates from one layout to another in the GPU memory.
+ *
+ * \param[in] locality Whether coordinates for local or non-local atoms should be transformed.
+ * \param[in] fillLocal If the coordinates for filler particles should be zeroed.
+ * \param[in] coordinatesDevice GPU coordinates buffer in plain rvec format to be transformed.
+ */
+ void convertCoordinatesGpu(Nbnxm::AtomLocality locality,
+ bool fillLocal,
+ DeviceBuffer<float> coordinatesDevice);
//! Init for GPU version of setup coordinates in Nbnxm
void atomdata_init_copy_x_to_nbat_x_gpu();
void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet gmx_unused &gridSet,
gmx_nbnxn_gpu_t gmx_unused *gpu_nbv) CUDA_FUNC_TERM;
+/*! \brief Copy coordinates from host to device memory.
+ *
+ * \todo This will be removed as the management of the buffers is taken out of the NBNXM module.
+ *
+ * \param[in] grid Grid to be copied.
+ * \param[in] setFillerCoords If the filler coordinates are used.
+ * \param[in,out] gpu_nbv The nonbonded data GPU structure.
+ * \param[in] locality Copy coordinates for local or non-local atoms.
+ * \param[in] coordinatesHost Host-side coordinates in plain rvec format.
+ * \param[in] gridId Index of the grid being copied.
+ * \param[in] numColumnsMax Maximum number of columns in the grid.
+ */
+CUDA_FUNC_QUALIFIER
+void nbnxn_gpu_copy_x_to_gpu(const Nbnxm::Grid gmx_unused &grid,
+ bool gmx_unused setFillerCoords,
+ gmx_nbnxn_gpu_t gmx_unused *gpu_nbv,
+ Nbnxm::AtomLocality gmx_unused locality,
+ const rvec gmx_unused *coordinatesHost,
+ int gmx_unused gridId,
+ int gmx_unused numColumnsMax) CUDA_FUNC_TERM;
+
+/*! \brief Getter for the device coordinates buffer.
+ *
+ * \todo This will be removed as the management of the buffers is taken out of the NBNXM module.
+ *
+ * \param[in] gpu_nbv The nonbonded data GPU structure.
+ *
+ * \returns Device coordinates buffer in plain rvec format.
+ */
+CUDA_FUNC_QUALIFIER
+DeviceBuffer<float> nbnxn_gpu_get_x_gpu(gmx_nbnxn_gpu_t gmx_unused *gpu_nbv) CUDA_FUNC_TERM_WITH_RETURN(DeviceBuffer<float> {});
+
+
/*! \brief X buffer operations on GPU: performs conversion from rvec to nb format.
+ *
+ * \param[in] grid Grid to be converted.
+ * \param[in] setFillerCoords If the filler coordinates are used.
+ * \param[in,out] gpu_nbv The nonbonded data GPU structure.
+ * \param[in] coordinatesDevice Device-side coordinates in plain rvec format.
+ * \param[in] locality Copy coordinates for local or non-local atoms.
+ * \param[in] gridId Index of the grid being converted.
+ * \param[in] numColumnsMax Maximum number of columns in the grid.
*/
CUDA_FUNC_QUALIFIER
-void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid gmx_unused &grid,
- bool gmx_unused setFillerCoords,
- gmx_nbnxn_gpu_t gmx_unused *gpu_nbv,
- void gmx_unused *xPmeDevicePtr,
- Nbnxm::AtomLocality gmx_unused locality,
- const rvec gmx_unused *x,
- int gmx_unused gridId,
- int gmx_unused numColumnsMax) CUDA_FUNC_TERM;
+void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid gmx_unused &grid,
+ bool gmx_unused setFillerCoords,
+ gmx_nbnxn_gpu_t gmx_unused *gpu_nbv,
+ DeviceBuffer<float> gmx_unused coordinatesDevice,
+ Nbnxm::AtomLocality gmx_unused locality,
+ int gmx_unused gridId,
+ int gmx_unused numColumnsMax) CUDA_FUNC_TERM;
/*! \brief Sync the nonlocal stream with dependent tasks in the local queue.
* \param[in] nb The nonbonded data GPU structure