Decouple coordinates buffer management from buffer ops in NBNXM
authorArtem Zhmurov <zhmurov@gmail.com>
Tue, 10 Sep 2019 20:52:37 +0000 (22:52 +0200)
committerSzilárd Páll <pall.szilard@gmail.com>
Wed, 11 Sep 2019 16:35:26 +0000 (18:35 +0200)
To make transition to the new device-side state propagator data
manager easier, the copy and plain buffers management have to be
decoupled from the actual operations on data in the buffers.
This commit splits the corresponding functions into two, and
decouples CPU- and GPU-based coordinate buffer ops one from
another. Since the coordinates are now clearly communicated
between PME and NBNXM, the plain pointer on the CPU-side code is
replaced with the DeviceBuffer<float>. The getters for the device-
side buffer now never return nullptr, but exit with the assertion
if the corresponding data is not intialized.

Change-Id: Ic793f815870a8b4f414a9b7ca0a5001e58f49c7a

src/gromacs/ewald/pme.h
src/gromacs/ewald/pme_gpu.cpp
src/gromacs/ewald/pme_gpu_internal.cpp
src/gromacs/ewald/pme_gpu_internal.h
src/gromacs/mdlib/sim_util.cpp
src/gromacs/nbnxm/atomdata.cpp
src/gromacs/nbnxm/atomdata.h
src/gromacs/nbnxm/cuda/nbnxm_cuda.cu
src/gromacs/nbnxm/nbnxm.cpp
src/gromacs/nbnxm/nbnxm.h
src/gromacs/nbnxm/nbnxm_gpu.h

index 1ca7dfbea9ca222de47b2848aa85f935e125089e..3e173d217f4f510aa918a33898a420457d4f1da0 100644 (file)
@@ -476,7 +476,7 @@ GPU_FUNC_QUALIFIER void pme_gpu_reinit_computation(const gmx_pme_t *GPU_FUNC_ARG
  * \param[in] pme            The PME data structure.
  * \returns                  Pointer to coordinate data
  */
-GPU_FUNC_QUALIFIER void *pme_gpu_get_device_x(const gmx_pme_t *GPU_FUNC_ARGUMENT(pme)) GPU_FUNC_TERM_WITH_RETURN(nullptr);
+GPU_FUNC_QUALIFIER DeviceBuffer<float> pme_gpu_get_device_x(const gmx_pme_t *GPU_FUNC_ARGUMENT(pme)) GPU_FUNC_TERM_WITH_RETURN(DeviceBuffer<float> {});
 
 /*! \brief Get pointer to device copy of force data.
  * \param[in] pme            The PME data structure.
index 1f43a9fe5f2893a248232549c8321962e6eced20..2bf148a59320ff92d11fc2ac49fa69531707efcc 100644 (file)
@@ -420,12 +420,9 @@ void pme_gpu_reinit_computation(const gmx_pme_t *pme,
     wallcycle_stop(wcycle, ewcLAUNCH_GPU);
 }
 
-void *pme_gpu_get_device_x(const gmx_pme_t *pme)
+DeviceBuffer<float> pme_gpu_get_device_x(const gmx_pme_t *pme)
 {
-    if (!pme || !pme_gpu_active(pme))
-    {
-        return nullptr;
-    }
+    GMX_ASSERT((pme && pme_gpu_active(pme)), "PME GPU coordinates buffer was requested from uninitialized PME module");
     return pme_gpu_get_kernelparam_coordinates(pme->gpu);
 }
 
index aef078d8d4021b77efee262a360e0aae10fdf926..8960f584efdf73ab7bc7b0e13ca40ae00ad781dd 100644 (file)
@@ -1289,16 +1289,11 @@ void pme_gpu_gather(PmeGpu                *pmeGpu,
     }
 }
 
-void * pme_gpu_get_kernelparam_coordinates(const PmeGpu *pmeGpu)
+DeviceBuffer<float> pme_gpu_get_kernelparam_coordinates(const PmeGpu *pmeGpu)
 {
-    if (pmeGpu && pmeGpu->kernelParams)
-    {
-        return pmeGpu->kernelParams->atoms.d_coordinates;
-    }
-    else
-    {
-        return nullptr;
-    }
+    GMX_ASSERT(pmeGpu && pmeGpu->kernelParams, "PME GPU device buffer was requested in non-GPU build or before the GPU PME was initialized.");
+
+    return pmeGpu->kernelParams->atoms.d_coordinates;
 }
 
 void * pme_gpu_get_kernelparam_forces(const PmeGpu *pmeGpu)
index 8a8c54609a39712e4110d89098cb8d5b7d1666c3..3abbed5c4d03b0065fbfe5d07639d97168e12367 100644 (file)
@@ -448,7 +448,7 @@ GPU_FUNC_QUALIFIER void pme_gpu_gather(PmeGpu                *GPU_FUNC_ARGUMENT(
  * \param[in] pmeGpu         The PME GPU structure.
  * \returns                  Pointer to coordinate data
  */
-GPU_FUNC_QUALIFIER void * pme_gpu_get_kernelparam_coordinates(const PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM_WITH_RETURN(nullptr);
+GPU_FUNC_QUALIFIER DeviceBuffer<float> pme_gpu_get_kernelparam_coordinates(const PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM_WITH_RETURN(DeviceBuffer<float> {});
 
 /*! \brief Return pointer to device copy of force data.
  * \param[in] pmeGpu         The PME GPU structure.
index 724160a4722568a7a3f266fb9bb316ef77e3fc88..853f97b021063e196bd11bdf89422f78ee511f37 100644 (file)
@@ -1120,8 +1120,22 @@ void do_force(FILE                                     *fplog,
     }
     else
     {
-        nbv->setCoordinates(Nbnxm::AtomLocality::Local, false,
-                            x.unpaddedArrayRef(), useGpuXBufOps, pme_gpu_get_device_x(fr->pmedata));
+        if (useGpuXBufOps == BufferOpsUseGpu::True)
+        {
+            // The condition here was (pme != nullptr && pme_gpu_get_device_x(fr->pmedata) != nullptr)
+            if (!useGpuPme)
+            {
+                nbv->copyCoordinatesToGpu(Nbnxm::AtomLocality::Local, false,
+                                          x.unpaddedArrayRef());
+            }
+            nbv->convertCoordinatesGpu(Nbnxm::AtomLocality::Local, false,
+                                       useGpuPme ? pme_gpu_get_device_x(fr->pmedata) : nbv->getDeviceCoordinates());
+        }
+        else
+        {
+            nbv->convertCoordinates(Nbnxm::AtomLocality::Local, false,
+                                    x.unpaddedArrayRef());
+        }
     }
 
     if (bUseGPU)
@@ -1187,8 +1201,22 @@ void do_force(FILE                                     *fplog,
         {
             dd_move_x(cr->dd, box, x.unpaddedArrayRef(), wcycle);
 
-            nbv->setCoordinates(Nbnxm::AtomLocality::NonLocal, false,
-                                x.unpaddedArrayRef(), useGpuXBufOps, pme_gpu_get_device_x(fr->pmedata));
+            if (useGpuXBufOps == BufferOpsUseGpu::True)
+            {
+                // The condition here was (pme != nullptr && pme_gpu_get_device_x(fr->pmedata) != nullptr)
+                if (!useGpuPme)
+                {
+                    nbv->copyCoordinatesToGpu(Nbnxm::AtomLocality::NonLocal, false,
+                                              x.unpaddedArrayRef());
+                }
+                nbv->convertCoordinatesGpu(Nbnxm::AtomLocality::NonLocal, false,
+                                           useGpuPme ? pme_gpu_get_device_x(fr->pmedata) : nbv->getDeviceCoordinates());
+            }
+            else
+            {
+                nbv->convertCoordinates(Nbnxm::AtomLocality::NonLocal, false,
+                                        x.unpaddedArrayRef());
+            }
 
         }
 
index e78a65945e0df3ea74579344149693a7402eed21..6424b1558e4d958aa566b88db8872d970965cba1 100644 (file)
@@ -998,116 +998,148 @@ void nbnxn_atomdata_copy_shiftvec(gmx_bool          bDynamicBox,
     }
 }
 
-/* Copies (and reorders) the coordinates to nbnxn_atomdata_t */
-template <bool useGpu>
-void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet     &gridSet,
-                                     const Nbnxm::AtomLocality locality,
-                                     gmx_bool                  FillLocal,
-                                     const rvec               *x,
-                                     nbnxn_atomdata_t         *nbat,
-                                     gmx_nbnxn_gpu_t          *gpu_nbv,
-                                     void                     *xPmeDevicePtr)
+// This is slightly different from nbnxn_get_atom_range(...) at the end of the file
+// TODO: Combine if possible
+static void getAtomRanges(const Nbnxm::GridSet      &gridSet,
+                          const Nbnxm::AtomLocality  locality,
+                          int                       *gridBegin,
+                          int                       *gridEnd)
 {
-    int gridBegin = 0;
-    int gridEnd   = 0;
-
     switch (locality)
     {
         case Nbnxm::AtomLocality::All:
-            gridBegin = 0;
-            gridEnd   = gridSet.grids().size();
+            *gridBegin = 0;
+            *gridEnd   = gridSet.grids().size();
             break;
         case Nbnxm::AtomLocality::Local:
-            gridBegin = 0;
-            gridEnd   = 1;
+            *gridBegin = 0;
+            *gridEnd   = 1;
             break;
         case Nbnxm::AtomLocality::NonLocal:
-            gridBegin = 1;
-            gridEnd   = gridSet.grids().size();
+            *gridBegin = 1;
+            *gridEnd   = gridSet.grids().size();
             break;
         case Nbnxm::AtomLocality::Count:
             GMX_ASSERT(false, "Count is invalid locality specifier");
             break;
     }
+}
 
-    if (FillLocal)
+/* Copies (and reorders) the coordinates to nbnxn_atomdata_t */
+void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet     &gridSet,
+                                     const Nbnxm::AtomLocality locality,
+                                     bool                      fillLocal,
+                                     const rvec               *coordinates,
+                                     nbnxn_atomdata_t         *nbat)
+{
+
+    int gridBegin = 0;
+    int gridEnd   = 0;
+    getAtomRanges(gridSet, locality, &gridBegin, &gridEnd);
+
+    if (fillLocal)
     {
         nbat->natoms_local = gridSet.grids()[0].atomIndexEnd();
     }
 
-    if (useGpu)
-    {
-        for (int g = gridBegin; g < gridEnd; g++)
-        {
-            nbnxn_gpu_x_to_nbat_x(gridSet.grids()[g],
-                                  FillLocal && g == 0,
-                                  gpu_nbv,
-                                  xPmeDevicePtr,
-                                  locality,
-                                  x, g, gridSet.numColumnsMax());
-        }
-    }
-    else
-    {
-        const int nth = gmx_omp_nthreads_get(emntPairsearch);
+    const int nth = gmx_omp_nthreads_get(emntPairsearch);
 #pragma omp parallel for num_threads(nth) schedule(static)
-        for (int th = 0; th < nth; th++)
+    for (int th = 0; th < nth; th++)
+    {
+        try
         {
-            try
+            for (int g = gridBegin; g < gridEnd; g++)
             {
-                for (int g = gridBegin; g < gridEnd; g++)
-                {
-                    const Nbnxm::Grid  &grid       = gridSet.grids()[g];
-                    const int           numCellsXY = grid.numColumns();
+                const Nbnxm::Grid  &grid       = gridSet.grids()[g];
+                const int           numCellsXY = grid.numColumns();
 
-                    const int           cxy0 = (numCellsXY* th      + nth - 1)/nth;
-                    const int           cxy1 = (numCellsXY*(th + 1) + nth - 1)/nth;
+                const int           cxy0 = (numCellsXY* th      + nth - 1)/nth;
+                const int           cxy1 = (numCellsXY*(th + 1) + nth - 1)/nth;
 
-                    for (int cxy = cxy0; cxy < cxy1; cxy++)
-                    {
-                        const int na  = grid.numAtomsInColumn(cxy);
-                        const int ash = grid.firstAtomInColumn(cxy);
+                for (int cxy = cxy0; cxy < cxy1; cxy++)
+                {
+                    const int na  = grid.numAtomsInColumn(cxy);
+                    const int ash = grid.firstAtomInColumn(cxy);
 
-                        int       na_fill;
-                        if (g == 0 && FillLocal)
-                        {
-                            na_fill = grid.paddedNumAtomsInColumn(cxy);
-                        }
-                        else
-                        {
-                            /* We fill only the real particle locations.
-                             * We assume the filling entries at the end have been
-                             * properly set before during pair-list generation.
-                             */
-                            na_fill = na;
-                        }
-                        copy_rvec_to_nbat_real(gridSet.atomIndices().data() + ash,
-                                               na, na_fill, x,
-                                               nbat->XFormat, nbat->x().data(), ash);
+                    int       na_fill;
+                    if (g == 0 && fillLocal)
+                    {
+                        na_fill = grid.paddedNumAtomsInColumn(cxy);
+                    }
+                    else
+                    {
+                        /* We fill only the real particle locations.
+                         * We assume the filling entries at the end have been
+                         * properly set before during pair-list generation.
+                         */
+                        na_fill = na;
                     }
+                    copy_rvec_to_nbat_real(gridSet.atomIndices().data() + ash,
+                                           na, na_fill, coordinates,
+                                           nbat->XFormat, nbat->x().data(), ash);
                 }
             }
-            GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
         }
+        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
     }
 }
 
-template
-void nbnxn_atomdata_copy_x_to_nbat_x<true>(const Nbnxm::GridSet &,
-                                           const Nbnxm::AtomLocality,
-                                           gmx_bool,
-                                           const rvec*,
-                                           nbnxn_atomdata_t *,
-                                           gmx_nbnxn_gpu_t*,
-                                           void *);
-template
-void nbnxn_atomdata_copy_x_to_nbat_x<false>(const Nbnxm::GridSet &,
-                                            const Nbnxm::AtomLocality,
-                                            gmx_bool,
-                                            const rvec*,
-                                            nbnxn_atomdata_t *,
-                                            gmx_nbnxn_gpu_t*,
-                                            void *);
+void nbnxn_atomdata_copy_x_to_gpu(const Nbnxm::GridSet     &gridSet,
+                                  const Nbnxm::AtomLocality locality,
+                                  bool                      fillLocal,
+                                  nbnxn_atomdata_t         *nbat,
+                                  gmx_nbnxn_gpu_t          *gpu_nbv,
+                                  const rvec               *coordinatesHost)
+{
+    int gridBegin = 0;
+    int gridEnd   = 0;
+    getAtomRanges(gridSet, locality, &gridBegin, &gridEnd);
+
+    if (fillLocal)
+    {
+        nbat->natoms_local = gridSet.grids()[0].atomIndexEnd();
+    }
+
+    for (int g = gridBegin; g < gridEnd; g++)
+    {
+        nbnxn_gpu_copy_x_to_gpu(gridSet.grids()[g],
+                                fillLocal && g == 0,
+                                gpu_nbv,
+                                locality,
+                                coordinatesHost,
+                                g,
+                                gridSet.numColumnsMax());
+    }
+}
+
+DeviceBuffer<float> nbnxn_atomdata_get_x_gpu(gmx_nbnxn_gpu_t *gpu_nbv)
+{
+    return Nbnxm::nbnxn_gpu_get_x_gpu(gpu_nbv);
+}
+
+/* Copies (and reorders) the coordinates to nbnxn_atomdata_t on the GPU*/
+void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet     &gridSet,
+                                    const Nbnxm::AtomLocality locality,
+                                    bool                      fillLocal,
+                                    gmx_nbnxn_gpu_t          *gpu_nbv,
+                                    DeviceBuffer<float>       coordinatesDevice)
+{
+
+    int gridBegin = 0;
+    int gridEnd   = 0;
+    getAtomRanges(gridSet, locality, &gridBegin, &gridEnd);
+
+    for (int g = gridBegin; g < gridEnd; g++)
+    {
+        nbnxn_gpu_x_to_nbat_x(gridSet.grids()[g],
+                              fillLocal && g == 0,
+                              gpu_nbv,
+                              coordinatesDevice,
+                              locality,
+                              g,
+                              gridSet.numColumnsMax());
+    }
+}
 
 static void
 nbnxn_atomdata_clear_reals(gmx::ArrayRef<real> dest,
index 6bbe082c0b6b6ed8b80a501b429748d23221f8da..488290bb5b2e3e5257ecfa3e119ec927f2b95bc5 100644 (file)
@@ -38,6 +38,7 @@
 
 #include <cstdio>
 
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
 #include "gromacs/gpu_utils/hostallocator.h"
 #include "gromacs/math/vectypes.h"
 #include "gromacs/utility/basedefinitions.h"
@@ -309,34 +310,62 @@ void nbnxn_atomdata_copy_shiftvec(gmx_bool          dynamic_box,
                                   rvec             *shift_vec,
                                   nbnxn_atomdata_t *nbat);
 
-/* Copy x to nbat->x.
- * FillLocal tells if the local filler particle coordinates should be zeroed.
+/*! \brief Transform coordinates to xbat layout
+ *
+ * Creates a copy of the coordinates buffer using short-range ordering.
+ *
+ * \param[in] gridSet      The grids data.
+ * \param[in] locality     If the transformation should be applied to local or non local coordinates.
+ * \param[in] fillLocal    Tells if the local filler particle coordinates should be zeroed.
+ * \param[in] coordinates  Coordinates in plain rvec format.
+ * \param[in,out] nbat     Data in NBNXM format, used for mapping formats and to locate the output buffer.
  */
-template <bool useGpu>
 void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet       &gridSet,
                                      Nbnxm::AtomLocality         locality,
-                                     gmx_bool                    FillLocal,
-                                     const rvec                 *x,
-                                     nbnxn_atomdata_t           *nbat,
-                                     gmx_nbnxn_gpu_t            *gpu_nbv,
-                                     void                       *xPmeDevicePtr);
+                                     bool                        fillLocal,
+                                     const rvec                 *coordinates,
+                                     nbnxn_atomdata_t           *nbat);
 
-extern template
-void nbnxn_atomdata_copy_x_to_nbat_x<true>(const Nbnxm::GridSet &,
-                                           const Nbnxm::AtomLocality,
-                                           gmx_bool,
-                                           const rvec*,
-                                           nbnxn_atomdata_t *,
-                                           gmx_nbnxn_gpu_t*,
-                                           void *);
-extern template
-void nbnxn_atomdata_copy_x_to_nbat_x<false>(const Nbnxm::GridSet &,
-                                            const Nbnxm::AtomLocality,
-                                            gmx_bool,
-                                            const rvec*,
-                                            nbnxn_atomdata_t *,
-                                            gmx_nbnxn_gpu_t*,
-                                            void *);
+/*! \brief Copies the coordinates to the GPU (in plain rvec format)
+ *
+ *  This function copied data to the gpu so that the transformation to the NBNXM format can be done on the GPU.
+ *
+ * \param[in] gridSet          The grids data.
+ * \param[in] locality         If local or non local coordinates should be copied.
+ * \param[in] fillLocal        If the local filler particle coordinates should be zeroed.
+ * \param[in] nbat             Data in NBNXM format, used to zero coordinates of filler particles.
+ * \param[in] gpu_nbv          The NBNXM GPU data structure.
+ * \param[in] coordinatesHost  Coordinates to be copied (in plain rvec format).
+ */
+void nbnxn_atomdata_copy_x_to_gpu(const Nbnxm::GridSet     &gridSet,
+                                  Nbnxm::AtomLocality       locality,
+                                  bool                      fillLocal,
+                                  nbnxn_atomdata_t         *nbat,
+                                  gmx_nbnxn_gpu_t          *gpu_nbv,
+                                  const rvec               *coordinatesHost);
+
+/*!\brief Getter for the GPU coordinates buffer
+ *
+ * \param[in] gpu_nbv  The NBNXM GPU data structure.
+ */
+DeviceBuffer<float> nbnxn_atomdata_get_x_gpu(gmx_nbnxn_gpu_t *gpu_nbv);
+
+/*! \brief Transform coordinates to xbat layout on GPU
+ *
+ * Creates a GPU copy of the coordinates buffer using short-range ordering.
+ * As input, uses coordinates in plain rvec format in GPU memory.
+ *
+ * \param[in]     gridSet            The grids data.
+ * \param[in]     locality           If the transformation should be applied to local or non local coordinates.
+ * \param[in]     fillLocal          Tells if the local filler particle coordinates should be zeroed.
+ * \param[in,out] gpu_nbv            The NBNXM GPU data structure.
+ * \param[in]     coordinatesDevice  Coordinates to be copied (in plain rvec format).
+ */
+void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet     &gridSet,
+                                    Nbnxm::AtomLocality       locality,
+                                    bool                      fillLocal,
+                                    gmx_nbnxn_gpu_t          *gpu_nbv,
+                                    DeviceBuffer<float>       coordinatesDevice);
 
 //! Add the computed forces to \p f, an internal reduction might be performed as well
 template <bool  useGpu>
index 0b30c010b0042a9dacf7194e24f158492bdcdd89..63051b7c4314f77632157b6c011d0549d9cd03ae 100644 (file)
@@ -744,20 +744,18 @@ void cuda_set_cacheconfig()
     }
 }
 
-/* X buffer operations on GPU: performs conversion from rvec to nb format. */
-void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid               &grid,
-                           bool                             setFillerCoords,
-                           gmx_nbnxn_gpu_t                 *nb,
-                           void                            *xPmeDevicePtr,
-                           const Nbnxm::AtomLocality        locality,
-                           const rvec                      *x,
-                           int                              gridId,
-                           int                              numColumnsMax)
+/* X buffer operations on GPU: copies coordinates to the GPU in rvec format. */
+void nbnxn_gpu_copy_x_to_gpu(const Nbnxm::Grid               &grid,
+                             bool                             setFillerCoords,
+                             gmx_nbnxn_gpu_t                 *nb,
+                             const Nbnxm::AtomLocality        locality,
+                             const rvec                      *coordinatesHost,
+                             int                              gridId,
+                             int                              numColumnsMax)
 {
     GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
-    GMX_ASSERT(x,  "Need a valid x pointer");
+    GMX_ASSERT(coordinatesHost,  "Need a valid host pointer");
 
-    cu_atomdata_t             *adat    = nb->atdat;
     bool                       bDoTime = nb->bDoTime;
 
     const int                  numColumns                = grid.numColumns();
@@ -780,38 +778,51 @@ void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid               &grid,
         return;
     }
 
-    const rvec *d_x;
+    if (bDoTime)
+    {
+        nb->timers->xf[locality].nb_h2d.openTimingRegion(stream);
+    }
 
-    // copy of coordinates will be required if null pointer has been
-    // passed to function
-    // TODO improve this mechanism
-    bool        copyCoord = (xPmeDevicePtr == nullptr);
+    rvec       *devicePtrDest = reinterpret_cast<rvec *> (nb->xrvec[copyAtomStart]);
+    const rvec *devicePtrSrc  = reinterpret_cast<const rvec *> (coordinatesHost[copyAtomStart]);
+    copyToDeviceBuffer(&devicePtrDest, devicePtrSrc, 0, nCopyAtoms,
+                       stream, GpuApiCallBehavior::Async, nullptr);
 
-    // copy X-coordinate data to device
-    if (copyCoord)
+    if (bDoTime)
     {
-        if (bDoTime)
-        {
-            nb->timers->xf[locality].nb_h2d.openTimingRegion(stream);
-        }
+        nb->timers->xf[locality].nb_h2d.closeTimingRegion(stream);
+    }
+}
 
-        rvec       *devicePtrDest = reinterpret_cast<rvec *> (nb->xrvec[copyAtomStart]);
-        const rvec *devicePtrSrc  = reinterpret_cast<const rvec *> (x[copyAtomStart]);
-        copyToDeviceBuffer(&devicePtrDest, devicePtrSrc, 0, nCopyAtoms,
-                           stream, GpuApiCallBehavior::Async, nullptr);
+DeviceBuffer<float> nbnxn_gpu_get_x_gpu(gmx_nbnxn_gpu_t *nb)
+{
+    return reinterpret_cast< DeviceBuffer<float> >(nb->xrvec);
+}
 
-        if (bDoTime)
-        {
-            nb->timers->xf[locality].nb_h2d.closeTimingRegion(stream);
-        }
+/* X buffer operations on GPU: performs conversion from rvec to nb format. */
+void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid               &grid,
+                           bool                             setFillerCoords,
+                           gmx_nbnxn_gpu_t                 *nb,
+                           DeviceBuffer<float>              coordinatesDevice,
+                           const Nbnxm::AtomLocality        locality,
+                           int                              gridId,
+                           int                              numColumnsMax)
+{
+    GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
 
-        d_x = nb->xrvec;
-    }
-    else //coordinates have already been copied by PME stream
-    {
-        d_x = (rvec*) xPmeDevicePtr;
-    }
-    GMX_ASSERT(d_x,  "Need a valid d_x pointer");
+    cu_atomdata_t             *adat    = nb->atdat;
+
+    const int                  numColumns                = grid.numColumns();
+    const int                  cellOffset                = grid.cellOffset();
+    const int                  numAtomsPerCell           = grid.numAtomsPerCell();
+    Nbnxm::InteractionLocality interactionLoc            = gpuAtomToInteractionLocality(locality);
+    int                        nCopyAtoms                = grid.srcAtomEnd() - grid.srcAtomBegin();
+    int                        copyAtomStart             = grid.srcAtomBegin();
+
+    cudaStream_t               stream  = nb->stream[interactionLoc];
+
+    // TODO: This will only work with CUDA
+    GMX_ASSERT(coordinatesDevice,  "Need a valid device pointer");
 
     /* launch kernel on GPU */
 
@@ -835,7 +846,7 @@ void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid               &grid,
                                                                &numColumns,
                                                                &xqPtr,
                                                                &setFillerCoords,
-                                                               &d_x,
+                                                               &coordinatesDevice,
                                                                &d_atomIndices,
                                                                &d_cxy_na,
                                                                &d_cxy_ind,
index 3e4990a6ace074fbe71ea388137c1afa684cedb0..794b184f36747dd496a155626868c76eff2f6e74 100644 (file)
@@ -132,22 +132,52 @@ void nonbonded_verlet_t::setAtomProperties(const t_mdatoms          &mdatoms,
     nbnxn_atomdata_set(nbat.get(), pairSearch_->gridSet(), &mdatoms, atomInfo.data());
 }
 
-void nonbonded_verlet_t::setCoordinates(const Nbnxm::AtomLocality       locality,
-                                        const bool                      fillLocal,
-                                        gmx::ArrayRef<const gmx::RVec>  x,
-                                        BufferOpsUseGpu                 useGpu,
-                                        void                           *xPmeDevicePtr)
+void nonbonded_verlet_t::convertCoordinates(const Nbnxm::AtomLocality       locality,
+                                            const bool                      fillLocal,
+                                            gmx::ArrayRef<const gmx::RVec>  coordinates)
 {
     wallcycle_start(wcycle_, ewcNB_XF_BUF_OPS);
     wallcycle_sub_start(wcycle_, ewcsNB_X_BUF_OPS);
 
-    auto fnPtr = (useGpu == BufferOpsUseGpu::True) ?
-        nbnxn_atomdata_copy_x_to_nbat_x<true> :
-        nbnxn_atomdata_copy_x_to_nbat_x<false>;
+    nbnxn_atomdata_copy_x_to_nbat_x(pairSearch_->gridSet(), locality, fillLocal,
+                                    as_rvec_array(coordinates.data()),
+                                    nbat.get());
 
-    fnPtr(pairSearch_->gridSet(), locality, fillLocal,
-          as_rvec_array(x.data()),
-          nbat.get(), gpu_nbv, xPmeDevicePtr);
+    wallcycle_sub_stop(wcycle_, ewcsNB_X_BUF_OPS);
+    wallcycle_stop(wcycle_, ewcNB_XF_BUF_OPS);
+}
+
+
+void nonbonded_verlet_t::copyCoordinatesToGpu(const Nbnxm::AtomLocality       locality,
+                                              const bool                      fillLocal,
+                                              gmx::ArrayRef<const gmx::RVec>  coordinatesHost)
+{
+    wallcycle_start(wcycle_, ewcNB_XF_BUF_OPS);
+    wallcycle_sub_start(wcycle_, ewcsNB_X_BUF_OPS);
+
+    nbnxn_atomdata_copy_x_to_gpu(pairSearch_->gridSet(), locality, fillLocal,
+                                 nbat.get(), gpu_nbv,
+                                 as_rvec_array(coordinatesHost.data()));
+
+    wallcycle_sub_stop(wcycle_, ewcsNB_X_BUF_OPS);
+    wallcycle_stop(wcycle_, ewcNB_XF_BUF_OPS);
+}
+
+DeviceBuffer<float> nonbonded_verlet_t::getDeviceCoordinates()
+{
+    return nbnxn_atomdata_get_x_gpu(gpu_nbv);
+}
+
+void nonbonded_verlet_t::convertCoordinatesGpu(const Nbnxm::AtomLocality       locality,
+                                               const bool                      fillLocal,
+                                               DeviceBuffer<float>             coordinatesDevice)
+{
+    wallcycle_start(wcycle_, ewcNB_XF_BUF_OPS);
+    wallcycle_sub_start(wcycle_, ewcsNB_X_BUF_OPS);
+
+    nbnxn_atomdata_x_to_nbat_x_gpu(pairSearch_->gridSet(), locality, fillLocal,
+                                   gpu_nbv,
+                                   coordinatesDevice);
 
     wallcycle_sub_stop(wcycle_, ewcsNB_X_BUF_OPS);
     wallcycle_stop(wcycle_, ewcNB_XF_BUF_OPS);
index 1a2ce8cc0c5b2dde9f5bc7a1161cbeae60e2de6a..50ead8c6551f405489acffe0deb617f9028d1a70 100644 (file)
 
 #include <memory>
 
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
 #include "gromacs/math/vectypes.h"
 #include "gromacs/utility/arrayref.h"
 #include "gromacs/utility/enumerationhelpers.h"
@@ -251,12 +252,54 @@ struct nonbonded_verlet_t
         void setAtomProperties(const t_mdatoms          &mdatoms,
                                gmx::ArrayRef<const int>  atomInfo);
 
-        //! Updates the coordinates in Nbnxm for the given locality
-        void setCoordinates(Nbnxm::AtomLocality             locality,
-                            bool                            fillLocal,
-                            gmx::ArrayRef<const gmx::RVec>  x,
-                            BufferOpsUseGpu                 useGpu,
-                            void                           *xPmeDevicePtr);
+        /*!\brief Convert the coordinates to NBNXM format for the given locality.
+         *
+         * The API function for the transformation of the coordinates from one layout to another.
+         *
+         * \param[in] locality     Whether coordinates for local or non-local atoms should be transformed.
+         * \param[in] fillLocal    If the coordinates for filler particles should be zeroed.
+         * \param[in] coordinates  Coordinates in plain rvec format to be transformed.
+         */
+        void convertCoordinates(Nbnxm::AtomLocality             locality,
+                                bool                            fillLocal,
+                                gmx::ArrayRef<const gmx::RVec>  coordinates);
+
+        /*!\brief Copy coordinates to the GPU memory.
+         *
+         * This function uses the internal NBNXM GPU pointer to copy coordinates in the plain rvec format
+         * into the GPU memory.
+         *
+         * \todo This function will be removed in future patches as the management of the device buffers
+         *       is moved to a separate object.
+         *
+         * \param[in] locality         Whether coordinates for local or non-local atoms should be transformed.
+         * \param[in] fillLocal        If the coordinates for filler particles should be zeroed.
+         * \param[in] coordinatesHost  Coordinates in plain rvec format to be transformed.
+         */
+        void copyCoordinatesToGpu(Nbnxm::AtomLocality             locality,
+                                  bool                            fillLocal,
+                                  gmx::ArrayRef<const gmx::RVec>  coordinatesHost);
+
+        /*!\brief Getter for the GPU coordinates buffer.
+         *
+         * \todo This function will be removed in future patches as the management of the device buffers
+         *       is moved to a separate object.
+         *
+         * \returns The coordinates buffer in plain rvec format.
+         */
+        DeviceBuffer<float> getDeviceCoordinates();
+
+        /*!\brief Convert the coordinates to NBNXM format on the GPU for the given locality
+         *
+         * The API function for the transformation of the coordinates from one layout to another in the GPU memory.
+         *
+         * \param[in] locality           Whether coordinates for local or non-local atoms should be transformed.
+         * \param[in] fillLocal          If the coordinates for filler particles should be zeroed.
+         * \param[in] coordinatesDevice  GPU coordinates buffer in plain rvec format to be transformed.
+         */
+        void convertCoordinatesGpu(Nbnxm::AtomLocality             locality,
+                                   bool                            fillLocal,
+                                   DeviceBuffer<float>             coordinatesDevice);
 
         //! Init for GPU version of setup coordinates in Nbnxm
         void atomdata_init_copy_x_to_nbat_x_gpu();
index 08f0aa9f96a36d16f4e80091e95ee86d79d29008..4dfcfa15d476f9737c912824d4047d42dfea7652 100644 (file)
@@ -228,17 +228,57 @@ CUDA_FUNC_QUALIFIER
 void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet gmx_unused &gridSet,
                                 gmx_nbnxn_gpu_t    gmx_unused *gpu_nbv) CUDA_FUNC_TERM;
 
+/*! \brief Copy coordinates from host to device memory.
+ *
+ * \todo This will be removed as the management of the buffers is taken out of the NBNXM module.
+ *
+ * \param[in]     grid             Grid to be copied.
+ * \param[in]     setFillerCoords  If the filler coordinates are used.
+ * \param[in,out] gpu_nbv          The nonbonded data GPU structure.
+ * \param[in]     locality         Copy coordinates for local or non-local atoms.
+ * \param[in]     coordinatesHost  Host-side coordinates in plain rvec format.
+ * \param[in]     gridId           Index of the grid being copied.
+ * \param[in]     numColumnsMax    Maximum number of columns in the grid.
+ */
+CUDA_FUNC_QUALIFIER
+void nbnxn_gpu_copy_x_to_gpu(const Nbnxm::Grid   gmx_unused &grid,
+                             bool                gmx_unused  setFillerCoords,
+                             gmx_nbnxn_gpu_t     gmx_unused *gpu_nbv,
+                             Nbnxm::AtomLocality gmx_unused  locality,
+                             const rvec          gmx_unused *coordinatesHost,
+                             int                 gmx_unused  gridId,
+                             int                 gmx_unused  numColumnsMax) CUDA_FUNC_TERM;
+
+/*! \brief Getter for the device coordinates buffer.
+ *
+ * \todo This will be removed as the management of the buffers is taken out of the NBNXM module.
+ *
+ * \param[in]  gpu_nbv  The nonbonded data GPU structure.
+ *
+ * \returns Device coordinates buffer in plain rvec format.
+ */
+CUDA_FUNC_QUALIFIER
+DeviceBuffer<float> nbnxn_gpu_get_x_gpu(gmx_nbnxn_gpu_t gmx_unused *gpu_nbv) CUDA_FUNC_TERM_WITH_RETURN(DeviceBuffer<float> {});
+
+
 /*! \brief X buffer operations on GPU: performs conversion from rvec to nb format.
+ *
+ * \param[in]     grid               Grid to be converted.
+ * \param[in]     setFillerCoords    If the filler coordinates are used.
+ * \param[in,out] gpu_nbv            The nonbonded data GPU structure.
+ * \param[in]     coordinatesDevice  Device-side coordinates in plain rvec format.
+ * \param[in]     locality           Copy coordinates for local or non-local atoms.
+ * \param[in]     gridId             Index of the grid being converted.
+ * \param[in]     numColumnsMax      Maximum number of columns in the grid.
  */
 CUDA_FUNC_QUALIFIER
-void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid  gmx_unused &grid,
-                           bool               gmx_unused  setFillerCoords,
-                           gmx_nbnxn_gpu_t    gmx_unused *gpu_nbv,
-                           void               gmx_unused *xPmeDevicePtr,
-                           Nbnxm::AtomLocality gmx_unused locality,
-                           const rvec         gmx_unused *x,
-                           int                gmx_unused  gridId,
-                           int                gmx_unused  numColumnsMax) CUDA_FUNC_TERM;
+void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid   gmx_unused &grid,
+                           bool                gmx_unused  setFillerCoords,
+                           gmx_nbnxn_gpu_t     gmx_unused *gpu_nbv,
+                           DeviceBuffer<float> gmx_unused  coordinatesDevice,
+                           Nbnxm::AtomLocality gmx_unused  locality,
+                           int                 gmx_unused  gridId,
+                           int                 gmx_unused  numColumnsMax) CUDA_FUNC_TERM;
 
 /*! \brief Sync the nonlocal stream with dependent tasks in the local queue.
  * \param[in] nb                   The nonbonded data GPU structure