Replace *_free_buffered calls by freeDeviceBuffer

author Aleksei Iupinov <a.yupinov@gmail.com>

Tue, 13 Feb 2018 16:49:40 +0000 (17:49 +0100)

committer Szilárd Páll <pall.szilard@gmail.com>

Sun, 18 Feb 2018 23:02:23 +0000 (00:02 +0100)
author Aleksei Iupinov <a.yupinov@gmail.com>
Tue, 13 Feb 2018 16:49:40 +0000 (17:49 +0100)
committer Szilárd Páll <pall.szilard@gmail.com>
Sun, 18 Feb 2018 23:02:23 +0000 (00:02 +0100)
diff --git a/src/gromacs/ewald/pme.cu b/src/gromacs/ewald/pme.cu

index bbc2bbaacc2e76b9abf2b4556377fa81bdfed4c7..1abc07aa0ecc25d38869b31cd59741d0be296e42 100644 (file)
--- a/src/gromacs/ewald/pme.cu
+++ b/src/gromacs/ewald/pme.cu
@@ -133,8 +133,7 @@ void pme_gpu_realloc_and_copy_bspline_values(const PmeGpu *pmeGpu)
  void pme_gpu_free_bspline_values(const PmeGpu *pmeGpu)
  {
      pfree(pmeGpu->staging.h_splineModuli);
-    cu_free_buffered(pmeGpu->kernelParams->grid.d_splineModuli, &pmeGpu->archSpecific->splineValuesSize,
-                     &pmeGpu->archSpecific->splineValuesSizeAlloc);
+    freeDeviceBuffer(&pmeGpu->kernelParams->grid.d_splineModuli);
  }
  
  void pme_gpu_realloc_forces(PmeGpu *pmeGpu)
@@ -149,7 +148,7 @@ void pme_gpu_realloc_forces(PmeGpu *pmeGpu)
  
  void pme_gpu_free_forces(const PmeGpu *pmeGpu)
  {
-    cu_free_buffered(pmeGpu->kernelParams->atoms.d_forces, &pmeGpu->archSpecific->forcesSize, &pmeGpu->archSpecific->forcesSizeAlloc);
+    freeDeviceBuffer(&pmeGpu->kernelParams->atoms.d_forces);
  }
  
  void pme_gpu_copy_input_forces(PmeGpu *pmeGpu)
@@ -198,7 +197,7 @@ void pme_gpu_copy_input_coordinates(const PmeGpu *pmeGpu, const rvec *h_coordina
  
  void pme_gpu_free_coordinates(const PmeGpu *pmeGpu)
  {
-    cu_free_buffered(pmeGpu->kernelParams->atoms.d_coordinates, &pmeGpu->archSpecific->coordinatesSize, &pmeGpu->archSpecific->coordinatesSizeAlloc);
+    freeDeviceBuffer(&pmeGpu->kernelParams->atoms.d_coordinates);
  }
  
  void pme_gpu_realloc_and_copy_input_coefficients(const PmeGpu *pmeGpu, const float *h_coefficients)
@@ -225,7 +224,7 @@ void pme_gpu_realloc_and_copy_input_coefficients(const PmeGpu *pmeGpu, const flo
  
  void pme_gpu_free_coefficients(const PmeGpu *pmeGpu)
  {
-    cu_free_buffered(pmeGpu->kernelParams->atoms.d_coefficients, &pmeGpu->archSpecific->coefficientsSize, &pmeGpu->archSpecific->coefficientsSizeAlloc);
+    freeDeviceBuffer(&pmeGpu->kernelParams->atoms.d_coefficients);
  }
  
  void pme_gpu_realloc_spline_data(const PmeGpu *pmeGpu)
@@ -256,8 +255,8 @@ void pme_gpu_realloc_spline_data(const PmeGpu *pmeGpu)
  void pme_gpu_free_spline_data(const PmeGpu *pmeGpu)
  {
      /* Two arrays of the same size */
-    cu_free_buffered(pmeGpu->kernelParams->atoms.d_theta);
-    cu_free_buffered(pmeGpu->kernelParams->atoms.d_dtheta, &pmeGpu->archSpecific->splineDataSize, &pmeGpu->archSpecific->splineDataSizeAlloc);
+    freeDeviceBuffer(&pmeGpu->kernelParams->atoms.d_theta);
+    freeDeviceBuffer(&pmeGpu->kernelParams->atoms.d_dtheta);
      pfree(pmeGpu->staging.h_theta);
      pfree(pmeGpu->staging.h_dtheta);
  }
@@ -274,7 +273,7 @@ void pme_gpu_realloc_grid_indices(const PmeGpu *pmeGpu)
  
  void pme_gpu_free_grid_indices(const PmeGpu *pmeGpu)
  {
-    cu_free_buffered(pmeGpu->kernelParams->atoms.d_gridlineIndices, &pmeGpu->archSpecific->gridlineIndicesSize, &pmeGpu->archSpecific->gridlineIndicesSizeAlloc);
+    freeDeviceBuffer(&pmeGpu->kernelParams->atoms.d_gridlineIndices);
      pfree(pmeGpu->staging.h_gridlineIndices);
  }
  
@@ -315,10 +314,9 @@ void pme_gpu_free_grids(const PmeGpu *pmeGpu)
  {
      if (pmeGpu->archSpecific->performOutOfPlaceFFT)
      {
-        cu_free_buffered(pmeGpu->kernelParams->grid.d_fourierGrid);
+        freeDeviceBuffer(&pmeGpu->kernelParams->grid.d_fourierGrid);
      }
-    cu_free_buffered(pmeGpu->kernelParams->grid.d_realGrid,
-                     &pmeGpu->archSpecific->realGridSize, &pmeGpu->archSpecific->realGridSizeAlloc);
+    freeDeviceBuffer(&pmeGpu->kernelParams->grid.d_realGrid);
  }
  
  void pme_gpu_clear_grids(const PmeGpu *pmeGpu)
diff --git a/src/gromacs/ewald/pme.cuh b/src/gromacs/ewald/pme.cuh

index e58c3de4957dd5416027746c46875ce31ac9facf..0f37af38d6c4678e0128f4d2a464a7f29b8ad2cf 100644 (file)
--- a/src/gromacs/ewald/pme.cuh
+++ b/src/gromacs/ewald/pme.cuh
@@ -186,8 +186,8 @@ struct PmeGpuCuda
      /* GPU arrays element counts (not the arrays sizes in bytes!).
       * They might be larger than the actual meaningful data sizes.
       * These are paired: the actual element count + the maximum element count that can fit in the current allocated memory.
-     * These integer pairs are mostly meaningful for the cu_realloc/free_buffered calls.
-     * As such, if cu_realloc/free_buffered is refactored, they can be freely changed, too.
+     * These integer pairs are mostly meaningful for the cu_realloc_buffered calls.
+     * As such, if cu_realloc_buffered is refactored, they can be freely changed, too.
       * The only exceptions are realGridSize and complexGridSize which are also used for grid clearing/copying.
       * TODO: these should live in a clean buffered container type, and be refactored in the NB/cudautils as well.
       */
diff --git a/src/gromacs/gpu_utils/cudautils.cu b/src/gromacs/gpu_utils/cudautils.cu

index 53e204a2ab111cc8313d4572b5e79f9837e35d04..e466ef97290d6062a1ace68ac0e50abad77f0a22 100644 (file)
--- a/src/gromacs/gpu_utils/cudautils.cu
+++ b/src/gromacs/gpu_utils/cudautils.cu
@@ -137,30 +137,6 @@ int cu_copy_H2D_async(void * d_dest, void * h_src, size_t bytes, cudaStream_t s
  
  /**** Operation on buffered arrays (arrays with "over-allocation" in gmx wording) *****/
  
-/*!
- * If the pointers to the size variables are NULL no resetting happens.
- */
-void cu_free_buffered(void *d_ptr, int *n, int *nalloc)
-{
-    cudaError_t stat;
-
-    if (d_ptr)
-    {
-        stat = cudaFree(d_ptr);
-        CU_RET_ERR(stat, "cudaFree failed");
-    }
-
-    if (n)
-    {
-        *n = -1;
-    }
-
-    if (nalloc)
-    {
-        *nalloc = -1;
-    }
-}
-
  /*!
   *  Reallocation of the memory pointed by d_ptr and copying of the data from
   *  the location pointed by h_src host-side pointer is done. Allocation is
@@ -190,13 +166,13 @@ void cu_realloc_buffered(void **d_dest, void *h_src,
          /* only free if the array has already been initialized */
          if (*curr_alloc_size >= 0)
          {
-            cu_free_buffered(*d_dest, curr_size, curr_alloc_size);
+            freeDeviceBuffer(d_dest);
          }
  
          *curr_alloc_size = over_alloc_large(req_size);
  
          stat = cudaMalloc(d_dest, *curr_alloc_size * type_size);
-        CU_RET_ERR(stat, "cudaMalloc failed in cu_free_buffered");
+        CU_RET_ERR(stat, "cudaMalloc failed in cu_realloc_buffered");
      }
  
      /* size could have changed without actual reallocation */
diff --git a/src/gromacs/gpu_utils/cudautils.cuh b/src/gromacs/gpu_utils/cudautils.cuh

index d74ecd4d8dea253fe7c4663428294a5890a396bc..a91df33e2d7bdf310cb0885a3c7acd38afafc50d 100644 (file)
--- a/src/gromacs/gpu_utils/cudautils.cuh
+++ b/src/gromacs/gpu_utils/cudautils.cuh
@@ -202,9 +202,6 @@ int cu_copy_H2D_sync(void * /*d_dest*/, void * /*h_src*/, size_t /*bytes*/);
  /*! Launches asynchronous host to device memory copy in stream s. */
  int cu_copy_H2D_async(void * /*d_dest*/, void * /*h_src*/, size_t /*bytes*/, cudaStream_t /*s = 0*/);
  
-/*! Frees device memory and resets the size and allocation size to -1. */
-void cu_free_buffered(void *d_ptr, int *n = NULL, int *nalloc = NULL);
-
  /*! Reallocates the device memory and copies data from the host. */
  void cu_realloc_buffered(void **d_dest, void *h_src,
                           size_t type_size,
@@ -298,7 +295,9 @@ static inline bool haveStreamTasksCompleted(cudaStream_t s)
  }
  
  /*! \brief Free a device-side buffer.
- * TODO: fully replace cu_free_buffered with this.
+ * This does not reset separately stored size/capacity integers,
+ * as this is planned to be a destructor of DeviceBuffer as a proper class,
+ * and no calls on \p buffer should be made afterwards.
   *
   * \param[in] buffer  Pointer to the buffer to free.
   */
diff --git a/src/gromacs/gpu_utils/oclutils.h b/src/gromacs/gpu_utils/oclutils.h

index e4bc91b320430a01089f9eb854320126d8a92ee4..093fe31e19f61fc708bf595278742209955c969b 100644 (file)
--- a/src/gromacs/gpu_utils/oclutils.h
+++ b/src/gromacs/gpu_utils/oclutils.h
@@ -178,7 +178,9 @@ static inline bool haveStreamTasksCompleted(cl_command_queue gmx_unused s)
  }
  
  /*! \brief Free a device-side buffer.
- * TODO: fully replace free_ocl_buffer and ocl_free_buffered with this.
+ * This does not reset separately stored size/capacity integers,
+ * as this is planned to be a destructor of DeviceBuffer as a proper class,
+ * and no calls on \p buffer should be made afterwards.
   *
   * \param[in] buffer  Pointer to the buffer to free.
   */
diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu

index ea5375667d0c3783b65f48804d36937538572fcb..ea9b3b8fe4e616059cfe05c793f7c6a8167d9f84 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
+++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
@@ -668,10 +668,10 @@ void nbnxn_gpu_init_atomdata(gmx_nbnxn_cuda_t              *nb,
          /* free up first if the arrays have already been initialized */
          if (d_atdat->nalloc != -1)
          {
-            cu_free_buffered(d_atdat->f, &d_atdat->natoms, &d_atdat->nalloc);
-            cu_free_buffered(d_atdat->xq);
-            cu_free_buffered(d_atdat->atom_types);
-            cu_free_buffered(d_atdat->lj_comb);
+            freeDeviceBuffer(&d_atdat->f);
+            freeDeviceBuffer(&d_atdat->xq);
+            freeDeviceBuffer(&d_atdat->atom_types);
+            freeDeviceBuffer(&d_atdat->lj_comb);
          }
  
          stat = cudaMalloc((void **)&d_atdat->f, nalloc*sizeof(*d_atdat->f));
@@ -784,10 +784,10 @@ void nbnxn_gpu_free(gmx_nbnxn_cuda_t *nb)
      stat = cudaFree(atdat->e_el);
      CU_RET_ERR(stat, "cudaFree failed on atdat->e_el");
  
-    cu_free_buffered(atdat->f, &atdat->natoms, &atdat->nalloc);
-    cu_free_buffered(atdat->xq);
-    cu_free_buffered(atdat->atom_types, &atdat->ntypes);
-    cu_free_buffered(atdat->lj_comb);
+    freeDeviceBuffer(&atdat->f);
+    freeDeviceBuffer(&atdat->xq);
+    freeDeviceBuffer(&atdat->atom_types);
+    freeDeviceBuffer(&atdat->lj_comb);
  
      /* Free plist */
      auto *plist = nb->plist[eintLocal];
diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp

index 189c06944f064df9659a349f20d12e2088594198..2489efa85b9260aef45989e25b95f65da58c8759 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp
+++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp
@@ -94,32 +94,6 @@ bool useLjCombRule(int vdwType)
              vdwType == evdwOclCUTCOMBLB);
  }
  
-/*! \brief Free device buffers
- *
- * If the pointers to the size variables are NULL no resetting happens.
- */
-static void ocl_free_buffered(cl_mem d_ptr, int *n, int *nalloc)
-{
-    cl_int gmx_unused cl_error;
-
-    if (d_ptr)
-    {
-        cl_error = clReleaseMemObject(d_ptr);
-        assert(cl_error == CL_SUCCESS);
-        // TODO: handle errors
-    }
-
-    if (n)
-    {
-        *n = -1;
-    }
-
-    if (nalloc)
-    {
-        *nalloc = -1;
-    }
-}
-
  /*! \brief Reallocation device buffers
   *
   *  Reallocation of the memory pointed by d_ptr and copying of the data from
@@ -156,7 +130,7 @@ static void ocl_realloc_buffered(cl_mem *d_dest, void *h_src,
          /* only free if the array has already been initialized */
          if (*curr_alloc_size >= 0)
          {
-            ocl_free_buffered(*d_dest, curr_size, curr_alloc_size);
+            freeDeviceBuffer(d_dest);
          }
  
          *curr_alloc_size = over_alloc_large(req_size);
@@ -183,21 +157,6 @@ static void ocl_realloc_buffered(cl_mem *d_dest, void *h_src,
      }
  }
  
-/*! \brief Releases the input OpenCL buffer */
-static void free_ocl_buffer(cl_mem *buffer)
-{
-    cl_int gmx_unused cl_error;
-
-    assert(NULL != buffer);
-
-    if (*buffer)
-    {
-        cl_error = clReleaseMemObject(*buffer);
-        assert(CL_SUCCESS == cl_error);
-        *buffer = NULL;
-    }
-}
-
  /*! \brief Tabulates the Ewald Coulomb force and initializes the size/scale
   * and the table GPU array.
   *
@@ -214,7 +173,7 @@ static void init_ewald_coulomb_force_table(const interaction_const_t       *ic,
  
      if (nbp->coulomb_tab_climg2d != NULL)
      {
-        free_ocl_buffer(&(nbp->coulomb_tab_climg2d));
+        freeDeviceBuffer(&(nbp->coulomb_tab_climg2d));
      }
  
      /* Switched from using textures to using buffers */
@@ -1010,10 +969,10 @@ void nbnxn_gpu_init_atomdata(gmx_nbnxn_ocl_t               *nb,
          /* free up first if the arrays have already been initialized */
          if (d_atdat->nalloc != -1)
          {
-            ocl_free_buffered(d_atdat->f, &d_atdat->natoms, &d_atdat->nalloc);
-            ocl_free_buffered(d_atdat->xq, NULL, NULL);
-            ocl_free_buffered(d_atdat->lj_comb, NULL, NULL);
-            ocl_free_buffered(d_atdat->atom_types, NULL, NULL);
+            freeDeviceBuffer(&d_atdat->f);
+            freeDeviceBuffer(&d_atdat->xq);
+            freeDeviceBuffer(&d_atdat->lj_comb);
+            freeDeviceBuffer(&d_atdat->atom_types);
          }
  
          d_atdat->f_elem_size = sizeof(rvec);
@@ -1163,20 +1122,20 @@ void nbnxn_gpu_free(gmx_nbnxn_ocl_t *nb)
      free_kernel(&(nb->kernel_zero_e_fshift));
  
      /* Free atdat */
-    free_ocl_buffer(&(nb->atdat->xq));
-    free_ocl_buffer(&(nb->atdat->f));
-    free_ocl_buffer(&(nb->atdat->e_lj));
-    free_ocl_buffer(&(nb->atdat->e_el));
-    free_ocl_buffer(&(nb->atdat->fshift));
-    free_ocl_buffer(&(nb->atdat->lj_comb));
-    free_ocl_buffer(&(nb->atdat->atom_types));
-    free_ocl_buffer(&(nb->atdat->shift_vec));
+    freeDeviceBuffer(&(nb->atdat->xq));
+    freeDeviceBuffer(&(nb->atdat->f));
+    freeDeviceBuffer(&(nb->atdat->e_lj));
+    freeDeviceBuffer(&(nb->atdat->e_el));
+    freeDeviceBuffer(&(nb->atdat->fshift));
+    freeDeviceBuffer(&(nb->atdat->lj_comb));
+    freeDeviceBuffer(&(nb->atdat->atom_types));
+    freeDeviceBuffer(&(nb->atdat->shift_vec));
      sfree(nb->atdat);
  
      /* Free nbparam */
-    free_ocl_buffer(&(nb->nbparam->nbfp_climg2d));
-    free_ocl_buffer(&(nb->nbparam->nbfp_comb_climg2d));
-    free_ocl_buffer(&(nb->nbparam->coulomb_tab_climg2d));
+    freeDeviceBuffer(&(nb->nbparam->nbfp_climg2d));
+    freeDeviceBuffer(&(nb->nbparam->nbfp_comb_climg2d));
+    freeDeviceBuffer(&(nb->nbparam->coulomb_tab_climg2d));
      sfree(nb->nbparam);
  
      /* Free plist */
@@ -1207,7 +1166,7 @@ void nbnxn_gpu_free(gmx_nbnxn_ocl_t *nb)
      nb->nbst.fshift = NULL;
  
      /* Free debug buffer */
-    free_ocl_buffer(&nb->debug_buffer);
+    freeDeviceBuffer(&nb->debug_buffer);
  
      /* Free command queues */
      clReleaseCommandQueue(nb->stream[eintLocal]);
author	Aleksei Iupinov <a.yupinov@gmail.com>
	Tue, 13 Feb 2018 16:49:40 +0000 (17:49 +0100)
committer	Szilárd Páll <pall.szilard@gmail.com>
	Sun, 18 Feb 2018 23:02:23 +0000 (00:02 +0100)
src/gromacs/ewald/pme.cu		patch \| blob \| history
src/gromacs/ewald/pme.cuh		patch \| blob \| history
src/gromacs/gpu_utils/cudautils.cu		patch \| blob \| history
src/gromacs/gpu_utils/cudautils.cuh		patch \| blob \| history
src/gromacs/gpu_utils/oclutils.h		patch \| blob \| history
src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu		patch \| blob \| history
src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp		patch \| blob \| history