Merge branch release-2018
[alexxy/gromacs.git] / src / gromacs / mdlib / nbnxn_ocl / nbnxn_ocl_data_mgmt.cpp
index de4cb58c01715007c8bec8152bbc683d8b1e37fd..f1bddab95d9816e4ca9fdf772707bd22eb29e3a4 100644 (file)
@@ -93,110 +93,6 @@ bool useLjCombRule(int vdwType)
             vdwType == evdwOclCUTCOMBLB);
 }
 
-/*! \brief Free device buffers
- *
- * If the pointers to the size variables are NULL no resetting happens.
- */
-static void ocl_free_buffered(cl_mem d_ptr, int *n, int *nalloc)
-{
-    cl_int gmx_unused cl_error;
-
-    if (d_ptr)
-    {
-        cl_error = clReleaseMemObject(d_ptr);
-        assert(cl_error == CL_SUCCESS);
-        // TODO: handle errors
-    }
-
-    if (n)
-    {
-        *n = -1;
-    }
-
-    if (nalloc)
-    {
-        *nalloc = -1;
-    }
-}
-
-/*! \brief Reallocation device buffers
- *
- *  Reallocation of the memory pointed by d_ptr and copying of the data from
- *  the location pointed by h_src host-side pointer is done. Allocation is
- *  buffered and therefore freeing is only needed if the previously allocated
- *  space is not enough.
- *  The H2D copy is launched in command queue s and can be done synchronously or
- *  asynchronously (the default is the latter).
- *  If copy_event is not NULL, on return it will contain an event object
- *  identifying the H2D copy. The event can further be used to queue a wait
- *  for this operation or to query profiling information.
- *  OpenCL equivalent of cu_realloc_buffered.
- */
-static void ocl_realloc_buffered(cl_mem *d_dest, void *h_src,
-                                 size_t type_size,
-                                 int *curr_size, int *curr_alloc_size,
-                                 int req_size,
-                                 cl_context context,
-                                 cl_command_queue s,
-                                 bool bAsync = true,
-                                 cl_event *copy_event = NULL)
-{
-    if (d_dest == NULL || req_size < 0)
-    {
-        return;
-    }
-
-    /* reallocate only if the data does not fit = allocation size is smaller
-       than the current requested size */
-    if (req_size > *curr_alloc_size)
-    {
-        cl_int gmx_unused cl_error;
-
-        /* only free if the array has already been initialized */
-        if (*curr_alloc_size >= 0)
-        {
-            ocl_free_buffered(*d_dest, curr_size, curr_alloc_size);
-        }
-
-        *curr_alloc_size = over_alloc_large(req_size);
-
-        *d_dest = clCreateBuffer(context, CL_MEM_READ_WRITE, *curr_alloc_size * type_size, NULL, &cl_error);
-        assert(cl_error == CL_SUCCESS);
-        // TODO: handle errors, check clCreateBuffer flags
-    }
-
-    /* size could have changed without actual reallocation */
-    *curr_size = req_size;
-
-    /* upload to device */
-    if (h_src)
-    {
-        if (bAsync)
-        {
-            ocl_copy_H2D_async(*d_dest, h_src, 0, *curr_size * type_size, s, copy_event);
-        }
-        else
-        {
-            ocl_copy_H2D_sync(*d_dest, h_src,  0, *curr_size * type_size, s);
-        }
-    }
-}
-
-/*! \brief Releases the input OpenCL buffer */
-static void free_ocl_buffer(cl_mem *buffer)
-{
-    cl_int gmx_unused cl_error;
-
-    assert(NULL != buffer);
-
-    if (*buffer)
-    {
-        cl_error = clReleaseMemObject(*buffer);
-        assert(CL_SUCCESS == cl_error);
-        *buffer = NULL;
-    }
-}
-
 /*! \brief Tabulates the Ewald Coulomb force and initializes the size/scale
  * and the table GPU array.
  *
@@ -211,9 +107,9 @@ static void init_ewald_coulomb_force_table(const interaction_const_t       *ic,
 
     cl_int       cl_error;
 
-    if (nbp->coulomb_tab_climg2d != NULL)
+    if (nbp->coulomb_tab_climg2d != nullptr)
     {
-        free_ocl_buffer(&(nbp->coulomb_tab_climg2d));
+        freeDeviceBuffer(&(nbp->coulomb_tab_climg2d));
     }
 
     /* Switched from using textures to using buffers */
@@ -248,33 +144,33 @@ static void init_atomdata_first(cl_atomdata_t *ad, int ntypes, gmx_device_runtim
 
     /* An element of the shift_vec device buffer has the same size as one element
        of the host side shift_vec buffer. */
-    ad->shift_vec_elem_size = sizeof(*(((nbnxn_atomdata_t*)0)->shift_vec));
+    ad->shift_vec_elem_size = sizeof(*nbnxn_atomdata_t::shift_vec);
 
     // TODO: handle errors, check clCreateBuffer flags
-    ad->shift_vec = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, SHIFTS * ad->shift_vec_elem_size, NULL, &cl_error);
+    ad->shift_vec = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, SHIFTS * ad->shift_vec_elem_size, nullptr, &cl_error);
     assert(cl_error == CL_SUCCESS);
     ad->bShiftVecUploaded = false;
 
     /* An element of the fshift device buffer has the same size as one element
        of the host side fshift buffer. */
-    ad->fshift_elem_size = sizeof(*(((cl_nb_staging_t*)0)->fshift));
+    ad->fshift_elem_size = sizeof(*cl_nb_staging_t::fshift);
 
-    ad->fshift = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, SHIFTS * ad->fshift_elem_size, NULL, &cl_error);
+    ad->fshift = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, SHIFTS * ad->fshift_elem_size, nullptr, &cl_error);
     assert(cl_error == CL_SUCCESS);
     // TODO: handle errors, check clCreateBuffer flags
 
-    ad->e_lj = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, sizeof(float), NULL, &cl_error);
+    ad->e_lj = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, sizeof(float), nullptr, &cl_error);
     assert(cl_error == CL_SUCCESS);
     // TODO: handle errors, check clCreateBuffer flags
 
-    ad->e_el = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, sizeof(float), NULL, &cl_error);
+    ad->e_el = clCreateBuffer(runData->context, CL_MEM_READ_WRITE, sizeof(float), nullptr, &cl_error);
     assert(cl_error == CL_SUCCESS);
     // TODO: handle errors, check clCreateBuffer flags
 
-    /* initialize to NULL pointers to data that is not allocated here and will
+    /* initialize to nullptr pointers to data that is not allocated here and will
        need reallocation in nbnxn_gpu_init_atomdata */
-    ad->xq = NULL;
-    ad->f  = NULL;
+    ad->xq = nullptr;
+    ad->f  = nullptr;
 
     /* size -1 indicates that the respective array hasn't been initialized yet */
     ad->natoms = -1;
@@ -420,7 +316,7 @@ static void init_nbparam(cl_nbparam_t                    *nbp,
         }
     }
     /* generate table for PME */
-    nbp->coulomb_tab_climg2d = NULL;
+    nbp->coulomb_tab_climg2d = nullptr;
     if (nbp->eeltype == eelOclEWALD_TAB || nbp->eeltype == eelOclEWALD_TAB_TWIN)
     {
         init_ewald_coulomb_force_table(ic, nbp, runData);
@@ -428,7 +324,7 @@ static void init_nbparam(cl_nbparam_t                    *nbp,
     else
     // TODO: improvement needed.
     // The image2d is created here even if eeltype is not eelCuEWALD_TAB or eelCuEWALD_TAB_TWIN because the OpenCL kernels
-    // don't accept NULL values for image2D parameters.
+    // don't accept nullptr values for image2D parameters.
     {
         /* Switched from using textures to using buffers */
         // TODO: decide which alternative is most efficient - textures or buffers.
@@ -439,10 +335,10 @@ static void init_nbparam(cl_nbparam_t                    *nbp,
            array_format.image_channel_order     = CL_R;
 
            nbp->coulomb_tab_climg2d = clCreateImage2D(runData->context, CL_MEM_READ_WRITE,
-            &array_format, 1, 1, 0, NULL, &cl_error);
+            &array_format, 1, 1, 0, nullptr, &cl_error);
          */
 
-        nbp->coulomb_tab_climg2d = clCreateBuffer(runData->context, CL_MEM_READ_ONLY, sizeof(cl_float), NULL, &cl_error);
+        nbp->coulomb_tab_climg2d = clCreateBuffer(runData->context, CL_MEM_READ_ONLY, sizeof(cl_float), nullptr, &cl_error);
         // TODO: handle errors
     }
 
@@ -482,12 +378,12 @@ static void init_nbparam(cl_nbparam_t                    *nbp,
         {
             // TODO: improvement needed.
             // The image2d is created here even if vdwtype is not evdwPME because the OpenCL kernels
-            // don't accept NULL values for image2D parameters.
+            // don't accept nullptr values for image2D parameters.
             /* Switched from using textures to using buffers */
             // TODO: decide which alternative is most efficient - textures or buffers.
             /* nbp->nbfp_comb_climg2d = clCreateImage2D(runData->context, CL_MEM_READ_WRITE,
-                &array_format, 1, 1, 0, NULL, &cl_error);*/
-            nbp->nbfp_comb_climg2d = clCreateBuffer(runData->context, CL_MEM_READ_ONLY, sizeof(cl_float), NULL, &cl_error);
+                &array_format, 1, 1, 0, nullptr, &cl_error);*/
+            nbp->nbfp_comb_climg2d = clCreateBuffer(runData->context, CL_MEM_READ_ONLY, sizeof(cl_float), nullptr, &cl_error);
 
 
             assert(cl_error == CL_SUCCESS);
@@ -519,12 +415,12 @@ void nbnxn_gpu_pme_loadbal_update_param(const nonbonded_verlet_t    *nbv,
  */
 static void init_plist(cl_plist_t *pl)
 {
-    /* initialize to NULL pointers to data that is not allocated here and will
+    /* initialize to nullptr pointers to data that is not allocated here and will
        need reallocation in nbnxn_gpu_init_pairlist */
-    pl->sci     = NULL;
-    pl->cj4     = NULL;
-    pl->imask   = NULL;
-    pl->excl    = NULL;
+    pl->sci     = nullptr;
+    pl->cj4     = nullptr;
+    pl->imask   = nullptr;
+    pl->excl    = nullptr;
 
     /* size -1 indicates that the respective array hasn't been initialized yet */
     pl->na_c           = -1;
@@ -597,8 +493,8 @@ nbnxn_gpu_create_context(gmx_device_runtime_data_t *runtimeData,
     cl_context                context;
     cl_int                    cl_error;
 
-    assert(runtimeData != NULL);
-    assert(devInfo != NULL);
+    assert(runtimeData != nullptr);
+    assert(devInfo != nullptr);
 
     platform_id      = devInfo->ocl_gpu_id.ocl_platform_id;
     device_id        = devInfo->ocl_gpu_id.ocl_device_id;
@@ -607,7 +503,7 @@ nbnxn_gpu_create_context(gmx_device_runtime_data_t *runtimeData,
     context_properties[1] = (cl_context_properties) platform_id;
     context_properties[2] = 0; /* Terminates the list of properties */
 
-    context = clCreateContext(context_properties, 1, &device_id, NULL, NULL, &cl_error);
+    context = clCreateContext(context_properties, 1, &device_id, nullptr, nullptr, &cl_error);
     if (CL_SUCCESS != cl_error)
     {
         gmx_fatal(FARGS, "On rank %d failed to create context for GPU #%s:\n OpenCL error %d: %s",
@@ -669,7 +565,7 @@ nbnxn_ocl_clear_e_fshift(gmx_nbnxn_ocl_t *nb)
     cl_error |= clSetKernelArg(zero_e_fshift, arg_no++, sizeof(cl_uint), &shifts);
     GMX_ASSERT(cl_error == CL_SUCCESS, ocl_get_error_string(cl_error).c_str());
 
-    cl_error = clEnqueueNDRangeKernel(ls, zero_e_fshift, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+    cl_error = clEnqueueNDRangeKernel(ls, zero_e_fshift, 3, nullptr, global_work_size, local_work_size, 0, nullptr, nullptr);
     GMX_ASSERT(cl_error == CL_SUCCESS, ocl_get_error_string(cl_error).c_str());
 }
 
@@ -730,7 +626,7 @@ void nbnxn_gpu_init(gmx_nbnxn_ocl_t          **p_nb,
 
     assert(ic);
 
-    if (p_nb == NULL)
+    if (p_nb == nullptr)
     {
         return;
     }
@@ -753,18 +649,15 @@ void nbnxn_gpu_init(gmx_nbnxn_ocl_t          **p_nb,
     nb->dev_info = deviceInfo;
     snew(nb->dev_rundata, 1);
 
-    /* init to NULL the debug buffer */
-    nb->debug_buffer = NULL;
-
     /* init nbst */
-    ocl_pmalloc((void**)&nb->nbst.e_lj, sizeof(*nb->nbst.e_lj));
-    ocl_pmalloc((void**)&nb->nbst.e_el, sizeof(*nb->nbst.e_el));
-    ocl_pmalloc((void**)&nb->nbst.fshift, SHIFTS * sizeof(*nb->nbst.fshift));
+    pmalloc((void**)&nb->nbst.e_lj, sizeof(*nb->nbst.e_lj));
+    pmalloc((void**)&nb->nbst.e_el, sizeof(*nb->nbst.e_el));
+    pmalloc((void**)&nb->nbst.fshift, SHIFTS * sizeof(*nb->nbst.fshift));
 
     init_plist(nb->plist[eintLocal]);
 
     /* OpenCL timing disabled if GMX_DISABLE_GPU_TIMING is defined. */
-    nb->bDoTime = (getenv("GMX_DISABLE_GPU_TIMING") == NULL);
+    nb->bDoTime = (getenv("GMX_DISABLE_GPU_TIMING") == nullptr);
 
     /* Create queues only after bDoTime has been initialized */
     if (nb->bDoTime)
@@ -816,8 +709,8 @@ void nbnxn_gpu_init(gmx_nbnxn_ocl_t          **p_nb,
      * TODO: decide about NVIDIA
      */
     nb->bPrefetchLjParam =
-        (getenv("GMX_OCL_DISABLE_I_PREFETCH") == NULL) &&
-        ((nb->dev_info->vendor_e == OCL_VENDOR_AMD) || (getenv("GMX_OCL_ENABLE_I_PREFETCH") != NULL));
+        (getenv("GMX_OCL_DISABLE_I_PREFETCH") == nullptr) &&
+        ((nb->dev_info->vendor_e == OCL_VENDOR_AMD) || (getenv("GMX_OCL_ENABLE_I_PREFETCH") != nullptr));
 
     /* NOTE: in CUDA we pick L1 cache configuration for the nbnxn kernels here,
      * but sadly this is not supported in OpenCL (yet?). Consider adding it if
@@ -846,7 +739,6 @@ static void nbnxn_ocl_clear_f(gmx_nbnxn_ocl_t *nb, int natoms_clear)
         return;
     }
 
-    cl_int               cl_error;
     cl_atomdata_t *      adat     = nb->atdat;
     cl_command_queue     ls       = nb->stream[eintLocal];
     cl_float             value    = 0.0f;
@@ -865,13 +757,14 @@ static void nbnxn_ocl_clear_f(gmx_nbnxn_ocl_t *nb, int natoms_clear)
     global_work_size[0] = ((natoms_flat + local_work_size[0] - 1)/local_work_size[0])*local_work_size[0];
 
 
+    cl_int gmx_used_in_debug cl_error;
     arg_no    = 0;
     cl_error  = clSetKernelArg(memset_f, arg_no++, sizeof(cl_mem), &(adat->f));
     cl_error |= clSetKernelArg(memset_f, arg_no++, sizeof(cl_float), &value);
     cl_error |= clSetKernelArg(memset_f, arg_no++, sizeof(cl_uint), &natoms_flat);
     assert(cl_error == CL_SUCCESS);
 
-    cl_error = clEnqueueNDRangeKernel(ls, memset_f, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+    cl_error = clEnqueueNDRangeKernel(ls, memset_f, 3, nullptr, global_work_size, local_work_size, 0, nullptr, nullptr);
     assert(cl_error == CL_SUCCESS);
 }
 
@@ -927,30 +820,29 @@ void nbnxn_gpu_init_pairlist(gmx_nbnxn_ocl_t        *nb,
         nb->timers->didPairlistH2D[iloc] = true;
     }
 
-    ocl_realloc_buffered(&d_plist->sci, h_plist->sci, sizeof(nbnxn_sci_t),
-                         &d_plist->nsci, &d_plist->sci_nalloc,
-                         h_plist->nsci,
-                         nb->dev_rundata->context,
-                         stream, true, bDoTime ? nb->timers->pl_h2d[iloc].fetchNextEvent() : nullptr);
-
-    ocl_realloc_buffered(&d_plist->cj4, h_plist->cj4, sizeof(nbnxn_cj4_t),
-                         &d_plist->ncj4, &d_plist->cj4_nalloc,
-                         h_plist->ncj4,
-                         nb->dev_rundata->context,
-                         stream, true, bDoTime ? nb->timers->pl_h2d[iloc].fetchNextEvent() : nullptr);
-
-    /* this call only allocates space on the device (no data is transferred) - no timing as well! */
-    ocl_realloc_buffered(&d_plist->imask, NULL, sizeof(unsigned int),
-                         &d_plist->nimask, &d_plist->imask_nalloc,
-                         h_plist->ncj4*c_nbnxnGpuClusterpairSplit,
-                         nb->dev_rundata->context,
-                         stream, true);
-
-    ocl_realloc_buffered(&d_plist->excl, h_plist->excl, sizeof(nbnxn_excl_t),
-                         &d_plist->nexcl, &d_plist->excl_nalloc,
-                         h_plist->nexcl,
-                         nb->dev_rundata->context,
-                         stream, true, bDoTime ? nb->timers->pl_h2d[iloc].fetchNextEvent() : nullptr);
+    // TODO most of this function is same in CUDA and OpenCL, move into the header
+    Context context = nb->dev_rundata->context;
+
+    reallocateDeviceBuffer(&d_plist->sci, h_plist->nsci,
+                           &d_plist->nsci, &d_plist->sci_nalloc, context);
+    copyToDeviceBuffer(&d_plist->sci, h_plist->sci, 0, h_plist->nsci,
+                       stream, GpuApiCallBehavior::Async,
+                       bDoTime ? nb->timers->pl_h2d[iloc].fetchNextEvent() : nullptr);
+
+    reallocateDeviceBuffer(&d_plist->cj4, h_plist->ncj4,
+                           &d_plist->ncj4, &d_plist->cj4_nalloc, context);
+    copyToDeviceBuffer(&d_plist->cj4, h_plist->cj4, 0, h_plist->ncj4,
+                       stream, GpuApiCallBehavior::Async,
+                       bDoTime ? nb->timers->pl_h2d[iloc].fetchNextEvent() : nullptr);
+
+    reallocateDeviceBuffer(&d_plist->imask, h_plist->ncj4*c_nbnxnGpuClusterpairSplit,
+                           &d_plist->nimask, &d_plist->imask_nalloc, context);
+
+    reallocateDeviceBuffer(&d_plist->excl, h_plist->nexcl,
+                           &d_plist->nexcl, &d_plist->excl_nalloc, context);
+    copyToDeviceBuffer(&d_plist->excl, h_plist->excl, 0, h_plist->nexcl,
+                       stream, GpuApiCallBehavior::Async,
+                       bDoTime ? nb->timers->pl_h2d[iloc].fetchNextEvent() : nullptr);
 
     if (bDoTime)
     {
@@ -972,7 +864,7 @@ void nbnxn_gpu_upload_shiftvec(gmx_nbnxn_ocl_t        *nb,
     if (nbatom->bDynamicBox || !adat->bShiftVecUploaded)
     {
         ocl_copy_H2D_async(adat->shift_vec, nbatom->shift_vec, 0,
-                           SHIFTS * adat->shift_vec_elem_size, ls, NULL);
+                           SHIFTS * adat->shift_vec_elem_size, ls, nullptr);
         adat->bShiftVecUploaded = true;
     }
 }
@@ -1007,34 +899,34 @@ void nbnxn_gpu_init_atomdata(gmx_nbnxn_ocl_t               *nb,
         /* free up first if the arrays have already been initialized */
         if (d_atdat->nalloc != -1)
         {
-            ocl_free_buffered(d_atdat->f, &d_atdat->natoms, &d_atdat->nalloc);
-            ocl_free_buffered(d_atdat->xq, NULL, NULL);
-            ocl_free_buffered(d_atdat->lj_comb, NULL, NULL);
-            ocl_free_buffered(d_atdat->atom_types, NULL, NULL);
+            freeDeviceBuffer(&d_atdat->f);
+            freeDeviceBuffer(&d_atdat->xq);
+            freeDeviceBuffer(&d_atdat->lj_comb);
+            freeDeviceBuffer(&d_atdat->atom_types);
         }
 
         d_atdat->f_elem_size = sizeof(rvec);
 
         // TODO: handle errors, check clCreateBuffer flags
-        d_atdat->f = clCreateBuffer(nb->dev_rundata->context, CL_MEM_READ_WRITE, nalloc * d_atdat->f_elem_size, NULL, &cl_error);
+        d_atdat->f = clCreateBuffer(nb->dev_rundata->context, CL_MEM_READ_WRITE, nalloc * d_atdat->f_elem_size, nullptr, &cl_error);
         assert(CL_SUCCESS == cl_error);
 
         // TODO: change the flag to read-only
-        d_atdat->xq = clCreateBuffer(nb->dev_rundata->context, CL_MEM_READ_WRITE, nalloc * sizeof(cl_float4), NULL, &cl_error);
+        d_atdat->xq = clCreateBuffer(nb->dev_rundata->context, CL_MEM_READ_WRITE, nalloc * sizeof(cl_float4), nullptr, &cl_error);
         assert(CL_SUCCESS == cl_error);
         // TODO: handle errors, check clCreateBuffer flags
 
         if (useLjCombRule(nb->nbparam->vdwtype))
         {
             // TODO: change the flag to read-only
-            d_atdat->lj_comb = clCreateBuffer(nb->dev_rundata->context, CL_MEM_READ_WRITE, nalloc * sizeof(cl_float2), NULL, &cl_error);
+            d_atdat->lj_comb = clCreateBuffer(nb->dev_rundata->context, CL_MEM_READ_WRITE, nalloc * sizeof(cl_float2), nullptr, &cl_error);
             assert(CL_SUCCESS == cl_error);
             // TODO: handle errors, check clCreateBuffer flags
         }
         else
         {
             // TODO: change the flag to read-only
-            d_atdat->atom_types = clCreateBuffer(nb->dev_rundata->context, CL_MEM_READ_WRITE, nalloc * sizeof(int), NULL, &cl_error);
+            d_atdat->atom_types = clCreateBuffer(nb->dev_rundata->context, CL_MEM_READ_WRITE, nalloc * sizeof(int), nullptr, &cl_error);
             assert(CL_SUCCESS == cl_error);
             // TODO: handle errors, check clCreateBuffer flags
         }
@@ -1079,14 +971,14 @@ static void free_kernel(cl_kernel *kernel_ptr)
 {
     cl_int gmx_unused cl_error;
 
-    assert(NULL != kernel_ptr);
+    assert(nullptr != kernel_ptr);
 
     if (*kernel_ptr)
     {
         cl_error = clReleaseKernel(*kernel_ptr);
         assert(cl_error == CL_SUCCESS);
 
-        *kernel_ptr = NULL;
+        *kernel_ptr = nullptr;
     }
 }
 
@@ -1110,7 +1002,7 @@ static void free_kernels(cl_kernel *kernels, int count)
  */
 static void free_gpu_device_runtime_data(gmx_device_runtime_data_t *runData)
 {
-    if (runData == NULL)
+    if (runData == nullptr)
     {
         return;
     }
@@ -1120,14 +1012,14 @@ static void free_gpu_device_runtime_data(gmx_device_runtime_data_t *runData)
     if (runData->context)
     {
         cl_error         = clReleaseContext(runData->context);
-        runData->context = NULL;
+        runData->context = nullptr;
         assert(CL_SUCCESS == cl_error);
     }
 
     if (runData->program)
     {
         cl_error         = clReleaseProgram(runData->program);
-        runData->program = NULL;
+        runData->program = nullptr;
         assert(CL_SUCCESS == cl_error);
     }
 
@@ -1136,7 +1028,7 @@ static void free_gpu_device_runtime_data(gmx_device_runtime_data_t *runData)
 //! This function is documented in the header file
 void nbnxn_gpu_free(gmx_nbnxn_ocl_t *nb)
 {
-    if (nb == NULL)
+    if (nb == nullptr)
     {
         return;
     }
@@ -1160,68 +1052,67 @@ void nbnxn_gpu_free(gmx_nbnxn_ocl_t *nb)
     free_kernel(&(nb->kernel_zero_e_fshift));
 
     /* Free atdat */
-    free_ocl_buffer(&(nb->atdat->xq));
-    free_ocl_buffer(&(nb->atdat->f));
-    free_ocl_buffer(&(nb->atdat->e_lj));
-    free_ocl_buffer(&(nb->atdat->e_el));
-    free_ocl_buffer(&(nb->atdat->fshift));
-    free_ocl_buffer(&(nb->atdat->lj_comb));
-    free_ocl_buffer(&(nb->atdat->atom_types));
-    free_ocl_buffer(&(nb->atdat->shift_vec));
+    freeDeviceBuffer(&(nb->atdat->xq));
+    freeDeviceBuffer(&(nb->atdat->f));
+    freeDeviceBuffer(&(nb->atdat->e_lj));
+    freeDeviceBuffer(&(nb->atdat->e_el));
+    freeDeviceBuffer(&(nb->atdat->fshift));
+    freeDeviceBuffer(&(nb->atdat->lj_comb));
+    freeDeviceBuffer(&(nb->atdat->atom_types));
+    freeDeviceBuffer(&(nb->atdat->shift_vec));
     sfree(nb->atdat);
 
     /* Free nbparam */
-    free_ocl_buffer(&(nb->nbparam->nbfp_climg2d));
-    free_ocl_buffer(&(nb->nbparam->nbfp_comb_climg2d));
-    free_ocl_buffer(&(nb->nbparam->coulomb_tab_climg2d));
+    freeDeviceBuffer(&(nb->nbparam->nbfp_climg2d));
+    freeDeviceBuffer(&(nb->nbparam->nbfp_comb_climg2d));
+    freeDeviceBuffer(&(nb->nbparam->coulomb_tab_climg2d));
     sfree(nb->nbparam);
 
     /* Free plist */
-    free_ocl_buffer(&(nb->plist[eintLocal]->sci));
-    free_ocl_buffer(&(nb->plist[eintLocal]->cj4));
-    free_ocl_buffer(&(nb->plist[eintLocal]->imask));
-    free_ocl_buffer(&(nb->plist[eintLocal]->excl));
-    sfree(nb->plist[eintLocal]);
+    auto *plist = nb->plist[eintLocal];
+    freeDeviceBuffer(&plist->sci);
+    freeDeviceBuffer(&plist->cj4);
+    freeDeviceBuffer(&plist->imask);
+    freeDeviceBuffer(&plist->excl);
+    sfree(plist);
     if (nb->bUseTwoStreams)
     {
-        free_ocl_buffer(&(nb->plist[eintNonlocal]->sci));
-        free_ocl_buffer(&(nb->plist[eintNonlocal]->cj4));
-        free_ocl_buffer(&(nb->plist[eintNonlocal]->imask));
-        free_ocl_buffer(&(nb->plist[eintNonlocal]->excl));
-        sfree(nb->plist[eintNonlocal]);
+        auto *plist_nl = nb->plist[eintNonlocal];
+        freeDeviceBuffer(&plist_nl->sci);
+        freeDeviceBuffer(&plist_nl->cj4);
+        freeDeviceBuffer(&plist_nl->imask);
+        freeDeviceBuffer(&plist_nl->excl);
+        sfree(plist_nl);
     }
 
     /* Free nbst */
-    ocl_pfree(nb->nbst.e_lj);
-    nb->nbst.e_lj = NULL;
-
-    ocl_pfree(nb->nbst.e_el);
-    nb->nbst.e_el = NULL;
+    pfree(nb->nbst.e_lj);
+    nb->nbst.e_lj = nullptr;
 
-    ocl_pfree(nb->nbst.fshift);
-    nb->nbst.fshift = NULL;
+    pfree(nb->nbst.e_el);
+    nb->nbst.e_el = nullptr;
 
-    /* Free debug buffer */
-    free_ocl_buffer(&nb->debug_buffer);
+    pfree(nb->nbst.fshift);
+    nb->nbst.fshift = nullptr;
 
     /* Free command queues */
     clReleaseCommandQueue(nb->stream[eintLocal]);
-    nb->stream[eintLocal] = NULL;
+    nb->stream[eintLocal] = nullptr;
     if (nb->bUseTwoStreams)
     {
         clReleaseCommandQueue(nb->stream[eintNonlocal]);
-        nb->stream[eintNonlocal] = NULL;
+        nb->stream[eintNonlocal] = nullptr;
     }
     /* Free other events */
     if (nb->nonlocal_done)
     {
         clReleaseEvent(nb->nonlocal_done);
-        nb->nonlocal_done = NULL;
+        nb->nonlocal_done = nullptr;
     }
     if (nb->misc_ops_and_local_H2D_done)
     {
         clReleaseEvent(nb->misc_ops_and_local_H2D_done);
-        nb->misc_ops_and_local_H2D_done = NULL;
+        nb->misc_ops_and_local_H2D_done = nullptr;
     }
 
     free_gpu_device_runtime_data(nb->dev_rundata);
@@ -1256,7 +1147,7 @@ void nbnxn_gpu_reset_timings(nonbonded_verlet_t* nbv)
 //! This function is documented in the header file
 int nbnxn_gpu_min_ci_balanced(gmx_nbnxn_ocl_t *nb)
 {
-    return nb != NULL ?
+    return nb != nullptr ?
            gpu_min_ci_balanced_factor * nb->dev_info->compute_units : 0;
 }