void pme_gpu_free_bspline_values(const PmeGpu *pmeGpu)
{
pfree(pmeGpu->staging.h_splineModuli);
- cu_free_buffered(pmeGpu->kernelParams->grid.d_splineModuli, &pmeGpu->archSpecific->splineValuesSize,
- &pmeGpu->archSpecific->splineValuesSizeAlloc);
+ freeDeviceBuffer(&pmeGpu->kernelParams->grid.d_splineModuli);
}
void pme_gpu_realloc_forces(PmeGpu *pmeGpu)
void pme_gpu_free_forces(const PmeGpu *pmeGpu)
{
- cu_free_buffered(pmeGpu->kernelParams->atoms.d_forces, &pmeGpu->archSpecific->forcesSize, &pmeGpu->archSpecific->forcesSizeAlloc);
+ freeDeviceBuffer(&pmeGpu->kernelParams->atoms.d_forces);
}
void pme_gpu_copy_input_forces(PmeGpu *pmeGpu)
void pme_gpu_free_coordinates(const PmeGpu *pmeGpu)
{
- cu_free_buffered(pmeGpu->kernelParams->atoms.d_coordinates, &pmeGpu->archSpecific->coordinatesSize, &pmeGpu->archSpecific->coordinatesSizeAlloc);
+ freeDeviceBuffer(&pmeGpu->kernelParams->atoms.d_coordinates);
}
void pme_gpu_realloc_and_copy_input_coefficients(const PmeGpu *pmeGpu, const float *h_coefficients)
void pme_gpu_free_coefficients(const PmeGpu *pmeGpu)
{
- cu_free_buffered(pmeGpu->kernelParams->atoms.d_coefficients, &pmeGpu->archSpecific->coefficientsSize, &pmeGpu->archSpecific->coefficientsSizeAlloc);
+ freeDeviceBuffer(&pmeGpu->kernelParams->atoms.d_coefficients);
}
void pme_gpu_realloc_spline_data(const PmeGpu *pmeGpu)
void pme_gpu_free_spline_data(const PmeGpu *pmeGpu)
{
/* Two arrays of the same size */
- cu_free_buffered(pmeGpu->kernelParams->atoms.d_theta);
- cu_free_buffered(pmeGpu->kernelParams->atoms.d_dtheta, &pmeGpu->archSpecific->splineDataSize, &pmeGpu->archSpecific->splineDataSizeAlloc);
+ freeDeviceBuffer(&pmeGpu->kernelParams->atoms.d_theta);
+ freeDeviceBuffer(&pmeGpu->kernelParams->atoms.d_dtheta);
pfree(pmeGpu->staging.h_theta);
pfree(pmeGpu->staging.h_dtheta);
}
void pme_gpu_free_grid_indices(const PmeGpu *pmeGpu)
{
- cu_free_buffered(pmeGpu->kernelParams->atoms.d_gridlineIndices, &pmeGpu->archSpecific->gridlineIndicesSize, &pmeGpu->archSpecific->gridlineIndicesSizeAlloc);
+ freeDeviceBuffer(&pmeGpu->kernelParams->atoms.d_gridlineIndices);
pfree(pmeGpu->staging.h_gridlineIndices);
}
{
if (pmeGpu->archSpecific->performOutOfPlaceFFT)
{
- cu_free_buffered(pmeGpu->kernelParams->grid.d_fourierGrid);
+ freeDeviceBuffer(&pmeGpu->kernelParams->grid.d_fourierGrid);
}
- cu_free_buffered(pmeGpu->kernelParams->grid.d_realGrid,
- &pmeGpu->archSpecific->realGridSize, &pmeGpu->archSpecific->realGridSizeAlloc);
+ freeDeviceBuffer(&pmeGpu->kernelParams->grid.d_realGrid);
}
void pme_gpu_clear_grids(const PmeGpu *pmeGpu)
/* GPU arrays element counts (not the arrays sizes in bytes!).
* They might be larger than the actual meaningful data sizes.
* These are paired: the actual element count + the maximum element count that can fit in the current allocated memory.
- * These integer pairs are mostly meaningful for the cu_realloc/free_buffered calls.
- * As such, if cu_realloc/free_buffered is refactored, they can be freely changed, too.
+ * These integer pairs are mostly meaningful for the cu_realloc_buffered calls.
+ * As such, if cu_realloc_buffered is refactored, they can be freely changed, too.
* The only exceptions are realGridSize and complexGridSize which are also used for grid clearing/copying.
* TODO: these should live in a clean buffered container type, and be refactored in the NB/cudautils as well.
*/
/**** Operation on buffered arrays (arrays with "over-allocation" in gmx wording) *****/
-/*!
- * If the pointers to the size variables are NULL no resetting happens.
- */
-void cu_free_buffered(void *d_ptr, int *n, int *nalloc)
-{
- cudaError_t stat;
-
- if (d_ptr)
- {
- stat = cudaFree(d_ptr);
- CU_RET_ERR(stat, "cudaFree failed");
- }
-
- if (n)
- {
- *n = -1;
- }
-
- if (nalloc)
- {
- *nalloc = -1;
- }
-}
-
/*!
* Reallocation of the memory pointed by d_ptr and copying of the data from
* the location pointed by h_src host-side pointer is done. Allocation is
/* only free if the array has already been initialized */
if (*curr_alloc_size >= 0)
{
- cu_free_buffered(*d_dest, curr_size, curr_alloc_size);
+ freeDeviceBuffer(d_dest);
}
*curr_alloc_size = over_alloc_large(req_size);
stat = cudaMalloc(d_dest, *curr_alloc_size * type_size);
- CU_RET_ERR(stat, "cudaMalloc failed in cu_free_buffered");
+ CU_RET_ERR(stat, "cudaMalloc failed in cu_realloc_buffered");
}
/* size could have changed without actual reallocation */
/*! Launches asynchronous host to device memory copy in stream s. */
int cu_copy_H2D_async(void * /*d_dest*/, void * /*h_src*/, size_t /*bytes*/, cudaStream_t /*s = 0*/);
-/*! Frees device memory and resets the size and allocation size to -1. */
-void cu_free_buffered(void *d_ptr, int *n = NULL, int *nalloc = NULL);
-
/*! Reallocates the device memory and copies data from the host. */
void cu_realloc_buffered(void **d_dest, void *h_src,
size_t type_size,
}
/*! \brief Free a device-side buffer.
- * TODO: fully replace cu_free_buffered with this.
+ * This does not reset separately stored size/capacity integers,
+ * as this is planned to be a destructor of DeviceBuffer as a proper class,
+ * and no calls on \p buffer should be made afterwards.
*
* \param[in] buffer Pointer to the buffer to free.
*/
}
/*! \brief Free a device-side buffer.
- * TODO: fully replace free_ocl_buffer and ocl_free_buffered with this.
+ * This does not reset separately stored size/capacity integers,
+ * as this is planned to be a destructor of DeviceBuffer as a proper class,
+ * and no calls on \p buffer should be made afterwards.
*
* \param[in] buffer Pointer to the buffer to free.
*/
/* free up first if the arrays have already been initialized */
if (d_atdat->nalloc != -1)
{
- cu_free_buffered(d_atdat->f, &d_atdat->natoms, &d_atdat->nalloc);
- cu_free_buffered(d_atdat->xq);
- cu_free_buffered(d_atdat->atom_types);
- cu_free_buffered(d_atdat->lj_comb);
+ freeDeviceBuffer(&d_atdat->f);
+ freeDeviceBuffer(&d_atdat->xq);
+ freeDeviceBuffer(&d_atdat->atom_types);
+ freeDeviceBuffer(&d_atdat->lj_comb);
}
stat = cudaMalloc((void **)&d_atdat->f, nalloc*sizeof(*d_atdat->f));
stat = cudaFree(atdat->e_el);
CU_RET_ERR(stat, "cudaFree failed on atdat->e_el");
- cu_free_buffered(atdat->f, &atdat->natoms, &atdat->nalloc);
- cu_free_buffered(atdat->xq);
- cu_free_buffered(atdat->atom_types, &atdat->ntypes);
- cu_free_buffered(atdat->lj_comb);
+ freeDeviceBuffer(&atdat->f);
+ freeDeviceBuffer(&atdat->xq);
+ freeDeviceBuffer(&atdat->atom_types);
+ freeDeviceBuffer(&atdat->lj_comb);
/* Free plist */
auto *plist = nb->plist[eintLocal];
vdwType == evdwOclCUTCOMBLB);
}
-/*! \brief Free device buffers
- *
- * If the pointers to the size variables are NULL no resetting happens.
- */
-static void ocl_free_buffered(cl_mem d_ptr, int *n, int *nalloc)
-{
- cl_int gmx_unused cl_error;
-
- if (d_ptr)
- {
- cl_error = clReleaseMemObject(d_ptr);
- assert(cl_error == CL_SUCCESS);
- // TODO: handle errors
- }
-
- if (n)
- {
- *n = -1;
- }
-
- if (nalloc)
- {
- *nalloc = -1;
- }
-}
-
/*! \brief Reallocation device buffers
*
* Reallocation of the memory pointed by d_ptr and copying of the data from
/* only free if the array has already been initialized */
if (*curr_alloc_size >= 0)
{
- ocl_free_buffered(*d_dest, curr_size, curr_alloc_size);
+ freeDeviceBuffer(d_dest);
}
*curr_alloc_size = over_alloc_large(req_size);
}
}
-/*! \brief Releases the input OpenCL buffer */
-static void free_ocl_buffer(cl_mem *buffer)
-{
- cl_int gmx_unused cl_error;
-
- assert(NULL != buffer);
-
- if (*buffer)
- {
- cl_error = clReleaseMemObject(*buffer);
- assert(CL_SUCCESS == cl_error);
- *buffer = NULL;
- }
-}
-
/*! \brief Tabulates the Ewald Coulomb force and initializes the size/scale
* and the table GPU array.
*
if (nbp->coulomb_tab_climg2d != NULL)
{
- free_ocl_buffer(&(nbp->coulomb_tab_climg2d));
+ freeDeviceBuffer(&(nbp->coulomb_tab_climg2d));
}
/* Switched from using textures to using buffers */
/* free up first if the arrays have already been initialized */
if (d_atdat->nalloc != -1)
{
- ocl_free_buffered(d_atdat->f, &d_atdat->natoms, &d_atdat->nalloc);
- ocl_free_buffered(d_atdat->xq, NULL, NULL);
- ocl_free_buffered(d_atdat->lj_comb, NULL, NULL);
- ocl_free_buffered(d_atdat->atom_types, NULL, NULL);
+ freeDeviceBuffer(&d_atdat->f);
+ freeDeviceBuffer(&d_atdat->xq);
+ freeDeviceBuffer(&d_atdat->lj_comb);
+ freeDeviceBuffer(&d_atdat->atom_types);
}
d_atdat->f_elem_size = sizeof(rvec);
free_kernel(&(nb->kernel_zero_e_fshift));
/* Free atdat */
- free_ocl_buffer(&(nb->atdat->xq));
- free_ocl_buffer(&(nb->atdat->f));
- free_ocl_buffer(&(nb->atdat->e_lj));
- free_ocl_buffer(&(nb->atdat->e_el));
- free_ocl_buffer(&(nb->atdat->fshift));
- free_ocl_buffer(&(nb->atdat->lj_comb));
- free_ocl_buffer(&(nb->atdat->atom_types));
- free_ocl_buffer(&(nb->atdat->shift_vec));
+ freeDeviceBuffer(&(nb->atdat->xq));
+ freeDeviceBuffer(&(nb->atdat->f));
+ freeDeviceBuffer(&(nb->atdat->e_lj));
+ freeDeviceBuffer(&(nb->atdat->e_el));
+ freeDeviceBuffer(&(nb->atdat->fshift));
+ freeDeviceBuffer(&(nb->atdat->lj_comb));
+ freeDeviceBuffer(&(nb->atdat->atom_types));
+ freeDeviceBuffer(&(nb->atdat->shift_vec));
sfree(nb->atdat);
/* Free nbparam */
- free_ocl_buffer(&(nb->nbparam->nbfp_climg2d));
- free_ocl_buffer(&(nb->nbparam->nbfp_comb_climg2d));
- free_ocl_buffer(&(nb->nbparam->coulomb_tab_climg2d));
+ freeDeviceBuffer(&(nb->nbparam->nbfp_climg2d));
+ freeDeviceBuffer(&(nb->nbparam->nbfp_comb_climg2d));
+ freeDeviceBuffer(&(nb->nbparam->coulomb_tab_climg2d));
sfree(nb->nbparam);
/* Free plist */
nb->nbst.fshift = NULL;
/* Free debug buffer */
- free_ocl_buffer(&nb->debug_buffer);
+ freeDeviceBuffer(&nb->debug_buffer);
/* Free command queues */
clReleaseCommandQueue(nb->stream[eintLocal]);