From 1ced5fb7a39a90ec1866f7ee9302438f1e2db8f3 Mon Sep 17 00:00:00 2001 From: Artem Zhmurov Date: Thu, 16 Apr 2020 11:38:32 +0000 Subject: [PATCH] Unify CUDA and OpenCL lookup-table creation In CUDA code, textures are used for the lookup-tables, whereas in OpenCL they are created as a read-only buffers. This commit hides these differences behind a unified wrapper. Refs #3318 Refs #3311 Change-Id: I003e0c982c2452a2753e331b46fc59f0b7e1b711 --- src/gromacs/ewald/pme.cuh | 13 +-- src/gromacs/ewald/pme_gpu_internal.cpp | 29 ++---- src/gromacs/ewald/pme_gpu_types.h | 13 +-- src/gromacs/gpu_utils/cudautils.cu | 64 ------------- src/gromacs/gpu_utils/cudautils.cuh | 35 -------- src/gromacs/gpu_utils/devicebuffer.cuh | 89 ++++++++++++++++++- src/gromacs/gpu_utils/devicebuffer_ocl.h | 47 ++++++++++ .../nbnxm/cuda/nbnxm_cuda_data_mgmt.cu | 30 ++++--- .../nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp | 89 +++---------------- 9 files changed, 183 insertions(+), 226 deletions(-) diff --git a/src/gromacs/ewald/pme.cuh b/src/gromacs/ewald/pme.cuh index f30a975a22..af0e258ae9 100644 --- a/src/gromacs/ewald/pme.cuh +++ b/src/gromacs/ewald/pme.cuh @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by + * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -50,17 +50,12 @@ #include "pme_gpu_types.h" /*! \brief \internal - * A single structure encompassing all the PME data used in CUDA kernels. - * This inherits from PmeGpuKernelParamsBase and adds a couple cudaTextureObject_t handles, - * which we would like to avoid in plain C++. + * An alias for PME parameters in CUDA. + * \todo Remove if we decide to unify CUDA and OpenCL */ struct PmeGpuCudaKernelParams : PmeGpuKernelParamsBase { - /* These are CUDA texture objects, related to the grid size. */ - /*! \brief CUDA texture object for accessing grid.d_fractShiftsTable */ - cudaTextureObject_t fractShiftsTableTexture; - /*! \brief CUDA texture object for accessing grid.d_gridlineIndicesTable */ - cudaTextureObject_t gridlineIndicesTableTexture; + // Place CUDA-specific stuff here }; #endif diff --git a/src/gromacs/ewald/pme_gpu_internal.cpp b/src/gromacs/ewald/pme_gpu_internal.cpp index 62f7cc5c7e..63b77aa86f 100644 --- a/src/gromacs/ewald/pme_gpu_internal.cpp +++ b/src/gromacs/ewald/pme_gpu_internal.cpp @@ -367,35 +367,22 @@ void pme_gpu_realloc_and_copy_fract_shifts(PmeGpu* pmeGpu) const int newFractShiftsSize = cellCount * (nx + ny + nz); -#if GMX_GPU == GMX_GPU_CUDA - initParamLookupTable(kernelParamsPtr->grid.d_fractShiftsTable, kernelParamsPtr->fractShiftsTableTexture, - pmeGpu->common->fsh.data(), newFractShiftsSize); + initParamLookupTable(&kernelParamsPtr->grid.d_fractShiftsTable, + &kernelParamsPtr->fractShiftsTableTexture, pmeGpu->common->fsh.data(), + newFractShiftsSize, pmeGpu->archSpecific->deviceContext_); - initParamLookupTable(kernelParamsPtr->grid.d_gridlineIndicesTable, - kernelParamsPtr->gridlineIndicesTableTexture, pmeGpu->common->nn.data(), - newFractShiftsSize); -#elif GMX_GPU == GMX_GPU_OPENCL - // No dedicated texture routines.... - allocateDeviceBuffer(&kernelParamsPtr->grid.d_fractShiftsTable, newFractShiftsSize, - pmeGpu->archSpecific->deviceContext_); - allocateDeviceBuffer(&kernelParamsPtr->grid.d_gridlineIndicesTable, newFractShiftsSize, - pmeGpu->archSpecific->deviceContext_); - copyToDeviceBuffer(&kernelParamsPtr->grid.d_fractShiftsTable, pmeGpu->common->fsh.data(), 0, - newFractShiftsSize, pmeGpu->archSpecific->pmeStream_, - GpuApiCallBehavior::Async, nullptr); - copyToDeviceBuffer(&kernelParamsPtr->grid.d_gridlineIndicesTable, pmeGpu->common->nn.data(), 0, - newFractShiftsSize, pmeGpu->archSpecific->pmeStream_, - GpuApiCallBehavior::Async, nullptr); -#endif + initParamLookupTable(&kernelParamsPtr->grid.d_gridlineIndicesTable, + &kernelParamsPtr->gridlineIndicesTableTexture, pmeGpu->common->nn.data(), + newFractShiftsSize, pmeGpu->archSpecific->deviceContext_); } void pme_gpu_free_fract_shifts(const PmeGpu* pmeGpu) { auto* kernelParamsPtr = pmeGpu->kernelParams.get(); #if GMX_GPU == GMX_GPU_CUDA - destroyParamLookupTable(kernelParamsPtr->grid.d_fractShiftsTable, + destroyParamLookupTable(&kernelParamsPtr->grid.d_fractShiftsTable, kernelParamsPtr->fractShiftsTableTexture); - destroyParamLookupTable(kernelParamsPtr->grid.d_gridlineIndicesTable, + destroyParamLookupTable(&kernelParamsPtr->grid.d_gridlineIndicesTable, kernelParamsPtr->gridlineIndicesTableTexture); #elif GMX_GPU == GMX_GPU_OPENCL freeDeviceBuffer(&kernelParamsPtr->grid.d_fractShiftsTable); diff --git a/src/gromacs/ewald/pme_gpu_types.h b/src/gromacs/ewald/pme_gpu_types.h index 5ddd79fb5a..97f8416499 100644 --- a/src/gromacs/ewald/pme_gpu_types.h +++ b/src/gromacs/ewald/pme_gpu_types.h @@ -199,11 +199,9 @@ struct PmeGpuDynamicParams }; /*! \internal \brief - * A single structure encompassing almost all the PME data used in GPU kernels on device. - * This is inherited by the GPU framework-specific structure - * (PmeGpuCudaKernelParams in pme.cuh). - * This way, most code preparing the kernel parameters can be GPU-agnostic by casting - * the kernel parameter data pointer to PmeGpuKernelParamsBase. + * A single structure encompassing all the PME data used in GPU kernels on device. + * To extend the list with platform-specific parameters, this can be inherited by the + * GPU framework-specific structure. */ struct PmeGpuKernelParamsBase { @@ -218,6 +216,11 @@ struct PmeGpuKernelParamsBase * before launching spreading. */ struct PmeGpuDynamicParams current; + /* These texture objects are only used in CUDA and are related to the grid size. */ + /*! \brief Texture object for accessing grid.d_fractShiftsTable */ + HIDE_FROM_OPENCL_COMPILER(DeviceTexture) fractShiftsTableTexture; + /*! \brief Texture object for accessing grid.d_gridlineIndicesTable */ + HIDE_FROM_OPENCL_COMPILER(DeviceTexture) gridlineIndicesTableTexture; }; #endif diff --git a/src/gromacs/gpu_utils/cudautils.cu b/src/gromacs/gpu_utils/cudautils.cu index 1442bf47c2..83ee02e3b5 100644 --- a/src/gromacs/gpu_utils/cudautils.cu +++ b/src/gromacs/gpu_utils/cudautils.cu @@ -130,67 +130,3 @@ int cu_copy_H2D_async(void* d_dest, const void* h_src, size_t bytes, cudaStream_ { return cu_copy_H2D(d_dest, h_src, bytes, GpuApiCallBehavior::Async, s); } - -/*! \brief Set up texture object for an array of type T. - * - * Set up texture object for an array of type T and bind it to the device memory - * \p d_ptr points to. - * - * \tparam[in] T Raw data type - * \param[out] texObj texture object to initialize - * \param[in] d_ptr pointer to device global memory to bind \p texObj to - * \param[in] sizeInBytes size of memory area to bind \p texObj to - */ -template -static void setup1DTexture(cudaTextureObject_t& texObj, void* d_ptr, size_t sizeInBytes) -{ - assert(!c_disableCudaTextures); - - cudaError_t stat; - cudaResourceDesc rd; - cudaTextureDesc td; - - memset(&rd, 0, sizeof(rd)); - rd.resType = cudaResourceTypeLinear; - rd.res.linear.devPtr = d_ptr; - rd.res.linear.desc = cudaCreateChannelDesc(); - rd.res.linear.sizeInBytes = sizeInBytes; - - memset(&td, 0, sizeof(td)); - td.readMode = cudaReadModeElementType; - stat = cudaCreateTextureObject(&texObj, &rd, &td, nullptr); - CU_RET_ERR(stat, "cudaCreateTextureObject failed"); -} - -template -void initParamLookupTable(T*& d_ptr, cudaTextureObject_t& texObj, const T* h_ptr, int numElem) -{ - const size_t sizeInBytes = numElem * sizeof(*d_ptr); - cudaError_t stat = cudaMalloc((void**)&d_ptr, sizeInBytes); - CU_RET_ERR(stat, "cudaMalloc failed in initParamLookupTable"); - cu_copy_H2D_sync(d_ptr, (void*)h_ptr, sizeInBytes); - - if (!c_disableCudaTextures) - { - setup1DTexture(texObj, d_ptr, sizeInBytes); - } -} - -template -void destroyParamLookupTable(T* d_ptr, cudaTextureObject_t texObj) -{ - if (!c_disableCudaTextures && texObj) - { - CU_RET_ERR(cudaDestroyTextureObject(texObj), "cudaDestroyTextureObject on texObj failed"); - } - CU_RET_ERR(cudaFree(d_ptr), "cudaFree failed"); -} - -/*! \brief Add explicit instantiations of init/destroyParamLookupTable() here as needed. - * One should also verify that the result of cudaCreateChannelDesc() during texture setup - * looks reasonable, when instantiating the templates for new types - just in case. - */ -template void initParamLookupTable(float*&, cudaTextureObject_t&, const float*, int); -template void destroyParamLookupTable(float*, cudaTextureObject_t); -template void initParamLookupTable(int*&, cudaTextureObject_t&, const int*, int); -template void destroyParamLookupTable(int*, cudaTextureObject_t); diff --git a/src/gromacs/gpu_utils/cudautils.cuh b/src/gromacs/gpu_utils/cudautils.cuh index ff07d174c2..02eec85bbd 100644 --- a/src/gromacs/gpu_utils/cudautils.cuh +++ b/src/gromacs/gpu_utils/cudautils.cuh @@ -170,41 +170,6 @@ int cu_copy_H2D_async(void* /*d_dest*/, const void* /*h_src*/, size_t /*bytes*/, // GPU table object. There is also almost self-contained fetchFromParamLookupTable() // in cuda_kernel_utils.cuh. They could all live in a separate class/struct file. -/*! \brief Initialize parameter lookup table. - * - * Initializes device memory, copies data from host and binds - * a texture to allocated device memory to be used for parameter lookup. - * - * \tparam[in] T Raw data type - * \param[out] d_ptr device pointer to the memory to be allocated - * \param[out] texObj texture object to be initialized - * \param[in] h_ptr pointer to the host memory to be uploaded to the device - * \param[in] numElem number of elements in the h_ptr - */ -template -void initParamLookupTable(T*& d_ptr, cudaTextureObject_t& texObj, const T* h_ptr, int numElem); - -// Add extern declarations so each translation unit understands that -// there will be a definition provided. -extern template void initParamLookupTable(int*&, cudaTextureObject_t&, const int*, int); -extern template void initParamLookupTable(float*&, cudaTextureObject_t&, const float*, int); - -/*! \brief Destroy parameter lookup table. - * - * Unbinds texture object, deallocates device memory. - * - * \tparam[in] T Raw data type - * \param[in] d_ptr Device pointer to the memory to be deallocated - * \param[in] texObj Texture object to be deinitialized - */ -template -void destroyParamLookupTable(T* d_ptr, cudaTextureObject_t texObj); - -// Add extern declarations so each translation unit understands that -// there will be a definition provided. -extern template void destroyParamLookupTable(int*, cudaTextureObject_t); -extern template void destroyParamLookupTable(float*, cudaTextureObject_t); - /*! \brief Add a triplets stored in a float3 to an rvec variable. * * \param[out] a Rvec to increment diff --git a/src/gromacs/gpu_utils/devicebuffer.cuh b/src/gromacs/gpu_utils/devicebuffer.cuh index d4bfe8c35b..c9bce141fe 100644 --- a/src/gromacs/gpu_utils/devicebuffer.cuh +++ b/src/gromacs/gpu_utils/devicebuffer.cuh @@ -45,11 +45,13 @@ * \inlibraryapi */ +#include "gromacs/gpu_utils/cuda_arch_utils.cuh" #include "gromacs/gpu_utils/device_context.h" #include "gromacs/gpu_utils/devicebuffer_datatype.h" #include "gromacs/gpu_utils/gpu_utils.h" //only for GpuApiCallBehavior #include "gromacs/gpu_utils/gputraits.cuh" #include "gromacs/utility/gmxassert.h" +#include "gromacs/utility/stringutil.h" /*! \brief * Allocates a device-side buffer. @@ -139,7 +141,6 @@ void copyToDeviceBuffer(DeviceBuffer* buffer, } } - /*! \brief * Performs the device-to-host data copy, synchronous or asynchronously on request. * @@ -231,4 +232,90 @@ static bool checkDeviceBuffer(DeviceBuffer buffer, gmx_unused int requiredSiz return buffer != nullptr; } +//! Device texture wrapper. +using DeviceTexture = cudaTextureObject_t; + +/*! \brief Create a texture object for an array of type ValueType. + * + * Creates the device buffer, copies data and binds texture object for an array of type ValueType. + * + * \todo Test if using textures is still relevant on modern hardware. + * + * \tparam ValueType Raw data type. + * + * \param[out] deviceBuffer Device buffer to store data in. + * \param[out] deviceTexture Device texture object to initialize. + * \param[in] hostBuffer Host buffer to get date from + * \param[in] numValues Number of elements in the buffer. + * \param[in] deviceContext GPU device context. + */ +template +void initParamLookupTable(DeviceBuffer* deviceBuffer, + DeviceTexture* deviceTexture, + const ValueType* hostBuffer, + int numValues, + const DeviceContext& deviceContext) +{ + if (numValues == 0) + { + return; + } + GMX_ASSERT(hostBuffer, "Host buffer should be specified."); + + allocateDeviceBuffer(deviceBuffer, numValues, deviceContext); + + const size_t sizeInBytes = numValues * sizeof(ValueType); + + cudaError_t stat = + cudaMemcpy(*((ValueType**)deviceBuffer), hostBuffer, sizeInBytes, cudaMemcpyHostToDevice); + + GMX_RELEASE_ASSERT( + stat == cudaSuccess, + gmx::formatString("Synchronous H2D copy failed (CUDA error: %s).", cudaGetErrorName(stat)) + .c_str()); + + if (!c_disableCudaTextures) + { + cudaResourceDesc rd; + cudaTextureDesc td; + + memset(&rd, 0, sizeof(rd)); + rd.resType = cudaResourceTypeLinear; + rd.res.linear.devPtr = *deviceBuffer; + rd.res.linear.desc = cudaCreateChannelDesc(); + rd.res.linear.sizeInBytes = sizeInBytes; + + memset(&td, 0, sizeof(td)); + td.readMode = cudaReadModeElementType; + stat = cudaCreateTextureObject(deviceTexture, &rd, &td, nullptr); + GMX_RELEASE_ASSERT(stat == cudaSuccess, + gmx::formatString("cudaCreateTextureObject failed (CUDA error: %s).", + cudaGetErrorName(stat)) + .c_str()); + } +} + +/*! \brief Unbind the texture and release the CUDA texture object. + * + * \tparam ValueType Raw data type + * + * \param[in,out] deviceBuffer Device buffer to store data in. + * \param[in,out] deviceTexture Device texture object to unbind. + */ +template +void destroyParamLookupTable(DeviceBuffer* deviceBuffer, DeviceTexture& deviceTexture) +{ + if (!c_disableCudaTextures && deviceTexture && deviceBuffer) + { + cudaError_t stat = cudaDestroyTextureObject(deviceTexture); + GMX_RELEASE_ASSERT( + stat == cudaSuccess, + gmx::formatString( + "cudaDestroyTextureObject on texture object failed (CUDA error: %s).", + cudaGetErrorName(stat)) + .c_str()); + } + freeDeviceBuffer(deviceBuffer); +} + #endif diff --git a/src/gromacs/gpu_utils/devicebuffer_ocl.h b/src/gromacs/gpu_utils/devicebuffer_ocl.h index a8aa77b046..f1eac5a9d1 100644 --- a/src/gromacs/gpu_utils/devicebuffer_ocl.h +++ b/src/gromacs/gpu_utils/devicebuffer_ocl.h @@ -270,6 +270,53 @@ static bool checkDeviceBuffer(DeviceBuffer buffer, int requiredSize) return retval == CL_SUCCESS && static_cast(size) >= requiredSize; } +//! Device texture wrapper. +using DeviceTexture = void*; + +/*! \brief Create a texture object for an array of type ValueType. + * + * Creates the device buffer and copies read-only data for an array of type ValueType. + * + * \todo Decide if using image2d is most efficient. + * + * \tparam ValueType Raw data type. + * + * \param[out] deviceBuffer Device buffer to store data in. + * \param[in] hostBuffer Host buffer to get date from. + * \param[in] numValues Number of elements in the buffer. + * \param[in] deviceContext GPU device context. + */ +template +void initParamLookupTable(DeviceBuffer* deviceBuffer, + DeviceTexture* /* deviceTexture */, + const ValueType* hostBuffer, + int numValues, + const DeviceContext& deviceContext) +{ + GMX_ASSERT(hostBuffer, "Host buffer pointer can not be null"); + const size_t bytes = numValues * sizeof(ValueType); + cl_int clError; + *deviceBuffer = clCreateBuffer(deviceContext.context(), + CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, + bytes, const_cast(hostBuffer), &clError); + + GMX_RELEASE_ASSERT(clError == CL_SUCCESS, + gmx::formatString("Constant memory allocation failed (OpenCL error %d: %s)", + clError, ocl_get_error_string(clError).c_str()) + .c_str()); +} + +/*! \brief Release the OpenCL device buffer. + * + * \tparam ValueType Raw data type. + * + * \param[in,out] deviceBuffer Device buffer to store data in. + */ +template +void destroyParamLookupTable(DeviceBuffer* deviceBuffer, DeviceTexture& /* deviceTexture*/) +{ + freeDeviceBuffer(deviceBuffer); +} #if defined(__clang__) # pragma clang diagnostic pop #endif diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu index 6579d41004..d41ee7d2ac 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu +++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu @@ -111,7 +111,9 @@ static inline bool useLjCombRule(const cu_nbparam_t* nbparam) and the table GPU array. If called with an already allocated table, it just re-uploads the table. */ -static void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables, cu_nbparam_t* nbp) +static void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables, + cu_nbparam_t* nbp, + const DeviceContext& deviceContext) { if (nbp->coulomb_tab != nullptr) { @@ -119,8 +121,8 @@ static void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables, } nbp->coulomb_tab_scale = tables.scale; - initParamLookupTable(nbp->coulomb_tab, nbp->coulomb_tab_texobj, tables.tableF.data(), - tables.tableF.size()); + initParamLookupTable(&nbp->coulomb_tab, &nbp->coulomb_tab_texobj, tables.tableF.data(), + tables.tableF.size(), deviceContext); } @@ -233,7 +235,8 @@ static void set_cutoff_parameters(cu_nbparam_t* nbp, const interaction_const_t* static void init_nbparam(cu_nbparam_t* nbp, const interaction_const_t* ic, const PairlistParams& listParams, - const nbnxn_atomdata_t::Params& nbatParams) + const nbnxn_atomdata_t::Params& nbatParams, + const DeviceContext& deviceContext) { int ntypes; @@ -319,19 +322,21 @@ static void init_nbparam(cu_nbparam_t* nbp, if (nbp->eeltype == eelCuEWALD_TAB || nbp->eeltype == eelCuEWALD_TAB_TWIN) { GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables"); - init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp); + init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, deviceContext); } /* set up LJ parameter lookup table */ if (!useLjCombRule(nbp)) { - initParamLookupTable(nbp->nbfp, nbp->nbfp_texobj, nbatParams.nbfp.data(), 2 * ntypes * ntypes); + initParamLookupTable(&nbp->nbfp, &nbp->nbfp_texobj, nbatParams.nbfp.data(), + 2 * ntypes * ntypes, deviceContext); } /* set up LJ-PME parameter lookup table */ if (ic->vdwtype == evdwPME) { - initParamLookupTable(nbp->nbfp_comb, nbp->nbfp_comb_texobj, nbatParams.nbfp_comb.data(), 2 * ntypes); + initParamLookupTable(&nbp->nbfp_comb, &nbp->nbfp_comb_texobj, nbatParams.nbfp_comb.data(), + 2 * ntypes, deviceContext); } } @@ -343,6 +348,7 @@ void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interacti { return; } + NbnxmGpu* nb = nbv->gpu_nbv; cu_nbparam_t* nbp = nbv->gpu_nbv->nbparam; set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params()); @@ -350,7 +356,7 @@ void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interacti nbp->eeltype = pick_ewald_kernel_type(*ic); GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables"); - init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp); + init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, *nb->deviceContext_); } /*! Initializes the pair list data structure. */ @@ -407,7 +413,7 @@ static void cuda_init_const(NbnxmGpu* nb, const nbnxn_atomdata_t::Params& nbatParams) { init_atomdata_first(nb->atdat, nbatParams.numTypes); - init_nbparam(nb->nbparam, ic, listParams, nbatParams); + init_nbparam(nb->nbparam, ic, listParams, nbatParams, *nb->deviceContext_); /* clear energy and shift force outputs */ nbnxn_cuda_clear_e_fshift(nb); @@ -697,7 +703,7 @@ static void nbnxn_cuda_free_nbparam_table(cu_nbparam_t* nbparam) { if (nbparam->eeltype == eelCuEWALD_TAB || nbparam->eeltype == eelCuEWALD_TAB_TWIN) { - destroyParamLookupTable(nbparam->coulomb_tab, nbparam->coulomb_tab_texobj); + destroyParamLookupTable(&nbparam->coulomb_tab, nbparam->coulomb_tab_texobj); } } @@ -726,12 +732,12 @@ void gpu_free(NbnxmGpu* nb) if (!useLjCombRule(nb->nbparam)) { - destroyParamLookupTable(nbparam->nbfp, nbparam->nbfp_texobj); + destroyParamLookupTable(&nbparam->nbfp, nbparam->nbfp_texobj); } if (nbparam->vdwtype == evdwCuEWALDGEOM || nbparam->vdwtype == evdwCuEWALDLB) { - destroyParamLookupTable(nbparam->nbfp_comb, nbparam->nbfp_comb_texobj); + destroyParamLookupTable(&nbparam->nbfp_comb, nbparam->nbfp_comb_texobj); } stat = cudaFree(atdat->shift_vec); diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp index bc913e0e24..4f1e9fc65c 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp @@ -118,35 +118,16 @@ static void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables, cl_nbparam_t* nbp, const DeviceContext& deviceContext) { - cl_mem coul_tab; - - cl_int cl_error; - if (nbp->coulomb_tab_climg2d != nullptr) { freeDeviceBuffer(&(nbp->coulomb_tab_climg2d)); } - /* Switched from using textures to using buffers */ - // TODO: decide which alternative is most efficient - textures or buffers. - /* - cl_image_format array_format; - - array_format.image_channel_data_type = CL_FLOAT; - array_format.image_channel_order = CL_R; - - coul_tab = clCreateImage2D(deviceContext.context(), CL_MEM_READ_WRITE | - CL_MEM_COPY_HOST_PTR, &array_format, tabsize, 1, 0, ftmp, &cl_error); - */ + DeviceBuffer coulomb_tab; - coul_tab = clCreateBuffer(deviceContext.context(), - CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, - tables.tableF.size() * sizeof(cl_float), - const_cast(tables.tableF.data()), &cl_error); - GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS, - ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str()); + initParamLookupTable(&coulomb_tab, nullptr, tables.tableF.data(), tables.tableF.size(), deviceContext); - nbp->coulomb_tab_climg2d = coul_tab; + nbp->coulomb_tab_climg2d = coulomb_tab; nbp->coulomb_tab_scale = tables.scale; } @@ -320,22 +301,7 @@ static void init_nbparam(cl_nbparam_t* nbp, init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, deviceContext); } else - // TODO: improvement needed. - // The image2d is created here even if eeltype is not eelCuEWALD_TAB or eelCuEWALD_TAB_TWIN - // because the OpenCL kernels don't accept nullptr values for image2D parameters. { - /* Switched from using textures to using buffers */ - // TODO: decide which alternative is most efficient - textures or buffers. - /* - cl_image_format array_format; - - array_format.image_channel_data_type = CL_FLOAT; - array_format.image_channel_order = CL_R; - - nbp->coulomb_tab_climg2d = clCreateImage2D(deviceContext.context(), - CL_MEM_READ_WRITE, &array_format, 1, 1, 0, nullptr, &cl_error); - */ - nbp->coulomb_tab_climg2d = clCreateBuffer(deviceContext.context(), CL_MEM_READ_ONLY, sizeof(cl_float), nullptr, &cl_error); GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS, @@ -346,51 +312,16 @@ static void init_nbparam(cl_nbparam_t* nbp, const int nnbfp_comb = 2 * nbatParams.numTypes; { - /* Switched from using textures to using buffers */ - // TODO: decide which alternative is most efficient - textures or buffers. - /* - cl_image_format array_format; - - array_format.image_channel_data_type = CL_FLOAT; - array_format.image_channel_order = CL_R; - - nbp->nbfp_climg2d = clCreateImage2D(deviceContext.context(), CL_MEM_READ_ONLY | - CL_MEM_COPY_HOST_PTR, &array_format, nnbfp, 1, 0, nbat->nbfp, &cl_error); - */ - - nbp->nbfp_climg2d = clCreateBuffer( - deviceContext.context(), CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, - nnbfp * sizeof(cl_float), const_cast(nbatParams.nbfp.data()), &cl_error); - GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS, - ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str()); + /* set up LJ parameter lookup table */ + DeviceBuffer nbfp; + initParamLookupTable(&nbfp, nullptr, nbatParams.nbfp.data(), nnbfp, deviceContext); + nbp->nbfp_climg2d = nbfp; if (ic->vdwtype == evdwPME) { - /* Switched from using textures to using buffers */ - // TODO: decide which alternative is most efficient - textures or buffers. - /* nbp->nbfp_comb_climg2d = clCreateImage2D(deviceContext.context(), CL_MEM_READ_WRITE | - CL_MEM_COPY_HOST_PTR, &array_format, nnbfp_comb, 1, 0, nbat->nbfp_comb, &cl_error);*/ - nbp->nbfp_comb_climg2d = - clCreateBuffer(deviceContext.context(), - CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, - nnbfp_comb * sizeof(cl_float), - const_cast(nbatParams.nbfp_comb.data()), &cl_error); - GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS, - ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str()); - } - else - { - // TODO: improvement needed. - // The image2d is created here even if vdwtype is not evdwPME because the OpenCL kernels - // don't accept nullptr values for image2D parameters. - /* Switched from using textures to using buffers */ - // TODO: decide which alternative is most efficient - textures or buffers. - /* nbp->nbfp_comb_climg2d = clCreateImage2D(deviceContext.context(), - CL_MEM_READ_WRITE, &array_format, 1, 1, 0, nullptr, &cl_error);*/ - nbp->nbfp_comb_climg2d = clCreateBuffer(deviceContext.context(), CL_MEM_READ_ONLY, - sizeof(cl_float), nullptr, &cl_error); - GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS, - ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str()); + DeviceBuffer nbfp_comb; + initParamLookupTable(&nbfp_comb, nullptr, nbatParams.nbfp_comb.data(), nnbfp_comb, deviceContext); + nbp->nbfp_comb_climg2d = nbfp_comb; } } } -- 2.22.0