/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "pme_gpu_types.h"
/*! \brief \internal
- * A single structure encompassing all the PME data used in CUDA kernels.
- * This inherits from PmeGpuKernelParamsBase and adds a couple cudaTextureObject_t handles,
- * which we would like to avoid in plain C++.
+ * An alias for PME parameters in CUDA.
+ * \todo Remove if we decide to unify CUDA and OpenCL
*/
struct PmeGpuCudaKernelParams : PmeGpuKernelParamsBase
{
- /* These are CUDA texture objects, related to the grid size. */
- /*! \brief CUDA texture object for accessing grid.d_fractShiftsTable */
- cudaTextureObject_t fractShiftsTableTexture;
- /*! \brief CUDA texture object for accessing grid.d_gridlineIndicesTable */
- cudaTextureObject_t gridlineIndicesTableTexture;
+ // Place CUDA-specific stuff here
};
#endif
const int newFractShiftsSize = cellCount * (nx + ny + nz);
-#if GMX_GPU == GMX_GPU_CUDA
- initParamLookupTable(kernelParamsPtr->grid.d_fractShiftsTable, kernelParamsPtr->fractShiftsTableTexture,
- pmeGpu->common->fsh.data(), newFractShiftsSize);
+ initParamLookupTable(&kernelParamsPtr->grid.d_fractShiftsTable,
+ &kernelParamsPtr->fractShiftsTableTexture, pmeGpu->common->fsh.data(),
+ newFractShiftsSize, pmeGpu->archSpecific->deviceContext_);
- initParamLookupTable(kernelParamsPtr->grid.d_gridlineIndicesTable,
- kernelParamsPtr->gridlineIndicesTableTexture, pmeGpu->common->nn.data(),
- newFractShiftsSize);
-#elif GMX_GPU == GMX_GPU_OPENCL
- // No dedicated texture routines....
- allocateDeviceBuffer(&kernelParamsPtr->grid.d_fractShiftsTable, newFractShiftsSize,
- pmeGpu->archSpecific->deviceContext_);
- allocateDeviceBuffer(&kernelParamsPtr->grid.d_gridlineIndicesTable, newFractShiftsSize,
- pmeGpu->archSpecific->deviceContext_);
- copyToDeviceBuffer(&kernelParamsPtr->grid.d_fractShiftsTable, pmeGpu->common->fsh.data(), 0,
- newFractShiftsSize, pmeGpu->archSpecific->pmeStream_,
- GpuApiCallBehavior::Async, nullptr);
- copyToDeviceBuffer(&kernelParamsPtr->grid.d_gridlineIndicesTable, pmeGpu->common->nn.data(), 0,
- newFractShiftsSize, pmeGpu->archSpecific->pmeStream_,
- GpuApiCallBehavior::Async, nullptr);
-#endif
+ initParamLookupTable(&kernelParamsPtr->grid.d_gridlineIndicesTable,
+ &kernelParamsPtr->gridlineIndicesTableTexture, pmeGpu->common->nn.data(),
+ newFractShiftsSize, pmeGpu->archSpecific->deviceContext_);
}
void pme_gpu_free_fract_shifts(const PmeGpu* pmeGpu)
{
auto* kernelParamsPtr = pmeGpu->kernelParams.get();
#if GMX_GPU == GMX_GPU_CUDA
- destroyParamLookupTable(kernelParamsPtr->grid.d_fractShiftsTable,
+ destroyParamLookupTable(&kernelParamsPtr->grid.d_fractShiftsTable,
kernelParamsPtr->fractShiftsTableTexture);
- destroyParamLookupTable(kernelParamsPtr->grid.d_gridlineIndicesTable,
+ destroyParamLookupTable(&kernelParamsPtr->grid.d_gridlineIndicesTable,
kernelParamsPtr->gridlineIndicesTableTexture);
#elif GMX_GPU == GMX_GPU_OPENCL
freeDeviceBuffer(&kernelParamsPtr->grid.d_fractShiftsTable);
};
/*! \internal \brief
- * A single structure encompassing almost all the PME data used in GPU kernels on device.
- * This is inherited by the GPU framework-specific structure
- * (PmeGpuCudaKernelParams in pme.cuh).
- * This way, most code preparing the kernel parameters can be GPU-agnostic by casting
- * the kernel parameter data pointer to PmeGpuKernelParamsBase.
+ * A single structure encompassing all the PME data used in GPU kernels on device.
+ * To extend the list with platform-specific parameters, this can be inherited by the
+ * GPU framework-specific structure.
*/
struct PmeGpuKernelParamsBase
{
* before launching spreading.
*/
struct PmeGpuDynamicParams current;
+ /* These texture objects are only used in CUDA and are related to the grid size. */
+ /*! \brief Texture object for accessing grid.d_fractShiftsTable */
+ HIDE_FROM_OPENCL_COMPILER(DeviceTexture) fractShiftsTableTexture;
+ /*! \brief Texture object for accessing grid.d_gridlineIndicesTable */
+ HIDE_FROM_OPENCL_COMPILER(DeviceTexture) gridlineIndicesTableTexture;
};
#endif
{
return cu_copy_H2D(d_dest, h_src, bytes, GpuApiCallBehavior::Async, s);
}
-
-/*! \brief Set up texture object for an array of type T.
- *
- * Set up texture object for an array of type T and bind it to the device memory
- * \p d_ptr points to.
- *
- * \tparam[in] T Raw data type
- * \param[out] texObj texture object to initialize
- * \param[in] d_ptr pointer to device global memory to bind \p texObj to
- * \param[in] sizeInBytes size of memory area to bind \p texObj to
- */
-template<typename T>
-static void setup1DTexture(cudaTextureObject_t& texObj, void* d_ptr, size_t sizeInBytes)
-{
- assert(!c_disableCudaTextures);
-
- cudaError_t stat;
- cudaResourceDesc rd;
- cudaTextureDesc td;
-
- memset(&rd, 0, sizeof(rd));
- rd.resType = cudaResourceTypeLinear;
- rd.res.linear.devPtr = d_ptr;
- rd.res.linear.desc = cudaCreateChannelDesc<T>();
- rd.res.linear.sizeInBytes = sizeInBytes;
-
- memset(&td, 0, sizeof(td));
- td.readMode = cudaReadModeElementType;
- stat = cudaCreateTextureObject(&texObj, &rd, &td, nullptr);
- CU_RET_ERR(stat, "cudaCreateTextureObject failed");
-}
-
-template<typename T>
-void initParamLookupTable(T*& d_ptr, cudaTextureObject_t& texObj, const T* h_ptr, int numElem)
-{
- const size_t sizeInBytes = numElem * sizeof(*d_ptr);
- cudaError_t stat = cudaMalloc((void**)&d_ptr, sizeInBytes);
- CU_RET_ERR(stat, "cudaMalloc failed in initParamLookupTable");
- cu_copy_H2D_sync(d_ptr, (void*)h_ptr, sizeInBytes);
-
- if (!c_disableCudaTextures)
- {
- setup1DTexture<T>(texObj, d_ptr, sizeInBytes);
- }
-}
-
-template<typename T>
-void destroyParamLookupTable(T* d_ptr, cudaTextureObject_t texObj)
-{
- if (!c_disableCudaTextures && texObj)
- {
- CU_RET_ERR(cudaDestroyTextureObject(texObj), "cudaDestroyTextureObject on texObj failed");
- }
- CU_RET_ERR(cudaFree(d_ptr), "cudaFree failed");
-}
-
-/*! \brief Add explicit instantiations of init/destroyParamLookupTable() here as needed.
- * One should also verify that the result of cudaCreateChannelDesc<T>() during texture setup
- * looks reasonable, when instantiating the templates for new types - just in case.
- */
-template void initParamLookupTable<float>(float*&, cudaTextureObject_t&, const float*, int);
-template void destroyParamLookupTable<float>(float*, cudaTextureObject_t);
-template void initParamLookupTable<int>(int*&, cudaTextureObject_t&, const int*, int);
-template void destroyParamLookupTable<int>(int*, cudaTextureObject_t);
// GPU table object. There is also almost self-contained fetchFromParamLookupTable()
// in cuda_kernel_utils.cuh. They could all live in a separate class/struct file.
-/*! \brief Initialize parameter lookup table.
- *
- * Initializes device memory, copies data from host and binds
- * a texture to allocated device memory to be used for parameter lookup.
- *
- * \tparam[in] T Raw data type
- * \param[out] d_ptr device pointer to the memory to be allocated
- * \param[out] texObj texture object to be initialized
- * \param[in] h_ptr pointer to the host memory to be uploaded to the device
- * \param[in] numElem number of elements in the h_ptr
- */
-template<typename T>
-void initParamLookupTable(T*& d_ptr, cudaTextureObject_t& texObj, const T* h_ptr, int numElem);
-
-// Add extern declarations so each translation unit understands that
-// there will be a definition provided.
-extern template void initParamLookupTable<int>(int*&, cudaTextureObject_t&, const int*, int);
-extern template void initParamLookupTable<float>(float*&, cudaTextureObject_t&, const float*, int);
-
-/*! \brief Destroy parameter lookup table.
- *
- * Unbinds texture object, deallocates device memory.
- *
- * \tparam[in] T Raw data type
- * \param[in] d_ptr Device pointer to the memory to be deallocated
- * \param[in] texObj Texture object to be deinitialized
- */
-template<typename T>
-void destroyParamLookupTable(T* d_ptr, cudaTextureObject_t texObj);
-
-// Add extern declarations so each translation unit understands that
-// there will be a definition provided.
-extern template void destroyParamLookupTable<int>(int*, cudaTextureObject_t);
-extern template void destroyParamLookupTable<float>(float*, cudaTextureObject_t);
-
/*! \brief Add a triplets stored in a float3 to an rvec variable.
*
* \param[out] a Rvec to increment
* \inlibraryapi
*/
+#include "gromacs/gpu_utils/cuda_arch_utils.cuh"
#include "gromacs/gpu_utils/device_context.h"
#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/gpu_utils/gpu_utils.h" //only for GpuApiCallBehavior
#include "gromacs/gpu_utils/gputraits.cuh"
#include "gromacs/utility/gmxassert.h"
+#include "gromacs/utility/stringutil.h"
/*! \brief
* Allocates a device-side buffer.
}
}
-
/*! \brief
* Performs the device-to-host data copy, synchronous or asynchronously on request.
*
return buffer != nullptr;
}
+//! Device texture wrapper.
+using DeviceTexture = cudaTextureObject_t;
+
+/*! \brief Create a texture object for an array of type ValueType.
+ *
+ * Creates the device buffer, copies data and binds texture object for an array of type ValueType.
+ *
+ * \todo Test if using textures is still relevant on modern hardware.
+ *
+ * \tparam ValueType Raw data type.
+ *
+ * \param[out] deviceBuffer Device buffer to store data in.
+ * \param[out] deviceTexture Device texture object to initialize.
+ * \param[in] hostBuffer Host buffer to get date from
+ * \param[in] numValues Number of elements in the buffer.
+ * \param[in] deviceContext GPU device context.
+ */
+template<typename ValueType>
+void initParamLookupTable(DeviceBuffer<ValueType>* deviceBuffer,
+ DeviceTexture* deviceTexture,
+ const ValueType* hostBuffer,
+ int numValues,
+ const DeviceContext& deviceContext)
+{
+ if (numValues == 0)
+ {
+ return;
+ }
+ GMX_ASSERT(hostBuffer, "Host buffer should be specified.");
+
+ allocateDeviceBuffer(deviceBuffer, numValues, deviceContext);
+
+ const size_t sizeInBytes = numValues * sizeof(ValueType);
+
+ cudaError_t stat =
+ cudaMemcpy(*((ValueType**)deviceBuffer), hostBuffer, sizeInBytes, cudaMemcpyHostToDevice);
+
+ GMX_RELEASE_ASSERT(
+ stat == cudaSuccess,
+ gmx::formatString("Synchronous H2D copy failed (CUDA error: %s).", cudaGetErrorName(stat))
+ .c_str());
+
+ if (!c_disableCudaTextures)
+ {
+ cudaResourceDesc rd;
+ cudaTextureDesc td;
+
+ memset(&rd, 0, sizeof(rd));
+ rd.resType = cudaResourceTypeLinear;
+ rd.res.linear.devPtr = *deviceBuffer;
+ rd.res.linear.desc = cudaCreateChannelDesc<ValueType>();
+ rd.res.linear.sizeInBytes = sizeInBytes;
+
+ memset(&td, 0, sizeof(td));
+ td.readMode = cudaReadModeElementType;
+ stat = cudaCreateTextureObject(deviceTexture, &rd, &td, nullptr);
+ GMX_RELEASE_ASSERT(stat == cudaSuccess,
+ gmx::formatString("cudaCreateTextureObject failed (CUDA error: %s).",
+ cudaGetErrorName(stat))
+ .c_str());
+ }
+}
+
+/*! \brief Unbind the texture and release the CUDA texture object.
+ *
+ * \tparam ValueType Raw data type
+ *
+ * \param[in,out] deviceBuffer Device buffer to store data in.
+ * \param[in,out] deviceTexture Device texture object to unbind.
+ */
+template<typename ValueType>
+void destroyParamLookupTable(DeviceBuffer<ValueType>* deviceBuffer, DeviceTexture& deviceTexture)
+{
+ if (!c_disableCudaTextures && deviceTexture && deviceBuffer)
+ {
+ cudaError_t stat = cudaDestroyTextureObject(deviceTexture);
+ GMX_RELEASE_ASSERT(
+ stat == cudaSuccess,
+ gmx::formatString(
+ "cudaDestroyTextureObject on texture object failed (CUDA error: %s).",
+ cudaGetErrorName(stat))
+ .c_str());
+ }
+ freeDeviceBuffer(deviceBuffer);
+}
+
#endif
return retval == CL_SUCCESS && static_cast<int>(size) >= requiredSize;
}
+//! Device texture wrapper.
+using DeviceTexture = void*;
+
+/*! \brief Create a texture object for an array of type ValueType.
+ *
+ * Creates the device buffer and copies read-only data for an array of type ValueType.
+ *
+ * \todo Decide if using image2d is most efficient.
+ *
+ * \tparam ValueType Raw data type.
+ *
+ * \param[out] deviceBuffer Device buffer to store data in.
+ * \param[in] hostBuffer Host buffer to get date from.
+ * \param[in] numValues Number of elements in the buffer.
+ * \param[in] deviceContext GPU device context.
+ */
+template<typename ValueType>
+void initParamLookupTable(DeviceBuffer<ValueType>* deviceBuffer,
+ DeviceTexture* /* deviceTexture */,
+ const ValueType* hostBuffer,
+ int numValues,
+ const DeviceContext& deviceContext)
+{
+ GMX_ASSERT(hostBuffer, "Host buffer pointer can not be null");
+ const size_t bytes = numValues * sizeof(ValueType);
+ cl_int clError;
+ *deviceBuffer = clCreateBuffer(deviceContext.context(),
+ CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
+ bytes, const_cast<ValueType*>(hostBuffer), &clError);
+
+ GMX_RELEASE_ASSERT(clError == CL_SUCCESS,
+ gmx::formatString("Constant memory allocation failed (OpenCL error %d: %s)",
+ clError, ocl_get_error_string(clError).c_str())
+ .c_str());
+}
+
+/*! \brief Release the OpenCL device buffer.
+ *
+ * \tparam ValueType Raw data type.
+ *
+ * \param[in,out] deviceBuffer Device buffer to store data in.
+ */
+template<typename ValueType>
+void destroyParamLookupTable(DeviceBuffer<ValueType>* deviceBuffer, DeviceTexture& /* deviceTexture*/)
+{
+ freeDeviceBuffer(deviceBuffer);
+}
#if defined(__clang__)
# pragma clang diagnostic pop
#endif
and the table GPU array. If called with an already allocated table,
it just re-uploads the table.
*/
-static void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables, cu_nbparam_t* nbp)
+static void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables,
+ cu_nbparam_t* nbp,
+ const DeviceContext& deviceContext)
{
if (nbp->coulomb_tab != nullptr)
{
}
nbp->coulomb_tab_scale = tables.scale;
- initParamLookupTable(nbp->coulomb_tab, nbp->coulomb_tab_texobj, tables.tableF.data(),
- tables.tableF.size());
+ initParamLookupTable(&nbp->coulomb_tab, &nbp->coulomb_tab_texobj, tables.tableF.data(),
+ tables.tableF.size(), deviceContext);
}
static void init_nbparam(cu_nbparam_t* nbp,
const interaction_const_t* ic,
const PairlistParams& listParams,
- const nbnxn_atomdata_t::Params& nbatParams)
+ const nbnxn_atomdata_t::Params& nbatParams,
+ const DeviceContext& deviceContext)
{
int ntypes;
if (nbp->eeltype == eelCuEWALD_TAB || nbp->eeltype == eelCuEWALD_TAB_TWIN)
{
GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables");
- init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp);
+ init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, deviceContext);
}
/* set up LJ parameter lookup table */
if (!useLjCombRule(nbp))
{
- initParamLookupTable(nbp->nbfp, nbp->nbfp_texobj, nbatParams.nbfp.data(), 2 * ntypes * ntypes);
+ initParamLookupTable(&nbp->nbfp, &nbp->nbfp_texobj, nbatParams.nbfp.data(),
+ 2 * ntypes * ntypes, deviceContext);
}
/* set up LJ-PME parameter lookup table */
if (ic->vdwtype == evdwPME)
{
- initParamLookupTable(nbp->nbfp_comb, nbp->nbfp_comb_texobj, nbatParams.nbfp_comb.data(), 2 * ntypes);
+ initParamLookupTable(&nbp->nbfp_comb, &nbp->nbfp_comb_texobj, nbatParams.nbfp_comb.data(),
+ 2 * ntypes, deviceContext);
}
}
{
return;
}
+ NbnxmGpu* nb = nbv->gpu_nbv;
cu_nbparam_t* nbp = nbv->gpu_nbv->nbparam;
set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params());
nbp->eeltype = pick_ewald_kernel_type(*ic);
GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables");
- init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp);
+ init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, *nb->deviceContext_);
}
/*! Initializes the pair list data structure. */
const nbnxn_atomdata_t::Params& nbatParams)
{
init_atomdata_first(nb->atdat, nbatParams.numTypes);
- init_nbparam(nb->nbparam, ic, listParams, nbatParams);
+ init_nbparam(nb->nbparam, ic, listParams, nbatParams, *nb->deviceContext_);
/* clear energy and shift force outputs */
nbnxn_cuda_clear_e_fshift(nb);
{
if (nbparam->eeltype == eelCuEWALD_TAB || nbparam->eeltype == eelCuEWALD_TAB_TWIN)
{
- destroyParamLookupTable(nbparam->coulomb_tab, nbparam->coulomb_tab_texobj);
+ destroyParamLookupTable(&nbparam->coulomb_tab, nbparam->coulomb_tab_texobj);
}
}
if (!useLjCombRule(nb->nbparam))
{
- destroyParamLookupTable(nbparam->nbfp, nbparam->nbfp_texobj);
+ destroyParamLookupTable(&nbparam->nbfp, nbparam->nbfp_texobj);
}
if (nbparam->vdwtype == evdwCuEWALDGEOM || nbparam->vdwtype == evdwCuEWALDLB)
{
- destroyParamLookupTable(nbparam->nbfp_comb, nbparam->nbfp_comb_texobj);
+ destroyParamLookupTable(&nbparam->nbfp_comb, nbparam->nbfp_comb_texobj);
}
stat = cudaFree(atdat->shift_vec);
cl_nbparam_t* nbp,
const DeviceContext& deviceContext)
{
- cl_mem coul_tab;
-
- cl_int cl_error;
-
if (nbp->coulomb_tab_climg2d != nullptr)
{
freeDeviceBuffer(&(nbp->coulomb_tab_climg2d));
}
- /* Switched from using textures to using buffers */
- // TODO: decide which alternative is most efficient - textures or buffers.
- /*
- cl_image_format array_format;
-
- array_format.image_channel_data_type = CL_FLOAT;
- array_format.image_channel_order = CL_R;
-
- coul_tab = clCreateImage2D(deviceContext.context(), CL_MEM_READ_WRITE |
- CL_MEM_COPY_HOST_PTR, &array_format, tabsize, 1, 0, ftmp, &cl_error);
- */
+ DeviceBuffer<real> coulomb_tab;
- coul_tab = clCreateBuffer(deviceContext.context(),
- CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
- tables.tableF.size() * sizeof(cl_float),
- const_cast<real*>(tables.tableF.data()), &cl_error);
- GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
- ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
+ initParamLookupTable(&coulomb_tab, nullptr, tables.tableF.data(), tables.tableF.size(), deviceContext);
- nbp->coulomb_tab_climg2d = coul_tab;
+ nbp->coulomb_tab_climg2d = coulomb_tab;
nbp->coulomb_tab_scale = tables.scale;
}
init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, deviceContext);
}
else
- // TODO: improvement needed.
- // The image2d is created here even if eeltype is not eelCuEWALD_TAB or eelCuEWALD_TAB_TWIN
- // because the OpenCL kernels don't accept nullptr values for image2D parameters.
{
- /* Switched from using textures to using buffers */
- // TODO: decide which alternative is most efficient - textures or buffers.
- /*
- cl_image_format array_format;
-
- array_format.image_channel_data_type = CL_FLOAT;
- array_format.image_channel_order = CL_R;
-
- nbp->coulomb_tab_climg2d = clCreateImage2D(deviceContext.context(),
- CL_MEM_READ_WRITE, &array_format, 1, 1, 0, nullptr, &cl_error);
- */
-
nbp->coulomb_tab_climg2d = clCreateBuffer(deviceContext.context(), CL_MEM_READ_ONLY,
sizeof(cl_float), nullptr, &cl_error);
GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
const int nnbfp_comb = 2 * nbatParams.numTypes;
{
- /* Switched from using textures to using buffers */
- // TODO: decide which alternative is most efficient - textures or buffers.
- /*
- cl_image_format array_format;
-
- array_format.image_channel_data_type = CL_FLOAT;
- array_format.image_channel_order = CL_R;
-
- nbp->nbfp_climg2d = clCreateImage2D(deviceContext.context(), CL_MEM_READ_ONLY |
- CL_MEM_COPY_HOST_PTR, &array_format, nnbfp, 1, 0, nbat->nbfp, &cl_error);
- */
-
- nbp->nbfp_climg2d = clCreateBuffer(
- deviceContext.context(), CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
- nnbfp * sizeof(cl_float), const_cast<float*>(nbatParams.nbfp.data()), &cl_error);
- GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
- ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
+ /* set up LJ parameter lookup table */
+ DeviceBuffer<real> nbfp;
+ initParamLookupTable(&nbfp, nullptr, nbatParams.nbfp.data(), nnbfp, deviceContext);
+ nbp->nbfp_climg2d = nbfp;
if (ic->vdwtype == evdwPME)
{
- /* Switched from using textures to using buffers */
- // TODO: decide which alternative is most efficient - textures or buffers.
- /* nbp->nbfp_comb_climg2d = clCreateImage2D(deviceContext.context(), CL_MEM_READ_WRITE |
- CL_MEM_COPY_HOST_PTR, &array_format, nnbfp_comb, 1, 0, nbat->nbfp_comb, &cl_error);*/
- nbp->nbfp_comb_climg2d =
- clCreateBuffer(deviceContext.context(),
- CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
- nnbfp_comb * sizeof(cl_float),
- const_cast<float*>(nbatParams.nbfp_comb.data()), &cl_error);
- GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
- ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
- }
- else
- {
- // TODO: improvement needed.
- // The image2d is created here even if vdwtype is not evdwPME because the OpenCL kernels
- // don't accept nullptr values for image2D parameters.
- /* Switched from using textures to using buffers */
- // TODO: decide which alternative is most efficient - textures or buffers.
- /* nbp->nbfp_comb_climg2d = clCreateImage2D(deviceContext.context(),
- CL_MEM_READ_WRITE, &array_format, 1, 1, 0, nullptr, &cl_error);*/
- nbp->nbfp_comb_climg2d = clCreateBuffer(deviceContext.context(), CL_MEM_READ_ONLY,
- sizeof(cl_float), nullptr, &cl_error);
- GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS,
- ("clCreateBuffer failed: " + ocl_get_error_string(cl_error)).c_str());
+ DeviceBuffer<float> nbfp_comb;
+ initParamLookupTable(&nbfp_comb, nullptr, nbatParams.nbfp_comb.data(), nnbfp_comb, deviceContext);
+ nbp->nbfp_comb_climg2d = nbfp_comb;
}
}
}