From: Aleksei Iupinov Date: Fri, 22 Sep 2017 12:41:02 +0000 (+0200) Subject: Move CUDA texture setup code from NB CUDA module to cudautils.cu X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=3a6d0768a9692b35441ac1d7b93f48555b11c751;p=alexxy%2Fgromacs.git Move CUDA texture setup code from NB CUDA module to cudautils.cu Change-Id: I7e47a65866c29be06ce522572e90a17c775157ab --- diff --git a/src/gromacs/gpu_utils/cuda_arch_utils.cuh b/src/gromacs/gpu_utils/cuda_arch_utils.cuh index 7cceb1a04d..4639acdd23 100644 --- a/src/gromacs/gpu_utils/cuda_arch_utils.cuh +++ b/src/gromacs/gpu_utils/cuda_arch_utils.cuh @@ -150,6 +150,10 @@ T gmx_shfl_down_sync(const unsigned int activeMask, #define DISABLE_CUDA_TEXTURES 0 #endif +/*! \brief True if the use of texture fetch in the CUDA kernels is disabled. */ +static const bool c_disableCudaTextures = DISABLE_CUDA_TEXTURES; + + /* CUDA architecture technical characteristics. Needs macros because it is used * in the __launch_bounds__ function qualifiers and might need it in preprocessor * conditionals. diff --git a/src/gromacs/gpu_utils/cudautils.cu b/src/gromacs/gpu_utils/cudautils.cu index 6021331ffb..75d87fa8ce 100644 --- a/src/gromacs/gpu_utils/cudautils.cu +++ b/src/gromacs/gpu_utils/cudautils.cu @@ -37,8 +37,10 @@ #include "cudautils.cuh" -#include +#include +#include +#include "gromacs/gpu_utils/cuda_arch_utils.cuh" #include "gromacs/utility/smalloc.h" /*** Generic CUDA data operation wrappers ***/ @@ -246,3 +248,97 @@ void cu_realloc_buffered(void **d_dest, void *h_src, } } } + +bool use_texobj(const gmx_device_info_t *dev_info) +{ + assert(!c_disableCudaTextures); + /* Only device CC >= 3.0 (Kepler and later) support texture objects */ + return (dev_info->prop.major >= 3); +} + +/*! \brief Set up texture object for an array of type T. + * + * Set up texture object for an array of type T and bind it to the device memory + * \p d_ptr points to. + * + * \tparam[in] T Raw data type + * \param[out] texObj texture object to initialize + * \param[in] d_ptr pointer to device global memory to bind \p texObj to + * \param[in] sizeInBytes size of memory area to bind \p texObj to + */ +template +static void setup1DTexture(cudaTextureObject_t &texObj, + void *d_ptr, + size_t sizeInBytes) +{ + assert(!c_disableCudaTextures); + + cudaError_t stat; + cudaResourceDesc rd; + cudaTextureDesc td; + + memset(&rd, 0, sizeof(rd)); + rd.resType = cudaResourceTypeLinear; + rd.res.linear.devPtr = d_ptr; + rd.res.linear.desc = cudaCreateChannelDesc(); + rd.res.linear.sizeInBytes = sizeInBytes; + + memset(&td, 0, sizeof(td)); + td.readMode = cudaReadModeElementType; + stat = cudaCreateTextureObject(&texObj, &rd, &td, NULL); + CU_RET_ERR(stat, "cudaCreateTextureObject failed"); +} + +/*! \brief Set up texture reference for an array of type T. + * + * Set up texture object for an array of type T and bind it to the device memory + * \p d_ptr points to. + * + * \tparam[in] T Raw data type + * \param[out] texObj texture reference to initialize + * \param[in] d_ptr pointer to device global memory to bind \p texObj to + * \param[in] sizeInBytes size of memory area to bind \p texObj to + */ +template +static void setup1DTexture(const struct texture *texRef, + const void *d_ptr, + size_t sizeInBytes) +{ + assert(!c_disableCudaTextures); + + cudaError_t stat; + cudaChannelFormatDesc cd; + + cd = cudaCreateChannelDesc(); + stat = cudaBindTexture(nullptr, texRef, d_ptr, &cd, sizeInBytes); + CU_RET_ERR(stat, "cudaBindTexture failed"); +} + +template +void initParamLookupTable(T * &d_ptr, + cudaTextureObject_t &texObj, + const struct texture *texRef, + const T *h_ptr, + int numElem, + const gmx_device_info_t *devInfo) +{ + const size_t sizeInBytes = numElem * sizeof(*d_ptr); + cudaError_t stat = cudaMalloc((void **)&d_ptr, sizeInBytes); + CU_RET_ERR(stat, "cudaMalloc failed in initParamLookupTable"); + cu_copy_H2D(d_ptr, (void *)h_ptr, sizeInBytes); + + if (!c_disableCudaTextures) + { + if (use_texobj(devInfo)) + { + setup1DTexture(texObj, d_ptr, sizeInBytes); + } + else + { + setup1DTexture(texRef, d_ptr, sizeInBytes); + } + } +} + +//! Add explicit instantiations of initParamLookupTable() here as needed +template void initParamLookupTable(float * &, cudaTextureObject_t &, const texture *, const float *, int, const gmx_device_info_t *); diff --git a/src/gromacs/gpu_utils/cudautils.cuh b/src/gromacs/gpu_utils/cudautils.cuh index 604e54b42c..cfe5c2a381 100644 --- a/src/gromacs/gpu_utils/cudautils.cuh +++ b/src/gromacs/gpu_utils/cudautils.cuh @@ -165,4 +165,34 @@ float cu_event_elapsed(cudaEvent_t /*start*/, cudaEvent_t /*end*/); /*! Waits for event end to complete and calculates the time between start and end. */ int cu_wait_event_time(cudaEvent_t /*end*/, cudaEvent_t /*begin*/, float * /*time*/); +/*! \brief Return whether texture objects are used on this device. + * + * \todo This should be static in cudautils.cu, as soon as texture destruction code is moved there as well + * + * \param[in] pointer to the GPU device info structure to inspect for texture objects support + * \return true if texture objects are used on this device + */ +bool use_texobj(const gmx_device_info_t *dev_info); + +/*! \brief Initialize parameter lookup table. + * + * Initializes device memory, copies data from host and binds + * a texture to allocated device memory to be used for parameter lookup. + * + * \tparam[in] T Raw data type + * \param[out] d_ptr device pointer to the memory to be allocated + * \param[out] texObj texture object to be initialized + * \param[out] texRef texture reference to be initialized + * \param[in] h_ptr pointer to the host memory to be uploaded to the device + * \param[in] numElem number of elements in the h_ptr + * \param[in] devInfo pointer to the info struct of the device in use + */ +template +void initParamLookupTable(T * &d_ptr, + cudaTextureObject_t &texObj, + const struct texture *texRef, + const T *h_ptr, + int numElem, + const gmx_device_info_t *devInfo); + #endif diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu index 79ef8e842e..24f316d549 100644 --- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu +++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu @@ -86,19 +86,6 @@ static void nbnxn_cuda_clear_e_fshift(gmx_nbnxn_cuda_t *nb); static void nbnxn_cuda_free_nbparam_table(cu_nbparam_t *nbparam, const gmx_device_info_t *dev_info); - -/*! \brief Return whether texture objects are used on this device. - * - * \param[in] pointer to the GPU device info structure to inspect for texture objects support - * \return true if texture objects are used on this device - */ -static bool use_texobj(const gmx_device_info_t *dev_info) -{ - assert(!c_disableCudaTextures); - /* Only device CC >= 3.0 (Kepler and later) support texture objects */ - return (dev_info->prop.major >= 3); -} - /*! \brief Return whether combination rules are used. * * \param[in] pointer to nonbonded paramter struct @@ -110,104 +97,6 @@ static inline bool useLjCombRule(const cu_nbparam_t *nbparam) nbparam->vdwtype == evdwCuCUTCOMBLB); } -/*! \brief Set up texture object for an array of type T. - * - * Set up texture object for an array of type T and bind it to the device memory - * \p d_ptr points to. - * - * \tparam[in] T Raw data type - * \param[out] texObj texture object to initialize - * \param[in] d_ptr pointer to device global memory to bind \p texObj to - * \param[in] sizeInBytes size of memory area to bind \p texObj to - */ -template -static void setup1DTexture(cudaTextureObject_t &texObj, - void *d_ptr, - size_t sizeInBytes) -{ - assert(!c_disableCudaTextures); - - cudaError_t stat; - cudaResourceDesc rd; - cudaTextureDesc td; - - memset(&rd, 0, sizeof(rd)); - rd.resType = cudaResourceTypeLinear; - rd.res.linear.devPtr = d_ptr; - rd.res.linear.desc = cudaCreateChannelDesc(); - rd.res.linear.sizeInBytes = sizeInBytes; - - memset(&td, 0, sizeof(td)); - td.readMode = cudaReadModeElementType; - stat = cudaCreateTextureObject(&texObj, &rd, &td, NULL); - CU_RET_ERR(stat, "cudaCreateTextureObject failed"); -} - -/*! \brief Set up texture reference for an array of type T. - * - * Set up texture object for an array of type T and bind it to the device memory - * \p d_ptr points to. - * - * \tparam[in] T Raw data type - * \param[out] texObj texture reference to initialize - * \param[in] d_ptr pointer to device global memory to bind \p texObj to - * \param[in] sizeInBytes size of memory area to bind \p texObj to - */ -template -static void setup1DTexture(const struct texture *texRef, - const void *d_ptr, - size_t sizeInBytes) -{ - assert(!c_disableCudaTextures); - - cudaError_t stat; - cudaChannelFormatDesc cd; - - cd = cudaCreateChannelDesc(); - stat = cudaBindTexture(nullptr, texRef, d_ptr, &cd, sizeInBytes); - CU_RET_ERR(stat, "cudaBindTexture failed"); -} - -/*! \brief Initialize parameter lookup table. - * - * Initializes device memory, copies data from host and binds - * a texture to allocated device memory to be used for LJ/Ewald/... parameter - * lookup. - * - * \tparam[in] T Raw data type - * \param[out] d_ptr device pointer to the memory to be allocated - * \param[out] texObj texture object to be initialized - * \param[out] texRef texture reference to be initialized - * \param[in] h_ptr pointer to the host memory to be uploaded to the device - * \param[in] numElem number of elements in the h_ptr - * \param[in] devInfo pointer to the info struct of the device in use - */ -template -static void initParamLookupTable(T * &d_ptr, - cudaTextureObject_t &texObj, - const struct texture *texRef, - const T *h_ptr, - int numElem, - const gmx_device_info_t *devInfo) -{ - const size_t sizeInBytes = numElem * sizeof(*d_ptr); - cudaError_t stat = cudaMalloc((void **)&d_ptr, sizeInBytes); - CU_RET_ERR(stat, "cudaMalloc failed in initParamLookupTable"); - cu_copy_H2D(d_ptr, (void *)h_ptr, sizeInBytes); - - if (!c_disableCudaTextures) - { - if (use_texobj(devInfo)) - { - setup1DTexture(texObj, d_ptr, sizeInBytes); - } - else - { - setup1DTexture(texRef, d_ptr, sizeInBytes); - } - } -} - /*! \brief Initialized the Ewald Coulomb correction GPU table. Tabulates the Ewald Coulomb force and initializes the size/scale diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h index 609123f346..bbd283d075 100644 --- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h +++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h @@ -74,10 +74,6 @@ static const int c_numClPerSupercl = c_nbnxnGpuNumClusterPerSupercluster; /*! \brief cluster size = number of atoms per cluster. */ static const int c_clSize = c_nbnxnGpuClusterSize; -/*! \brief True if the use of texture fetch in the CUDA kernels is disabled. */ -static const bool c_disableCudaTextures = DISABLE_CUDA_TEXTURES; - - #ifdef __cplusplus extern "C" { #endif