From d3e584c0c950e4973102d6bdfac68e6a6e0cb71b Mon Sep 17 00:00:00 2001 From: Artem Zhmurov Date: Mon, 8 Jun 2020 06:58:20 +0000 Subject: [PATCH] Unify VdW and Electrostatic kernel enumerations in CUDA and OpenCL versions of NBNXM These enumerations are identical in CUDA and OpenCL. --- src/gromacs/nbnxm/cuda/nbnxm_cuda.cu | 18 +++--- .../nbnxm/cuda/nbnxm_cuda_data_mgmt.cu | 48 ++++++-------- src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h | 50 +-------------- src/gromacs/nbnxm/nbnxm_gpu.h | 58 +++++++++++++++++ src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp | 17 +++-- .../nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp | 32 ++++------ src/gromacs/nbnxm/opencl/nbnxm_ocl_internal.h | 62 ------------------ .../nbnxm/opencl/nbnxm_ocl_jit_support.cpp | 8 +-- src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h | 63 +++---------------- 9 files changed, 120 insertions(+), 236 deletions(-) delete mode 100644 src/gromacs/nbnxm/opencl/nbnxm_ocl_internal.h diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu index 8c76c4e95f..241392f187 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu +++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu @@ -163,7 +163,7 @@ static inline int calc_nb_kernel_nblock(int nwork_units, const DeviceInformation */ /*! Force-only kernel function pointers. */ -static const nbnxn_cu_kfunc_ptr_t nb_kfunc_noener_noprune_ptr[eelCuNR][evdwCuNR] = { +static const nbnxn_cu_kfunc_ptr_t nb_kfunc_noener_noprune_ptr[eelTypeNR][evdwTypeNR] = { { nbnxn_kernel_ElecCut_VdwLJ_F_cuda, nbnxn_kernel_ElecCut_VdwLJCombGeom_F_cuda, nbnxn_kernel_ElecCut_VdwLJCombLB_F_cuda, nbnxn_kernel_ElecCut_VdwLJFsw_F_cuda, nbnxn_kernel_ElecCut_VdwLJPsw_F_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombGeom_F_cuda, @@ -191,7 +191,7 @@ static const nbnxn_cu_kfunc_ptr_t nb_kfunc_noener_noprune_ptr[eelCuNR][evdwCuNR] }; /*! Force + energy kernel function pointers. 
*/ -static const nbnxn_cu_kfunc_ptr_t nb_kfunc_ener_noprune_ptr[eelCuNR][evdwCuNR] = { +static const nbnxn_cu_kfunc_ptr_t nb_kfunc_ener_noprune_ptr[eelTypeNR][evdwTypeNR] = { { nbnxn_kernel_ElecCut_VdwLJ_VF_cuda, nbnxn_kernel_ElecCut_VdwLJCombGeom_VF_cuda, nbnxn_kernel_ElecCut_VdwLJCombLB_VF_cuda, nbnxn_kernel_ElecCut_VdwLJFsw_VF_cuda, nbnxn_kernel_ElecCut_VdwLJPsw_VF_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombGeom_VF_cuda, @@ -219,7 +219,7 @@ static const nbnxn_cu_kfunc_ptr_t nb_kfunc_ener_noprune_ptr[eelCuNR][evdwCuNR] = }; /*! Force + pruning kernel function pointers. */ -static const nbnxn_cu_kfunc_ptr_t nb_kfunc_noener_prune_ptr[eelCuNR][evdwCuNR] = { +static const nbnxn_cu_kfunc_ptr_t nb_kfunc_noener_prune_ptr[eelTypeNR][evdwTypeNR] = { { nbnxn_kernel_ElecCut_VdwLJ_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJCombGeom_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJCombLB_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJFsw_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJPsw_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombGeom_F_prune_cuda, @@ -249,7 +249,7 @@ static const nbnxn_cu_kfunc_ptr_t nb_kfunc_noener_prune_ptr[eelCuNR][evdwCuNR] = }; /*! Force + energy + pruning kernel function pointers. 
*/ -static const nbnxn_cu_kfunc_ptr_t nb_kfunc_ener_prune_ptr[eelCuNR][evdwCuNR] = { +static const nbnxn_cu_kfunc_ptr_t nb_kfunc_ener_prune_ptr[eelTypeNR][evdwTypeNR] = { { nbnxn_kernel_ElecCut_VdwLJ_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJCombGeom_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJCombLB_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJFsw_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJPsw_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombGeom_VF_prune_cuda, @@ -289,9 +289,9 @@ static inline nbnxn_cu_kfunc_ptr_t select_nbnxn_kernel(int e { nbnxn_cu_kfunc_ptr_t res; - GMX_ASSERT(eeltype < eelCuNR, + GMX_ASSERT(eeltype < eelTypeNR, "The electrostatics type requested is not implemented in the CUDA kernels."); - GMX_ASSERT(evdwtype < evdwCuNR, + GMX_ASSERT(evdwtype < evdwTypeNR, "The VdW type requested is not implemented in the CUDA kernels."); /* assert assumptions made by the kernels */ @@ -343,7 +343,7 @@ static inline int calc_shmem_required_nonbonded(const int num_thre /* cj in shared memory, for each warp separately */ shmem += num_threads_z * c_nbnxnGpuClusterpairSplit * c_nbnxnGpuJgroupSize * sizeof(int); - if (nbp->vdwtype == evdwCuCUTCOMBGEOM || nbp->vdwtype == evdwCuCUTCOMBLB) + if (nbp->vdwtype == evdwTypeCUTCOMBGEOM || nbp->vdwtype == evdwTypeCUTCOMBLB) { /* i-atom LJ combination parameters in shared memory */ shmem += c_nbnxnGpuNumClusterPerSupercluster * c_clSize * sizeof(float2); @@ -816,9 +816,9 @@ void cuda_set_cacheconfig() { cudaError_t stat; - for (int i = 0; i < eelCuNR; i++) + for (int i = 0; i < eelTypeNR; i++) { - for (int j = 0; j < evdwCuNR; j++) + for (int j = 0; j < evdwTypeNR; j++) { /* Default kernel 32/32 kB Shared/L1 */ cudaFuncSetCacheConfig(nb_kfunc_ener_prune_ptr[i][j], cudaFuncCachePreferEqual); diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu index 526c2e7355..962f5a049e 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu +++ 
b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu @@ -95,16 +95,6 @@ static void nbnxn_cuda_clear_e_fshift(NbnxmGpu* nb); /* Fw. decl, */ static void nbnxn_cuda_free_nbparam_table(cu_nbparam_t* nbparam); -/*! \brief Return whether combination rules are used. - * - * \param[in] pointer to nonbonded paramter struct - * \return true if combination rules are used in this run, false otherwise - */ -static inline bool useLjCombRule(const cu_nbparam_t* nbparam) -{ - return (nbparam->vdwtype == evdwCuCUTCOMBGEOM || nbparam->vdwtype == evdwCuCUTCOMBLB); -} - /*! \brief Initialized the Ewald Coulomb correction GPU table. Tabulates the Ewald Coulomb force and initializes the size/scale @@ -191,11 +181,11 @@ static int pick_ewald_kernel_type(const interaction_const_t& ic) forces it (use it for debugging/benchmarking only). */ if (!bTwinCut && (getenv("GMX_CUDA_NB_EWALD_TWINCUT") == nullptr)) { - kernel_type = bUseAnalyticalEwald ? eelCuEWALD_ANA : eelCuEWALD_TAB; + kernel_type = bUseAnalyticalEwald ? eelTypeEWALD_ANA : eelTypeEWALD_TAB; } else { - kernel_type = bUseAnalyticalEwald ? eelCuEWALD_ANA_TWIN : eelCuEWALD_TAB_TWIN; + kernel_type = bUseAnalyticalEwald ? 
eelTypeEWALD_ANA_TWIN : eelTypeEWALD_TAB_TWIN; } return kernel_type; @@ -254,17 +244,17 @@ static void init_nbparam(cu_nbparam_t* nbp, case eintmodPOTSHIFT: switch (nbatParams.comb_rule) { - case ljcrNONE: nbp->vdwtype = evdwCuCUT; break; - case ljcrGEOM: nbp->vdwtype = evdwCuCUTCOMBGEOM; break; - case ljcrLB: nbp->vdwtype = evdwCuCUTCOMBLB; break; + case ljcrNONE: nbp->vdwtype = evdwTypeCUT; break; + case ljcrGEOM: nbp->vdwtype = evdwTypeCUTCOMBGEOM; break; + case ljcrLB: nbp->vdwtype = evdwTypeCUTCOMBLB; break; default: gmx_incons( "The requested LJ combination rule is not implemented in the CUDA " "GPU accelerated kernels!"); } break; - case eintmodFORCESWITCH: nbp->vdwtype = evdwCuFSWITCH; break; - case eintmodPOTSWITCH: nbp->vdwtype = evdwCuPSWITCH; break; + case eintmodFORCESWITCH: nbp->vdwtype = evdwTypeFSWITCH; break; + case eintmodPOTSWITCH: nbp->vdwtype = evdwTypePSWITCH; break; default: gmx_incons( "The requested VdW interaction modifier is not implemented in the CUDA GPU " @@ -276,12 +266,12 @@ static void init_nbparam(cu_nbparam_t* nbp, if (ic->ljpme_comb_rule == ljcrGEOM) { assert(nbatParams.comb_rule == ljcrGEOM); - nbp->vdwtype = evdwCuEWALDGEOM; + nbp->vdwtype = evdwTypeEWALDGEOM; } else { assert(nbatParams.comb_rule == ljcrLB); - nbp->vdwtype = evdwCuEWALDLB; + nbp->vdwtype = evdwTypeEWALDLB; } } else @@ -292,11 +282,11 @@ static void init_nbparam(cu_nbparam_t* nbp, if (ic->eeltype == eelCUT) { - nbp->eeltype = eelCuCUT; + nbp->eeltype = eelTypeCUT; } else if (EEL_RF(ic->eeltype)) { - nbp->eeltype = eelCuRF; + nbp->eeltype = eelTypeRF; } else if ((EEL_PME(ic->eeltype) || ic->eeltype == eelEWALD)) { @@ -312,14 +302,14 @@ static void init_nbparam(cu_nbparam_t* nbp, /* generate table for PME */ nbp->coulomb_tab = nullptr; - if (nbp->eeltype == eelCuEWALD_TAB || nbp->eeltype == eelCuEWALD_TAB_TWIN) + if (nbp->eeltype == eelTypeEWALD_TAB || nbp->eeltype == eelTypeEWALD_TAB_TWIN) { GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald 
correction tables"); init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, deviceContext); } /* set up LJ parameter lookup table */ - if (!useLjCombRule(nbp)) + if (!useLjCombRule(nbp->vdwtype)) { initParamLookupTable(&nbp->nbfp, &nbp->nbfp_texobj, nbatParams.nbfp.data(), 2 * ntypes * ntypes, deviceContext); @@ -645,7 +635,7 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat) allocateDeviceBuffer(&d_atdat->f, nalloc, deviceContext); allocateDeviceBuffer(&d_atdat->xq, nalloc, deviceContext); - if (useLjCombRule(nb->nbparam)) + if (useLjCombRule(nb->nbparam->vdwtype)) { allocateDeviceBuffer(&d_atdat->lj_comb, nalloc, deviceContext); } @@ -667,7 +657,7 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat) nbnxn_cuda_clear_f(nb, nalloc); } - if (useLjCombRule(nb->nbparam)) + if (useLjCombRule(nb->nbparam->vdwtype)) { static_assert(sizeof(d_atdat->lj_comb[0]) == sizeof(float2), "Size of the LJ parameters element should be equal to the size of float2."); @@ -691,7 +681,7 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat) static void nbnxn_cuda_free_nbparam_table(cu_nbparam_t* nbparam) { - if (nbparam->eeltype == eelCuEWALD_TAB || nbparam->eeltype == eelCuEWALD_TAB_TWIN) + if (nbparam->eeltype == eelTypeEWALD_TAB || nbparam->eeltype == eelTypeEWALD_TAB_TWIN) { destroyParamLookupTable(&nbparam->coulomb_tab, nbparam->coulomb_tab_texobj); } @@ -720,12 +710,12 @@ void gpu_free(NbnxmGpu* nb) delete nb->timers; - if (!useLjCombRule(nb->nbparam)) + if (!useLjCombRule(nb->nbparam->vdwtype)) { destroyParamLookupTable(&nbparam->nbfp, nbparam->nbfp_texobj); } - if (nbparam->vdwtype == evdwCuEWALDGEOM || nbparam->vdwtype == evdwCuEWALDLB) + if (nbparam->vdwtype == evdwTypeEWALDGEOM || nbparam->vdwtype == evdwTypeEWALDLB) { destroyParamLookupTable(&nbparam->nbfp_comb, nbparam->nbfp_comb_texobj); } @@ -801,7 +791,7 @@ int gpu_min_ci_balanced(NbnxmGpu* nb) gmx_bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb) { - return 
((nb->nbparam->eeltype == eelCuEWALD_ANA) || (nb->nbparam->eeltype == eelCuEWALD_ANA_TWIN)); + return ((nb->nbparam->eeltype == eelTypeEWALD_ANA) || (nb->nbparam->eeltype == eelTypeEWALD_ANA_TWIN)); } void* gpu_get_xq(NbnxmGpu* nb) diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h b/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h index eca71b1360..67f220d15d 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h +++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h @@ -76,52 +76,6 @@ const int c_cudaPruneKernelJ4Concurrency = GMX_NBNXN_PRUNE_KERNEL_J4_CONCURRENCY /*! \brief cluster size = number of atoms per cluster. */ static constexpr int c_clSize = c_nbnxnGpuClusterSize; -/*! \brief Electrostatic CUDA kernel flavors. - * - * Types of electrostatics implementations available in the CUDA non-bonded - * force kernels. These represent both the electrostatics types implemented - * by the kernels (cut-off, RF, and Ewald - a subset of what's defined in - * enums.h) as well as encode implementation details analytical/tabulated - * and single or twin cut-off (for Ewald kernels). - * Note that the cut-off and RF kernels have only analytical flavor and unlike - * in the CPU kernels, the tabulated kernels are ATM Ewald-only. - * - * The row-order of pointers to different electrostatic kernels defined in - * nbnxn_cuda.cu by the nb_*_kfunc_ptr function pointer table - * should match the order of enumerated types below. - */ -enum eelCu -{ - eelCuCUT, - eelCuRF, - eelCuEWALD_TAB, - eelCuEWALD_TAB_TWIN, - eelCuEWALD_ANA, - eelCuEWALD_ANA_TWIN, - eelCuNR -}; - -/*! \brief VdW CUDA kernel flavors. - * - * The enumerates values correspond to the LJ implementations in the CUDA non-bonded - * kernels. - * - * The column-order of pointers to different electrostatic kernels defined in - * nbnxn_cuda.cu by the nb_*_kfunc_ptr function pointer table - * should match the order of enumerated types below. 
- */ -enum evdwCu -{ - evdwCuCUT, - evdwCuCUTCOMBGEOM, - evdwCuCUTCOMBLB, - evdwCuFSWITCH, - evdwCuPSWITCH, - evdwCuEWALDGEOM, - evdwCuEWALDLB, - evdwCuNR -}; - /* All structs prefixed with "cu_" hold data used in GPU calculations and * are passed to the kernels, except cu_timers_t. */ /*! \cond */ @@ -190,9 +144,9 @@ struct cu_atomdata struct cu_nbparam { - //! type of electrostatics, takes values from #eelCu + //! type of electrostatics, takes values from #eelType int eeltype; - //! type of VdW impl., takes values from #evdwCu + //! type of VdW impl., takes values from #evdwType int vdwtype; //! charge multiplication factor diff --git a/src/gromacs/nbnxm/nbnxm_gpu.h b/src/gromacs/nbnxm/nbnxm_gpu.h index 2370fc836b..465bce44f7 100644 --- a/src/gromacs/nbnxm/nbnxm_gpu.h +++ b/src/gromacs/nbnxm/nbnxm_gpu.h @@ -62,11 +62,69 @@ class GpuBonded; class StepWorkload; } // namespace gmx +/*! \brief Nbnxm electrostatic GPU kernel flavors. + * + * Types of electrostatics implementations available in the GPU non-bonded + * force kernels. These represent both the electrostatics types implemented + * by the kernels (cut-off, RF, and Ewald - a subset of what's defined in + * enums.h) as well as encode implementation details analytical/tabulated + * and single or twin cut-off (for Ewald kernels). + * Note that the cut-off and RF kernels have only analytical flavor and unlike + * in the CPU kernels, the tabulated kernels are ATM Ewald-only. + * + * The row-order of pointers to different electrostatic kernels defined in + * nbnxm_cuda.cu and nbnxm_ocl.cpp by the nb_kfunc_*_ptr kernel tables + * should match the order of enumerated types below. + */ +enum eelType +{ + eelTypeCUT, + eelTypeRF, + eelTypeEWALD_TAB, + eelTypeEWALD_TAB_TWIN, + eelTypeEWALD_ANA, + eelTypeEWALD_ANA_TWIN, + eelTypeNR +}; + +/*! \brief Nbnxm VdW GPU kernel flavors. + * + * The enumerated values correspond to the LJ implementations in the GPU non-bonded + * kernels. 
+ * + * The column-order of pointers to different VdW kernels defined in + * nbnxm_cuda.cu and nbnxm_ocl.cpp by the nb_kfunc_*_ptr kernel tables + * should match the order of enumerated types below. + */ +enum evdwType +{ + evdwTypeCUT, + evdwTypeCUTCOMBGEOM, + evdwTypeCUTCOMBLB, + evdwTypeFSWITCH, + evdwTypePSWITCH, + evdwTypeEWALDGEOM, + evdwTypeEWALDLB, + evdwTypeNR +}; + namespace Nbnxm { class Grid; +/*! \brief Returns true if LJ combination rules are used in the non-bonded kernels. + * + * \param[in] vdwType The VdW interaction/implementation type as defined by evdwType + * enumeration. + * + * \returns Whether combination rules are used by the run. + */ +static inline bool useLjCombRule(const int vdwType) +{ + return (vdwType == evdwTypeCUTCOMBGEOM || vdwType == evdwTypeCUTCOMBLB); +} + /*! \brief * Launch asynchronously the xq buffer host to device copy. * diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp index a4547eb24d..226571dfd1 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp @@ -88,7 +88,6 @@ #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/gmxassert.h" -#include "nbnxm_ocl_internal.h" #include "nbnxm_ocl_types.h" namespace Nbnxm @@ -162,7 +161,7 @@ static inline void validate_global_work_size(const KernelLaunchConfig& config, */ /*! \brief Force-only kernel function names. */ -static const char* nb_kfunc_noener_noprune_ptr[eelOclNR][evdwOclNR] = { +static const char* nb_kfunc_noener_noprune_ptr[eelTypeNR][evdwTypeNR] = { { "nbnxn_kernel_ElecCut_VdwLJ_F_opencl", "nbnxn_kernel_ElecCut_VdwLJCombGeom_F_opencl", "nbnxn_kernel_ElecCut_VdwLJCombLB_F_opencl", "nbnxn_kernel_ElecCut_VdwLJFsw_F_opencl", "nbnxn_kernel_ElecCut_VdwLJPsw_F_opencl", "nbnxn_kernel_ElecCut_VdwLJEwCombGeom_F_opencl", @@ -196,7 +195,7 @@ static const char* nb_kfunc_noener_noprune_ptr[eelOclNR][evdwOclNR] = { }; /*! \brief Force + energy kernel function pointers. 
*/ -static const char* nb_kfunc_ener_noprune_ptr[eelOclNR][evdwOclNR] = { +static const char* nb_kfunc_ener_noprune_ptr[eelTypeNR][evdwTypeNR] = { { "nbnxn_kernel_ElecCut_VdwLJ_VF_opencl", "nbnxn_kernel_ElecCut_VdwLJCombGeom_VF_opencl", "nbnxn_kernel_ElecCut_VdwLJCombLB_VF_opencl", "nbnxn_kernel_ElecCut_VdwLJFsw_VF_opencl", "nbnxn_kernel_ElecCut_VdwLJPsw_VF_opencl", "nbnxn_kernel_ElecCut_VdwLJEwCombGeom_VF_opencl", @@ -231,7 +230,7 @@ static const char* nb_kfunc_ener_noprune_ptr[eelOclNR][evdwOclNR] = { }; /*! \brief Force + pruning kernel function pointers. */ -static const char* nb_kfunc_noener_prune_ptr[eelOclNR][evdwOclNR] = { +static const char* nb_kfunc_noener_prune_ptr[eelTypeNR][evdwTypeNR] = { { "nbnxn_kernel_ElecCut_VdwLJ_F_prune_opencl", "nbnxn_kernel_ElecCut_VdwLJCombGeom_F_prune_opencl", "nbnxn_kernel_ElecCut_VdwLJCombLB_F_prune_opencl", @@ -272,7 +271,7 @@ static const char* nb_kfunc_noener_prune_ptr[eelOclNR][evdwOclNR] = { }; /*! \brief Force + energy + pruning kernel function pointers. */ -static const char* nb_kfunc_ener_prune_ptr[eelOclNR][evdwOclNR] = { +static const char* nb_kfunc_ener_prune_ptr[eelTypeNR][evdwTypeNR] = { { "nbnxn_kernel_ElecCut_VdwLJ_VF_prune_opencl", "nbnxn_kernel_ElecCut_VdwLJCombGeom_VF_prune_opencl", "nbnxn_kernel_ElecCut_VdwLJCombLB_VF_prune_opencl", @@ -347,9 +346,9 @@ static inline cl_kernel select_nbnxn_kernel(NbnxmGpu* nb, int eeltype, int evdwt cl_kernel* kernel_ptr; cl_int cl_error; - GMX_ASSERT(eeltype < eelOclNR, + GMX_ASSERT(eeltype < eelTypeNR, "The electrostatics type requested is not implemented in the OpenCL kernels."); - GMX_ASSERT(evdwtype < evdwOclNR, + GMX_ASSERT(evdwtype < evdwTypeNR, "The VdW type requested is not implemented in the OpenCL kernels."); if (bDoEne) @@ -1000,11 +999,11 @@ int nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t& ic) forces it (use it for debugging/benchmarking only). 
*/ if (!bTwinCut && (getenv("GMX_OCL_NB_EWALD_TWINCUT") == nullptr)) { - kernel_type = bUseAnalyticalEwald ? eelOclEWALD_ANA : eelOclEWALD_TAB; + kernel_type = bUseAnalyticalEwald ? eelTypeEWALD_ANA : eelTypeEWALD_TAB; } else { - kernel_type = bUseAnalyticalEwald ? eelOclEWALD_ANA_TWIN : eelOclEWALD_TAB_TWIN; + kernel_type = bUseAnalyticalEwald ? eelTypeEWALD_ANA_TWIN : eelTypeEWALD_TAB_TWIN; } return kernel_type; diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp index b7b61e04a5..ce17f8152f 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp @@ -74,7 +74,6 @@ #include "gromacs/utility/real.h" #include "gromacs/utility/smalloc.h" -#include "nbnxm_ocl_internal.h" #include "nbnxm_ocl_types.h" namespace Nbnxm @@ -99,15 +98,6 @@ namespace Nbnxm */ static unsigned int gpu_min_ci_balanced_factor = 50; - -/*! \brief Returns true if LJ combination rules are used in the non-bonded kernels. - * - * Full doc in nbnxn_ocl_internal.h */ -bool useLjCombRule(int vdwType) -{ - return (vdwType == evdwOclCUTCOMBGEOM || vdwType == evdwOclCUTCOMBLB); -} - /*! \brief Tabulates the Ewald Coulomb force and initializes the size/scale * and the table GPU array. 
* @@ -198,17 +188,17 @@ static void map_interaction_types_to_gpu_kernel_flavors(const interaction_const_ case eintmodPOTSHIFT: switch (combRule) { - case ljcrNONE: *gpu_vdwtype = evdwOclCUT; break; - case ljcrGEOM: *gpu_vdwtype = evdwOclCUTCOMBGEOM; break; - case ljcrLB: *gpu_vdwtype = evdwOclCUTCOMBLB; break; + case ljcrNONE: *gpu_vdwtype = evdwTypeCUT; break; + case ljcrGEOM: *gpu_vdwtype = evdwTypeCUTCOMBGEOM; break; + case ljcrLB: *gpu_vdwtype = evdwTypeCUTCOMBLB; break; default: gmx_incons( "The requested LJ combination rule is not implemented in the " "OpenCL GPU accelerated kernels!"); } break; - case eintmodFORCESWITCH: *gpu_vdwtype = evdwOclFSWITCH; break; - case eintmodPOTSWITCH: *gpu_vdwtype = evdwOclPSWITCH; break; + case eintmodFORCESWITCH: *gpu_vdwtype = evdwTypeFSWITCH; break; + case eintmodPOTSWITCH: *gpu_vdwtype = evdwTypePSWITCH; break; default: gmx_incons( "The requested VdW interaction modifier is not implemented in the GPU " @@ -219,11 +209,11 @@ static void map_interaction_types_to_gpu_kernel_flavors(const interaction_const_ { if (ic->ljpme_comb_rule == ljcrGEOM) { - *gpu_vdwtype = evdwOclEWALDGEOM; + *gpu_vdwtype = evdwTypeEWALDGEOM; } else { - *gpu_vdwtype = evdwOclEWALDLB; + *gpu_vdwtype = evdwTypeEWALDLB; } } else @@ -233,11 +223,11 @@ static void map_interaction_types_to_gpu_kernel_flavors(const interaction_const_ if (ic->eeltype == eelCUT) { - *gpu_eeltype = eelOclCUT; + *gpu_eeltype = eelTypeCUT; } else if (EEL_RF(ic->eeltype)) { - *gpu_eeltype = eelOclRF; + *gpu_eeltype = eelTypeRF; } else if ((EEL_PME(ic->eeltype) || ic->eeltype == eelEWALD)) { @@ -277,7 +267,7 @@ static void init_nbparam(cl_nbparam_t* nbp, } /* generate table for PME */ nbp->coulomb_tab_climg2d = nullptr; - if (nbp->eeltype == eelOclEWALD_TAB || nbp->eeltype == eelOclEWALD_TAB_TWIN) + if (nbp->eeltype == eelTypeEWALD_TAB || nbp->eeltype == eelTypeEWALD_TAB_TWIN) { GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables"); 
init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, deviceContext); @@ -917,7 +907,7 @@ int gpu_min_ci_balanced(NbnxmGpu* nb) //! This function is documented in the header file gmx_bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb) { - return ((nb->nbparam->eeltype == eelOclEWALD_ANA) || (nb->nbparam->eeltype == eelOclEWALD_ANA_TWIN)); + return ((nb->nbparam->eeltype == eelTypeEWALD_ANA) || (nb->nbparam->eeltype == eelTypeEWALD_ANA_TWIN)); } } // namespace Nbnxm diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_internal.h b/src/gromacs/nbnxm/opencl/nbnxm_ocl_internal.h deleted file mode 100644 index 6e7d232e71..0000000000 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_internal.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team. - * Copyright (c) 2019,2020, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/*! \internal \file - * \brief Internal API of the OpenCL non-bonded module. - * - * \author Szilárd Páll - * \ingroup module_nbnxm - */ - -#include "gmxpre.h" - -#include "nbnxm_ocl_types.h" - -#ifndef NBNXN_OCL_INTERNAL_H -# define NBNXN_OCL_INTERNAL_H - -namespace Nbnxm -{ - -/*! \brief Returns true if LJ combination rules are used in the non-bonded kernels. - * - * \param[in] vdwType The VdW interaction/implementation type as defined by evdwOcl in - * nbnxn_ocl_types.h. \returns True if combination rules are used by the run - */ -bool useLjCombRule(int vdwType); - -} // namespace Nbnxm - -#endif /* NBNXN_OCL_INTERNAL_H */ diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp index 0f9c24ba55..ad6e1164b5 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp @@ -99,12 +99,12 @@ static const char* kernel_VdW_family_definitions[] = { /*! 
\brief Returns a string with the compiler defines required to avoid all flavour generation * - * For example if flavour eelOclRF with evdwOclFSWITCH, the output will be such that the corresponding + * For example if flavour eelTypeRF with evdwTypeFSWITCH, the output will be such that the corresponding * kernel flavour is generated: * -DGMX_OCL_FASTGEN (will replace flavour generator nbnxn_ocl_kernels.clh with nbnxn_ocl_kernels_fastgen.clh) - * -DEL_RF (The eelOclRF flavour) + * -DEL_RF (The eelTypeRF flavour) * -DEELNAME=_ElecRF (The first part of the generated kernel name ) - * -DLJ_EWALD_COMB_GEOM (The evdwOclFSWITCH flavour) + * -DLJ_EWALD_COMB_GEOM (The evdwTypeFSWITCH flavour) * -DVDWNAME=_VdwLJEwCombGeom (The second part of the generated kernel name ) * * prune/energy are still generated as originally. It is only the flavour-level that has changed, so that @@ -134,7 +134,7 @@ static std::string makeDefinesForKernelTypes(bool bFastGen, int eeltype, int vdw if (bFastGen) { - bool bIsEwaldSingleCutoff = (eeltype == eelOclEWALD_TAB || eeltype == eelOclEWALD_ANA); + bool bIsEwaldSingleCutoff = (eeltype == eelTypeEWALD_TAB || eeltype == eelTypeEWALD_ANA); if (bIsEwaldSingleCutoff) { diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h b/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h index 8462f20eb4..a1db11c930 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h @@ -53,6 +53,7 @@ #include "gromacs/mdtypes/interaction_const.h" #include "gromacs/nbnxm/gpu_types_common.h" #include "gromacs/nbnxm/nbnxm.h" +#include "gromacs/nbnxm/nbnxm_gpu.h" #include "gromacs/nbnxm/pairlist.h" #include "gromacs/utility/enumerationhelpers.h" #include "gromacs/utility/fatalerror.h" @@ -77,52 +78,6 @@ struct gmx_wallclock_gpu_nbnxn_t; const int c_oclPruneKernelJ4ConcurrencyDEFAULT = GMX_NBNXN_PRUNE_KERNEL_J4_CONCURRENCY_DEFAULT; /*! @} */ -/*! \brief Electrostatic OpenCL kernel flavors. 
- * - * Types of electrostatics implementations available in the OpenCL non-bonded - * force kernels. These represent both the electrostatics types implemented - * by the kernels (cut-off, RF, and Ewald - a subset of what's defined in - * enums.h) as well as encode implementation details analytical/tabulated - * and single or twin cut-off (for Ewald kernels). - * Note that the cut-off and RF kernels have only analytical flavor and unlike - * in the CPU kernels, the tabulated kernels are ATM Ewald-only. - * - * The row-order of pointers to different electrostatic kernels defined in - * nbnxn_cuda.cu by the nb_*_kfunc_ptr function pointer table - * should match the order of enumerated types below. - */ -enum eelOcl -{ - eelOclCUT, - eelOclRF, - eelOclEWALD_TAB, - eelOclEWALD_TAB_TWIN, - eelOclEWALD_ANA, - eelOclEWALD_ANA_TWIN, - eelOclNR -}; - -/*! \brief VdW OpenCL kernel flavors. - * - * The enumerates values correspond to the LJ implementations in the OpenCL non-bonded - * kernels. - * - * The column-order of pointers to different electrostatic kernels defined in - * nbnxn_cuda.cu by the nb_*_kfunc_ptr function pointer table - * should match the order of enumerated types below. - */ -enum evdwOcl -{ - evdwOclCUT, - evdwOclCUTCOMBGEOM, - evdwOclCUTCOMBLB, - evdwOclFSWITCH, - evdwOclPSWITCH, - evdwOclEWALDGEOM, - evdwOclEWALDLB, - evdwOclNR -}; - /*! \brief Pruning kernel flavors. * * The values correspond to the first call of the pruning post-list generation @@ -197,9 +152,9 @@ typedef struct cl_atomdata typedef struct cl_nbparam { - //! type of electrostatics, takes values from #eelOcl + //! type of electrostatics, takes values from #eelType int eeltype; - //! type of VdW impl., takes values from #evdwOcl + //! type of VdW impl., takes values from #evdwType int vdwtype; //! charge multiplication factor @@ -259,9 +214,9 @@ typedef struct cl_nbparam typedef struct cl_nbparam_params { - //! type of electrostatics, takes values from #eelCu + //! 
type of electrostatics, takes values from #eelType int eeltype; - //! type of VdW impl., takes values from #evdwCu + //! type of VdW impl., takes values from #evdwType int vdwtype; //! charge multiplication factor @@ -330,10 +285,10 @@ struct NbnxmGpu /**< Pointers to non-bonded kernel functions * organized similar with nb_kfunc_xxx arrays in nbnxn_ocl.cpp */ ///@{ - cl_kernel kernel_noener_noprune_ptr[eelOclNR][evdwOclNR] = { { nullptr } }; - cl_kernel kernel_ener_noprune_ptr[eelOclNR][evdwOclNR] = { { nullptr } }; - cl_kernel kernel_noener_prune_ptr[eelOclNR][evdwOclNR] = { { nullptr } }; - cl_kernel kernel_ener_prune_ptr[eelOclNR][evdwOclNR] = { { nullptr } }; + cl_kernel kernel_noener_noprune_ptr[eelTypeNR][evdwTypeNR] = { { nullptr } }; + cl_kernel kernel_ener_noprune_ptr[eelTypeNR][evdwTypeNR] = { { nullptr } }; + cl_kernel kernel_noener_prune_ptr[eelTypeNR][evdwTypeNR] = { { nullptr } }; + cl_kernel kernel_ener_prune_ptr[eelTypeNR][evdwTypeNR] = { { nullptr } }; ///@} //! prune kernels, ePruneKind defined the kernel kinds cl_kernel kernel_pruneonly[ePruneNR] = { nullptr }; -- 2.22.0