From e85ce7b7fc95e77948bdcd841eda47cfd2e60241 Mon Sep 17 00:00:00 2001 From: Andrey Alekseenko Date: Wed, 11 Nov 2020 12:36:21 +0000 Subject: [PATCH] Make eelType and evdwType scoped enums, + cleanup - Make enums eelType and evdwType scoped, explicitly type relevant arguments. - Remove code duplication between OpenCL and CUDA related to choosing the proper value of these enums. - Remove declarations of two never defined functions from `src/gromacs/nbnxm/nbnxm.h`. - Add names for ljcr (LJ comb. rules) enum values. --- src/gromacs/mdtypes/md_enums.cpp | 2 +- src/gromacs/mdtypes/md_enums.h | 2 +- src/gromacs/nbnxm/atomdata.cpp | 8 +- src/gromacs/nbnxm/atomdata.h | 3 + src/gromacs/nbnxm/cuda/nbnxm_cuda.cu | 37 ++++---- .../nbnxm/cuda/nbnxm_cuda_data_mgmt.cu | 83 +++-------------- src/gromacs/nbnxm/gpu_data_mgmt.h | 12 +++ src/gromacs/nbnxm/gpu_types_common.h | 9 +- src/gromacs/nbnxm/nbnxm.h | 71 ++++++++++----- src/gromacs/nbnxm/nbnxm_gpu.h | 54 ++--------- src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp | 89 +++++++++++++++++-- src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h | 2 +- src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp | 46 +++++----- .../nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp | 80 ++--------------- .../nbnxm/opencl/nbnxm_ocl_jit_support.cpp | 25 +++--- .../nbnxm/opencl/nbnxm_ocl_kernel_utils.clh | 8 +- src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h | 18 ++-- 17 files changed, 250 insertions(+), 299 deletions(-) diff --git a/src/gromacs/mdtypes/md_enums.cpp b/src/gromacs/mdtypes/md_enums.cpp index 65eef2ff80..0fdb10091b 100644 --- a/src/gromacs/mdtypes/md_enums.cpp +++ b/src/gromacs/mdtypes/md_enums.cpp @@ -39,7 +39,7 @@ #include "md_enums.h" -const char* enum_name(int index, int max_index, const char* names[]) +const char* enum_name(int index, int max_index, const char* const names[]) { if (index < 0 || index >= max_index) { diff --git a/src/gromacs/mdtypes/md_enums.h b/src/gromacs/mdtypes/md_enums.h index 508a94f6bf..7631433612 100644 --- a/src/gromacs/mdtypes/md_enums.h +++ 
b/src/gromacs/mdtypes/md_enums.h @@ -58,7 +58,7 @@ * \param[in] names The array * \return the correct string or "no name defined" */ -const char* enum_name(int index, int max_index, const char* names[]); +const char* enum_name(int index, int max_index, const char* const names[]); //! Boolean strings no or yes extern const char* yesno_names[BOOL_NR + 1]; diff --git a/src/gromacs/nbnxm/atomdata.cpp b/src/gromacs/nbnxm/atomdata.cpp index dd4eddd0ca..76b394c98b 100644 --- a/src/gromacs/nbnxm/atomdata.cpp +++ b/src/gromacs/nbnxm/atomdata.cpp @@ -52,6 +52,7 @@ #include "gromacs/math/vec.h" #include "gromacs/mdlib/gmx_omp_nthreads.h" #include "gromacs/mdtypes/forcerec.h" // only for GET_CGINFO_* +#include "gromacs/mdtypes/md_enums.h" #include "gromacs/nbnxm/nbnxm.h" #include "gromacs/pbcutil/ishift.h" #include "gromacs/simd/simd.h" @@ -71,6 +72,8 @@ using namespace gmx; // TODO: Remove when this file is moved into gmx namespace +const char* const c_ljcrNames[ljcrNR + 1] = { "none", "geometric", "Lorentz-Berthelot", nullptr }; + void nbnxn_atomdata_t::resizeCoordinateBuffer(int numAtoms) { numAtoms_ = numAtoms; @@ -580,9 +583,8 @@ static void nbnxn_atomdata_params_init(const gmx::MDLogger& mdlog, } else { - mesg = gmx::formatString( - "Using %s Lennard-Jones combination rule", - params->comb_rule == ljcrGEOM ? "geometric" : "Lorentz-Berthelot"); + mesg = gmx::formatString("Using %s Lennard-Jones combination rule", + enum_name(params->comb_rule, ljcrNR, c_ljcrNames)); } GMX_LOG(mdlog.info).asParagraph().appendText(mesg); } diff --git a/src/gromacs/nbnxm/atomdata.h b/src/gromacs/nbnxm/atomdata.h index df3362b114..6f988eb14f 100644 --- a/src/gromacs/nbnxm/atomdata.h +++ b/src/gromacs/nbnxm/atomdata.h @@ -164,6 +164,9 @@ enum ljcrNR }; +//! String corresponding to LJ combination rule +extern const char* const c_ljcrNames[ljcrNR + 1]; + /*! 
\internal * \brief Struct that stores atom related data for the nbnxn module * diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu index 48f02fde9e..29ec2288a9 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu +++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu @@ -164,7 +164,7 @@ static inline int calc_nb_kernel_nblock(int nwork_units, const DeviceInformation */ /*! Force-only kernel function pointers. */ -static const nbnxn_cu_kfunc_ptr_t nb_kfunc_noener_noprune_ptr[eelTypeNR][evdwTypeNR] = { +static const nbnxn_cu_kfunc_ptr_t nb_kfunc_noener_noprune_ptr[c_numElecTypes][c_numVdwTypes] = { { nbnxn_kernel_ElecCut_VdwLJ_F_cuda, nbnxn_kernel_ElecCut_VdwLJCombGeom_F_cuda, nbnxn_kernel_ElecCut_VdwLJCombLB_F_cuda, nbnxn_kernel_ElecCut_VdwLJFsw_F_cuda, nbnxn_kernel_ElecCut_VdwLJPsw_F_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombGeom_F_cuda, @@ -192,7 +192,7 @@ static const nbnxn_cu_kfunc_ptr_t nb_kfunc_noener_noprune_ptr[eelTypeNR][evdwTyp }; /*! Force + energy kernel function pointers. */ -static const nbnxn_cu_kfunc_ptr_t nb_kfunc_ener_noprune_ptr[eelTypeNR][evdwTypeNR] = { +static const nbnxn_cu_kfunc_ptr_t nb_kfunc_ener_noprune_ptr[c_numElecTypes][c_numVdwTypes] = { { nbnxn_kernel_ElecCut_VdwLJ_VF_cuda, nbnxn_kernel_ElecCut_VdwLJCombGeom_VF_cuda, nbnxn_kernel_ElecCut_VdwLJCombLB_VF_cuda, nbnxn_kernel_ElecCut_VdwLJFsw_VF_cuda, nbnxn_kernel_ElecCut_VdwLJPsw_VF_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombGeom_VF_cuda, @@ -220,7 +220,7 @@ static const nbnxn_cu_kfunc_ptr_t nb_kfunc_ener_noprune_ptr[eelTypeNR][evdwTypeN }; /*! Force + pruning kernel function pointers. 
*/ -static const nbnxn_cu_kfunc_ptr_t nb_kfunc_noener_prune_ptr[eelTypeNR][evdwTypeNR] = { +static const nbnxn_cu_kfunc_ptr_t nb_kfunc_noener_prune_ptr[c_numElecTypes][c_numVdwTypes] = { { nbnxn_kernel_ElecCut_VdwLJ_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJCombGeom_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJCombLB_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJFsw_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJPsw_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombGeom_F_prune_cuda, @@ -250,7 +250,7 @@ static const nbnxn_cu_kfunc_ptr_t nb_kfunc_noener_prune_ptr[eelTypeNR][evdwTypeN }; /*! Force + energy + pruning kernel function pointers. */ -static const nbnxn_cu_kfunc_ptr_t nb_kfunc_ener_prune_ptr[eelTypeNR][evdwTypeNR] = { +static const nbnxn_cu_kfunc_ptr_t nb_kfunc_ener_prune_ptr[c_numElecTypes][c_numVdwTypes] = { { nbnxn_kernel_ElecCut_VdwLJ_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJCombGeom_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJCombLB_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJFsw_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJPsw_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombGeom_VF_prune_cuda, @@ -282,17 +282,18 @@ static const nbnxn_cu_kfunc_ptr_t nb_kfunc_ener_prune_ptr[eelTypeNR][evdwTypeNR] }; /*! Return a pointer to the kernel version to be executed at the current step. 
*/ -static inline nbnxn_cu_kfunc_ptr_t select_nbnxn_kernel(int eeltype, - int evdwtype, +static inline nbnxn_cu_kfunc_ptr_t select_nbnxn_kernel(enum ElecType elecType, + enum VdwType vdwType, bool bDoEne, bool bDoPrune, const DeviceInformation gmx_unused* deviceInfo) { - nbnxn_cu_kfunc_ptr_t res; + const int elecTypeIdx = static_cast<int>(elecType); + const int vdwTypeIdx = static_cast<int>(vdwType); - GMX_ASSERT(eeltype < eelTypeNR, + GMX_ASSERT(elecTypeIdx < c_numElecTypes, "The electrostatics type requested is not implemented in the CUDA kernels."); - GMX_ASSERT(evdwtype < evdwTypeNR, + GMX_ASSERT(vdwTypeIdx < c_numVdwTypes, "The VdW type requested is not implemented in the CUDA kernels."); /* assert assumptions made by the kernels */ @@ -306,26 +307,24 @@ static inline nbnxn_cu_kfunc_ptr_t select_nbnxn_kernel(int e { if (bDoPrune) { - res = nb_kfunc_ener_prune_ptr[eeltype][evdwtype]; + return nb_kfunc_ener_prune_ptr[elecTypeIdx][vdwTypeIdx]; } else { - res = nb_kfunc_ener_noprune_ptr[eeltype][evdwtype]; + return nb_kfunc_ener_noprune_ptr[elecTypeIdx][vdwTypeIdx]; } } else { if (bDoPrune) { - res = nb_kfunc_noener_prune_ptr[eeltype][evdwtype]; + return nb_kfunc_noener_prune_ptr[elecTypeIdx][vdwTypeIdx]; } else { - res = nb_kfunc_noener_noprune_ptr[eeltype][evdwtype]; + return nb_kfunc_noener_noprune_ptr[elecTypeIdx][vdwTypeIdx]; } } - - return res; } /*! \brief Calculates the amount of shared memory required by the nonbonded kernel in use. 
*/ @@ -344,7 +343,7 @@ static inline int calc_shmem_required_nonbonded(const int num_thre /* cj in shared memory, for each warp separately */ shmem += num_threads_z * c_nbnxnGpuClusterpairSplit * c_nbnxnGpuJgroupSize * sizeof(int); - if (nbp->vdwtype == evdwTypeCUTCOMBGEOM || nbp->vdwtype == evdwTypeCUTCOMBLB) + if (nbp->vdwType == VdwType::CutCombGeom || nbp->vdwType == VdwType::CutCombLB) { /* i-atom LJ combination parameters in shared memory */ shmem += c_nbnxnGpuNumClusterPerSupercluster * c_clSize * sizeof(float2); @@ -562,7 +561,7 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const In auto* timingEvent = bDoTime ? t->interaction[iloc].nb_k.fetchNextEvent() : nullptr; const auto kernel = - select_nbnxn_kernel(nbp->eeltype, nbp->vdwtype, stepWork.computeEnergy, + select_nbnxn_kernel(nbp->elecType, nbp->vdwType, stepWork.computeEnergy, (plist->haveFreshList && !nb->timers->interaction[iloc].didPrune), &nb->deviceContext_->deviceInfo()); const auto kernelArgs = @@ -817,9 +816,9 @@ void cuda_set_cacheconfig() { cudaError_t stat; - for (int i = 0; i < eelTypeNR; i++) + for (int i = 0; i < c_numElecTypes; i++) { - for (int j = 0; j < evdwTypeNR; j++) + for (int j = 0; j < c_numVdwTypes; j++) { /* Default kernel 32/32 kB Shared/L1 */ cudaFuncSetCacheConfig(nb_kfunc_ener_prune_ptr[i][j], cudaFuncCachePreferEqual); diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu index b1d6774a26..65ec216351 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu +++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu @@ -124,9 +124,7 @@ static void init_nbparam(NBParamGpu* nbp, const nbnxn_atomdata_t::Params& nbatParams, const DeviceContext& deviceContext) { - int ntypes; - - ntypes = nbatParams.numTypes; + const int ntypes = nbatParams.numTypes; set_cutoff_parameters(nbp, ic, listParams); @@ -139,80 +137,19 @@ static void init_nbparam(NBParamGpu* nbp, * combination is rarely used. 
LJ force-switch with LB rule is more common, * but gives only 1% speed-up. */ - if (ic->vdwtype == evdwCUT) - { - switch (ic->vdw_modifier) - { - case eintmodNONE: - case eintmodPOTSHIFT: - switch (nbatParams.comb_rule) - { - case ljcrNONE: nbp->vdwtype = evdwTypeCUT; break; - case ljcrGEOM: nbp->vdwtype = evdwTypeCUTCOMBGEOM; break; - case ljcrLB: nbp->vdwtype = evdwTypeCUTCOMBLB; break; - default: - gmx_incons( - "The requested LJ combination rule is not implemented in the CUDA " - "GPU accelerated kernels!"); - } - break; - case eintmodFORCESWITCH: nbp->vdwtype = evdwTypeFSWITCH; break; - case eintmodPOTSWITCH: nbp->vdwtype = evdwTypePSWITCH; break; - default: - gmx_incons( - "The requested VdW interaction modifier is not implemented in the CUDA GPU " - "accelerated kernels!"); - } - } - else if (ic->vdwtype == evdwPME) - { - if (ic->ljpme_comb_rule == ljcrGEOM) - { - assert(nbatParams.comb_rule == ljcrGEOM); - nbp->vdwtype = evdwTypeEWALDGEOM; - } - else - { - assert(nbatParams.comb_rule == ljcrLB); - nbp->vdwtype = evdwTypeEWALDLB; - } - } - else - { - gmx_incons( - "The requested VdW type is not implemented in the CUDA GPU accelerated kernels!"); - } - - if (ic->eeltype == eelCUT) - { - nbp->eeltype = eelTypeCUT; - } - else if (EEL_RF(ic->eeltype)) - { - nbp->eeltype = eelTypeRF; - } - else if ((EEL_PME(ic->eeltype) || ic->eeltype == eelEWALD)) - { - nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic); - } - else - { - /* Shouldn't happen, as this is checked when choosing Verlet-scheme */ - gmx_incons( - "The requested electrostatics type is not implemented in the CUDA GPU accelerated " - "kernels!"); - } + nbp->vdwType = nbnxmGpuPickVdwKernelType(ic, nbatParams.comb_rule); + nbp->elecType = nbnxmGpuPickElectrostaticsKernelType(ic); /* generate table for PME */ nbp->coulomb_tab = nullptr; - if (nbp->eeltype == eelTypeEWALD_TAB || nbp->eeltype == eelTypeEWALD_TAB_TWIN) + if (nbp->elecType == ElecType::EwaldTab || nbp->elecType == ElecType::EwaldTabTwin) { 
GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables"); init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, deviceContext); } /* set up LJ parameter lookup table */ - if (!useLjCombRule(nbp->vdwtype)) + if (!useLjCombRule(nbp->vdwType)) { initParamLookupTable(&nbp->nbfp, &nbp->nbfp_texobj, nbatParams.nbfp.data(), 2 * ntypes * ntypes, deviceContext); @@ -412,7 +349,7 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat) allocateDeviceBuffer(&d_atdat->f, nalloc, deviceContext); allocateDeviceBuffer(&d_atdat->xq, nalloc, deviceContext); - if (useLjCombRule(nb->nbparam->vdwtype)) + if (useLjCombRule(nb->nbparam->vdwType)) { allocateDeviceBuffer(&d_atdat->lj_comb, nalloc, deviceContext); } @@ -434,7 +371,7 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat) nbnxn_cuda_clear_f(nb, nalloc); } - if (useLjCombRule(nb->nbparam->vdwtype)) + if (useLjCombRule(nb->nbparam->vdwType)) { static_assert(sizeof(d_atdat->lj_comb[0]) == sizeof(float2), "Size of the LJ parameters element should be equal to the size of float2."); @@ -471,7 +408,7 @@ void gpu_free(NbnxmGpu* nb) nbparam = nb->nbparam; if ((!nbparam->coulomb_tab) - && (nbparam->eeltype == eelTypeEWALD_TAB || nbparam->eeltype == eelTypeEWALD_TAB_TWIN)) + && (nbparam->elecType == ElecType::EwaldTab || nbparam->elecType == ElecType::EwaldTabTwin)) { destroyParamLookupTable(&nbparam->coulomb_tab, nbparam->coulomb_tab_texobj); } @@ -483,12 +420,12 @@ void gpu_free(NbnxmGpu* nb) delete nb->timers; - if (!useLjCombRule(nb->nbparam->vdwtype)) + if (!useLjCombRule(nb->nbparam->vdwType)) { destroyParamLookupTable(&nbparam->nbfp, nbparam->nbfp_texobj); } - if (nbparam->vdwtype == evdwTypeEWALDGEOM || nbparam->vdwtype == evdwTypeEWALDLB) + if (nbparam->vdwType == VdwType::EwaldGeom || nbparam->vdwType == VdwType::EwaldLB) { destroyParamLookupTable(&nbparam->nbfp_comb, nbparam->nbfp_comb_texobj); } diff --git a/src/gromacs/nbnxm/gpu_data_mgmt.h 
b/src/gromacs/nbnxm/gpu_data_mgmt.h index a472cb437d..763482badd 100644 --- a/src/gromacs/nbnxm/gpu_data_mgmt.h +++ b/src/gromacs/nbnxm/gpu_data_mgmt.h @@ -50,6 +50,8 @@ #include "gromacs/gpu_utils/gpu_macros.h" #include "gromacs/mdtypes/locality.h" +#include "nbnxm.h" + struct NbnxmGpu; struct DeviceInformation; struct gmx_wallclock_gpu_nbnxn_t; @@ -124,6 +126,16 @@ int gpu_min_ci_balanced(NbnxmGpu gmx_unused* nb) GPU_FUNC_TERM_WITH_RETURN(-1); GPU_FUNC_QUALIFIER bool gpu_is_kernel_ewald_analytical(const NbnxmGpu gmx_unused* nb) GPU_FUNC_TERM_WITH_RETURN(FALSE); +/** Return the enum value of electrostatics kernel type for given interaction parameters \p ic. */ +GPU_FUNC_QUALIFIER +enum ElecType nbnxmGpuPickElectrostaticsKernelType(const interaction_const_t gmx_unused* ic) + GPU_FUNC_TERM_WITH_RETURN(ElecType::Count); + +/** Return the enum value of VdW kernel type for given \p ic and \p combRule. */ +GPU_FUNC_QUALIFIER +enum VdwType nbnxmGpuPickVdwKernelType(const interaction_const_t gmx_unused* ic, int gmx_unused combRule) + GPU_FUNC_TERM_WITH_RETURN(VdwType::Count); + /** Returns an opaque pointer to the GPU command stream * Note: CUDA only. */ diff --git a/src/gromacs/nbnxm/gpu_types_common.h b/src/gromacs/nbnxm/gpu_types_common.h index 9166a3e50a..57d791be2a 100644 --- a/src/gromacs/nbnxm/gpu_types_common.h +++ b/src/gromacs/nbnxm/gpu_types_common.h @@ -47,6 +47,7 @@ #include "gromacs/mdtypes/locality.h" #include "gromacs/utility/enumerationhelpers.h" +#include "nbnxm.h" #include "pairlist.h" #if GMX_GPU_OPENCL @@ -63,10 +64,10 @@ struct NBParamGpu { - //! type of electrostatics, takes values from #eelType - int eeltype; - //! type of VdW impl., takes values from #evdwType - int vdwtype; + //! type of electrostatics + enum Nbnxm::ElecType elecType; + //! type of VdW impl. + enum Nbnxm::VdwType vdwType; //! 
charge multiplication factor float epsfac; diff --git a/src/gromacs/nbnxm/nbnxm.h b/src/gromacs/nbnxm/nbnxm.h index 2596350e52..bf94b022e7 100644 --- a/src/gromacs/nbnxm/nbnxm.h +++ b/src/gromacs/nbnxm/nbnxm.h @@ -155,13 +155,62 @@ class StepWorkload; class UpdateGroupsCog; } // namespace gmx +//! Namespace for non-bonded kernels namespace Nbnxm { enum class KernelType; -} -namespace Nbnxm +/*! \brief Nbnxm electrostatic GPU kernel flavors. + * + * Types of electrostatics implementations available in the GPU non-bonded + * force kernels. These represent both the electrostatics types implemented + * by the kernels (cut-off, RF, and Ewald - a subset of what's defined in + * enums.h) as well as encode implementation details analytical/tabulated + * and single or twin cut-off (for Ewald kernels). + * Note that the cut-off and RF kernels have only analytical flavor and unlike + * in the CPU kernels, the tabulated kernels are ATM Ewald-only. + * + * The row-order of pointers to different electrostatic kernels defined in + * nbnxn_cuda.cu by the nb_*_kfunc_ptr function pointer table + * should match the order of enumerated types below. + */ +enum class ElecType : int +{ + Cut, //!< Plain cut-off + RF, //!< Reaction field + EwaldTab, //!< Tabulated Ewald with single cut-off + EwaldTabTwin, //!< Tabulated Ewald with twin cut-off + EwaldAna, //!< Analytical Ewald with single cut-off + EwaldAnaTwin, //!< Analytical Ewald with twin cut-off + Count //!< Number of valid values +}; + +//! Number of possible \ref ElecType values. +constexpr int c_numElecTypes = static_cast<int>(ElecType::Count); + +/*! \brief Nbnxm VdW GPU kernel flavors. + * + * The enumerated values correspond to the LJ implementations in the GPU non-bonded + * kernels. + * + * The column-order of pointers to different VdW kernels defined in + * nbnxn_cuda_ocl.cpp/.cu by the nb_*_kfunc_ptr function pointer table + * should match the order of enumerated types below. 
+ */ +enum class VdwType : int { + Cut, //!< Plain cut-off + CutCombGeom, //!< Cut-off with geometric combination rules + CutCombLB, //!< Cut-off with Lorentz-Berthelot combination rules + FSwitch, //!< Smooth force switch + PSwitch, //!< Smooth potential switch + EwaldGeom, //!< Ewald with geometric combination rules + EwaldLB, //!< Ewald with Lorentz-Berthelot combination rules + Count //!< Number of valid values +}; + +//! Number of possible \ref VdwType values. +constexpr int c_numVdwTypes = static_cast<int>(VdwType::Count); /*! \brief Nonbonded NxN kernel types: plain C, CPU SIMD, GPU, GPU emulation */ enum class KernelType : int @@ -236,8 +285,6 @@ public: //! Return whether the pairlist is of simple, CPU type bool pairlistIsSimple() const { return !useGpu() && !emulateGpu(); } - //! Initialize the pair list sets, TODO this should be private - void initPairlistSets(bool haveMultipleDomains); //! Returns the order of the local atoms on the grid gmx::ArrayRef getLocalAtomOrder() const; @@ -342,22 +389,6 @@ public: */ void atomdata_add_nbat_f_to_f(gmx::AtomLocality locality, gmx::ArrayRef force); - /*! \brief Add the forces stored in nbat to total force using GPU buffer opse - * - * \param [in] locality Local or non-local - * \param [in,out] totalForcesDevice Force to be added to - * \param [in] forcesPmeDevice Device buffer with PME forces - * \param[in] dependencyList List of synchronizers that represent the dependencies the reduction task needs to sync on. - * \param [in] useGpuFPmeReduction Whether PME forces should be added - * \param [in] accumulateForce If the total force buffer already contains data - */ - void atomdata_add_nbat_f_to_f_gpu(gmx::AtomLocality locality, - DeviceBuffer totalForcesDevice, - void* forcesPmeDevice, - gmx::ArrayRef dependencyList, - bool useGpuFPmeReduction, - bool accumulateForce); - /*! 
\brief Get the number of atoms for a given locality * * \param [in] locality Local or non-local diff --git a/src/gromacs/nbnxm/nbnxm_gpu.h b/src/gromacs/nbnxm/nbnxm_gpu.h index b18d16a7a4..034b0f6562 100644 --- a/src/gromacs/nbnxm/nbnxm_gpu.h +++ b/src/gromacs/nbnxm/nbnxm_gpu.h @@ -51,6 +51,8 @@ #include "gromacs/utility/basedefinitions.h" #include "gromacs/utility/real.h" +#include "nbnxm.h" + struct interaction_const_t; struct nbnxn_atomdata_t; struct gmx_wallcycle; @@ -62,52 +64,6 @@ class GpuBonded; class StepWorkload; } // namespace gmx -/*! \brief Nbnxm electrostatic GPU kernel flavors. - * - * Types of electrostatics implementations available in the GPU non-bonded - * force kernels. These represent both the electrostatics types implemented - * by the kernels (cut-off, RF, and Ewald - a subset of what's defined in - * enums.h) as well as encode implementation details analytical/tabulated - * and single or twin cut-off (for Ewald kernels). - * Note that the cut-off and RF kernels have only analytical flavor and unlike - * in the CPU kernels, the tabulated kernels are ATM Ewald-only. - * - * The row-order of pointers to different electrostatic kernels defined in - * nbnxn_cuda.cu by the nb_*_kfunc_ptr function pointer table - * should match the order of enumerated types below. - */ -enum eelType : int -{ - eelTypeCUT, - eelTypeRF, - eelTypeEWALD_TAB, - eelTypeEWALD_TAB_TWIN, - eelTypeEWALD_ANA, - eelTypeEWALD_ANA_TWIN, - eelTypeNR -}; - -/*! \brief Nbnxm VdW GPU kernel flavors. - * - * The enumerates values correspond to the LJ implementations in the GPU non-bonded - * kernels. - * - * The column-order of pointers to different electrostatic kernels defined in - * nbnxn_cuda_ocl.cpp/.cu by the nb_*_kfunc_ptr function pointer table - * should match the order of enumerated types below. 
- */ -enum evdwType : int -{ - evdwTypeCUT, - evdwTypeCUTCOMBGEOM, - evdwTypeCUTCOMBLB, - evdwTypeFSWITCH, - evdwTypePSWITCH, - evdwTypeEWALDGEOM, - evdwTypeEWALDLB, - evdwTypeNR -}; - namespace Nbnxm { @@ -115,14 +71,14 @@ class Grid; /*! \brief Returns true if LJ combination rules are used in the non-bonded kernels. * - * \param[in] vdwType The VdW interaction/implementation type as defined by evdwType + * \param[in] vdwType The VdW interaction/implementation type as defined by VdwType * enumeration. * * \returns Whether combination rules are used by the run. */ -static inline bool useLjCombRule(const int vdwType) +static inline bool useLjCombRule(const enum VdwType vdwType) { - return (vdwType == evdwTypeCUTCOMBGEOM || vdwType == evdwTypeCUTCOMBLB); + return (vdwType == VdwType::CutCombGeom || vdwType == VdwType::CutCombLB); } /*! \brief diff --git a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp index 6e0d94ca2b..08b35450d3 100644 --- a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp +++ b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp @@ -58,6 +58,7 @@ #include "nbnxm_gpu_data_mgmt.h" +#include "gromacs/mdtypes/interaction_const.h" #include "gromacs/nbnxm/gpu_data_mgmt.h" #include "gromacs/timing/gpu_timing.h" #include "gromacs/utility/cstringutil.h" @@ -95,10 +96,9 @@ void inline printEnvironmentVariableDeprecationMessage(bool isEnvi } } -int nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t& ic) +enum ElecType nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t& ic) { bool bTwinCut = (ic.rcoulomb != ic.rvdw); - int kernel_type; /* Benchmarking/development environment variables to force the use of analytical or tabulated Ewald kernel. */ @@ -153,14 +153,12 @@ int nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t& ic) forces it (use it for debugging/benchmarking only). */ if (!bTwinCut && ((getenv("GMX_GPU_NB_EWALD_TWINCUT") == nullptr) || forceTwinCutoffEwaldLegacy)) { - kernel_type = bUseAnalyticalEwald ? 
eelTypeEWALD_ANA : eelTypeEWALD_TAB; + return bUseAnalyticalEwald ? ElecType::EwaldAna : ElecType::EwaldTab; } else { - kernel_type = bUseAnalyticalEwald ? eelTypeEWALD_ANA_TWIN : eelTypeEWALD_TAB_TWIN; + return bUseAnalyticalEwald ? ElecType::EwaldAnaTwin : ElecType::EwaldTabTwin; } - - return kernel_type; } void set_cutoff_parameters(NBParamGpu* nbp, const interaction_const_t* ic, const PairlistParams& listParams) @@ -196,7 +194,7 @@ void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interacti set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params()); - nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic); + nbp->elecType = nbnxn_gpu_pick_ewald_kernel_type(*ic); GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables"); init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, *nb->deviceContext_); @@ -327,7 +325,82 @@ void gpu_reset_timings(nonbonded_verlet_t* nbv) bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb) { - return ((nb->nbparam->eeltype == eelTypeEWALD_ANA) || (nb->nbparam->eeltype == eelTypeEWALD_ANA_TWIN)); + return ((nb->nbparam->elecType == ElecType::EwaldAna) + || (nb->nbparam->elecType == ElecType::EwaldAnaTwin)); +} + +enum ElecType nbnxmGpuPickElectrostaticsKernelType(const interaction_const_t* ic) +{ + if (ic->eeltype == eelCUT) + { + return ElecType::Cut; + } + else if (EEL_RF(ic->eeltype)) + { + return ElecType::RF; + } + else if ((EEL_PME(ic->eeltype) || ic->eeltype == eelEWALD)) + { + return nbnxn_gpu_pick_ewald_kernel_type(*ic); + } + else + { + /* Shouldn't happen, as this is checked when choosing Verlet-scheme */ + GMX_THROW(gmx::InconsistentInputError( + gmx::formatString("The requested electrostatics type %s (%d) is not implemented in " + "the GPU accelerated kernels!", + EELTYPE(ic->eeltype), ic->eeltype))); + } +} + + +enum VdwType nbnxmGpuPickVdwKernelType(const interaction_const_t* ic, int combRule) +{ + if (ic->vdwtype == evdwCUT) + { + switch (ic->vdw_modifier) + { 
+ case eintmodNONE: + case eintmodPOTSHIFT: + switch (combRule) + { + case ljcrNONE: return VdwType::Cut; + case ljcrGEOM: return VdwType::CutCombGeom; + case ljcrLB: return VdwType::CutCombLB; + default: + GMX_THROW(gmx::InconsistentInputError(gmx::formatString( + "The requested LJ combination rule %s (%d) is not implemented in " + "the GPU accelerated kernels!", + enum_name(combRule, ljcrNR, c_ljcrNames), combRule))); + } + case eintmodFORCESWITCH: return VdwType::FSwitch; + case eintmodPOTSWITCH: return VdwType::PSwitch; + default: + GMX_THROW(gmx::InconsistentInputError( + gmx::formatString("The requested VdW interaction modifier %s (%d) is not " + "implemented in the GPU accelerated kernels!", + INTMODIFIER(ic->vdw_modifier), ic->vdw_modifier))); + } + } + else if (ic->vdwtype == evdwPME) + { + if (ic->ljpme_comb_rule == ljcrGEOM) + { + assert(combRule == ljcrGEOM); + return VdwType::EwaldGeom; + } + else + { + assert(combRule == ljcrLB); + return VdwType::EwaldLB; + } + } + else + { + GMX_THROW(gmx::InconsistentInputError(gmx::formatString( + "The requested VdW type %s (%d) is not implemented in the GPU accelerated kernels!", + EVDWTYPE(ic->vdwtype), ic->vdwtype))); + } } } // namespace Nbnxm diff --git a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h index 761737ddf0..e0e65e562b 100644 --- a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h +++ b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.h @@ -68,7 +68,7 @@ void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables, const DeviceContext& deviceContext); /*! \brief Selects the Ewald kernel type, analytical or tabulated, single or twin cut-off. */ -int nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t gmx_unused& ic); +enum ElecType nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t gmx_unused& ic); /*! 
\brief Copies all parameters related to the cut-off from ic to nbp */ diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp index 48e8ed03ad..affbad0811 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp @@ -162,7 +162,7 @@ static inline void validate_global_work_size(const KernelLaunchConfig& config, */ /*! \brief Force-only kernel function names. */ -static const char* nb_kfunc_noener_noprune_ptr[eelTypeNR][evdwTypeNR] = { +static const char* nb_kfunc_noener_noprune_ptr[c_numElecTypes][c_numVdwTypes] = { { "nbnxn_kernel_ElecCut_VdwLJ_F_opencl", "nbnxn_kernel_ElecCut_VdwLJCombGeom_F_opencl", "nbnxn_kernel_ElecCut_VdwLJCombLB_F_opencl", "nbnxn_kernel_ElecCut_VdwLJFsw_F_opencl", "nbnxn_kernel_ElecCut_VdwLJPsw_F_opencl", "nbnxn_kernel_ElecCut_VdwLJEwCombGeom_F_opencl", @@ -196,7 +196,7 @@ static const char* nb_kfunc_noener_noprune_ptr[eelTypeNR][evdwTypeNR] = { }; /*! \brief Force + energy kernel function pointers. */ -static const char* nb_kfunc_ener_noprune_ptr[eelTypeNR][evdwTypeNR] = { +static const char* nb_kfunc_ener_noprune_ptr[c_numElecTypes][c_numVdwTypes] = { { "nbnxn_kernel_ElecCut_VdwLJ_VF_opencl", "nbnxn_kernel_ElecCut_VdwLJCombGeom_VF_opencl", "nbnxn_kernel_ElecCut_VdwLJCombLB_VF_opencl", "nbnxn_kernel_ElecCut_VdwLJFsw_VF_opencl", "nbnxn_kernel_ElecCut_VdwLJPsw_VF_opencl", "nbnxn_kernel_ElecCut_VdwLJEwCombGeom_VF_opencl", @@ -231,7 +231,7 @@ static const char* nb_kfunc_ener_noprune_ptr[eelTypeNR][evdwTypeNR] = { }; /*! \brief Force + pruning kernel function pointers. */ -static const char* nb_kfunc_noener_prune_ptr[eelTypeNR][evdwTypeNR] = { +static const char* nb_kfunc_noener_prune_ptr[c_numElecTypes][c_numVdwTypes] = { { "nbnxn_kernel_ElecCut_VdwLJ_F_prune_opencl", "nbnxn_kernel_ElecCut_VdwLJCombGeom_F_prune_opencl", "nbnxn_kernel_ElecCut_VdwLJCombLB_F_prune_opencl", @@ -272,7 +272,7 @@ static const char* nb_kfunc_noener_prune_ptr[eelTypeNR][evdwTypeNR] = { }; /*! 
\brief Force + energy + pruning kernel function pointers. */ -static const char* nb_kfunc_ener_prune_ptr[eelTypeNR][evdwTypeNR] = { +static const char* nb_kfunc_ener_prune_ptr[c_numElecTypes][c_numVdwTypes] = { { "nbnxn_kernel_ElecCut_VdwLJ_VF_prune_opencl", "nbnxn_kernel_ElecCut_VdwLJCombGeom_VF_prune_opencl", "nbnxn_kernel_ElecCut_VdwLJCombLB_VF_prune_opencl", @@ -341,41 +341,45 @@ static inline cl_kernel selectPruneKernel(cl_kernel kernel_pruneonly[], bool fir * OpenCL kernel objects are cached in nb. If the requested kernel is not * found in the cache, it will be created and the cache will be updated. */ -static inline cl_kernel select_nbnxn_kernel(NbnxmGpu* nb, int eeltype, int evdwtype, bool bDoEne, bool bDoPrune) +static inline cl_kernel +select_nbnxn_kernel(NbnxmGpu* nb, enum ElecType elecType, enum VdwType vdwType, bool bDoEne, bool bDoPrune) { const char* kernel_name_to_run; cl_kernel* kernel_ptr; cl_int cl_error; - GMX_ASSERT(eeltype < eelTypeNR, + const int elecTypeIdx = static_cast<int>(elecType); + const int vdwTypeIdx = static_cast<int>(vdwType); + + GMX_ASSERT(elecTypeIdx < c_numElecTypes, "The electrostatics type requested is not implemented in the OpenCL kernels."); - GMX_ASSERT(evdwtype < evdwTypeNR, + GMX_ASSERT(vdwTypeIdx < c_numVdwTypes, "The VdW type requested is not implemented in the OpenCL kernels."); if (bDoEne) { if (bDoPrune) { - kernel_name_to_run = nb_kfunc_ener_prune_ptr[eeltype][evdwtype]; - kernel_ptr = &(nb->kernel_ener_prune_ptr[eeltype][evdwtype]); + kernel_name_to_run = nb_kfunc_ener_prune_ptr[elecTypeIdx][vdwTypeIdx]; + kernel_ptr = &(nb->kernel_ener_prune_ptr[elecTypeIdx][vdwTypeIdx]); } else { - kernel_name_to_run = nb_kfunc_ener_noprune_ptr[eeltype][evdwtype]; - kernel_ptr = &(nb->kernel_ener_noprune_ptr[eeltype][evdwtype]); + kernel_name_to_run = nb_kfunc_ener_noprune_ptr[elecTypeIdx][vdwTypeIdx]; + kernel_ptr = &(nb->kernel_ener_noprune_ptr[elecTypeIdx][vdwTypeIdx]); } } else { if (bDoPrune) { - kernel_name_to_run = 
nb_kfunc_noener_prune_ptr[eeltype][evdwtype]; - kernel_ptr = &(nb->kernel_noener_prune_ptr[eeltype][evdwtype]); + kernel_name_to_run = nb_kfunc_noener_prune_ptr[elecTypeIdx][vdwTypeIdx]; + kernel_ptr = &(nb->kernel_noener_prune_ptr[elecTypeIdx][vdwTypeIdx]); } else { - kernel_name_to_run = nb_kfunc_noener_noprune_ptr[eeltype][evdwtype]; - kernel_ptr = &(nb->kernel_noener_noprune_ptr[eeltype][evdwtype]); + kernel_name_to_run = nb_kfunc_noener_noprune_ptr[elecTypeIdx][vdwTypeIdx]; + kernel_ptr = &(nb->kernel_noener_noprune_ptr[elecTypeIdx][vdwTypeIdx]); } } @@ -392,7 +396,7 @@ static inline cl_kernel select_nbnxn_kernel(NbnxmGpu* nb, int eeltype, int evdwt /*! \brief Calculates the amount of shared memory required by the nonbonded kernel in use. */ -static inline int calc_shmem_required_nonbonded(int vdwType, bool bPrefetchLjParam) +static inline int calc_shmem_required_nonbonded(enum VdwType vdwType, bool bPrefetchLjParam) { int shmem; @@ -438,7 +442,7 @@ static void fillin_ocl_structures(NBParamGpu* nbp, cl_nbparam_params_t* nbparams nbparams_params->coulomb_tab_scale = nbp->coulomb_tab_scale; nbparams_params->c_rf = nbp->c_rf; nbparams_params->dispersion_shift = nbp->dispersion_shift; - nbparams_params->eeltype = nbp->eeltype; + nbparams_params->elecType = nbp->elecType; nbparams_params->epsfac = nbp->epsfac; nbparams_params->ewaldcoeff_lj = nbp->ewaldcoeff_lj; nbparams_params->ewald_beta = nbp->ewald_beta; @@ -451,7 +455,7 @@ static void fillin_ocl_structures(NBParamGpu* nbp, cl_nbparam_params_t* nbparams nbparams_params->sh_ewald = nbp->sh_ewald; nbparams_params->sh_lj_ewald = nbp->sh_lj_ewald; nbparams_params->two_k_rf = nbp->two_k_rf; - nbparams_params->vdwtype = nbp->vdwtype; + nbparams_params->vdwType = nbp->vdwType; nbparams_params->vdw_switch = nbp->vdw_switch; } @@ -637,7 +641,7 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const Nb /* kernel launch config */ KernelLaunchConfig config; - config.sharedMemorySize = 
calc_shmem_required_nonbonded(nbp->vdwtype, nb->bPrefetchLjParam); + config.sharedMemorySize = calc_shmem_required_nonbonded(nbp->vdwType, nb->bPrefetchLjParam); config.blockSize[0] = c_clSize; config.blockSize[1] = c_clSize; config.gridSize[0] = plist->nsci; @@ -660,14 +664,14 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const Nb auto* timingEvent = bDoTime ? t->interaction[iloc].nb_k.fetchNextEvent() : nullptr; constexpr char kernelName[] = "k_calc_nb"; const auto kernel = - select_nbnxn_kernel(nb, nbp->eeltype, nbp->vdwtype, stepWork.computeEnergy, + select_nbnxn_kernel(nb, nbp->elecType, nbp->vdwType, stepWork.computeEnergy, (plist->haveFreshList && !nb->timers->interaction[iloc].didPrune)); // The OpenCL kernel takes int as second to last argument because bool is // not supported as a kernel argument type (sizeof(bool) is implementation defined). const int computeFshift = static_cast<int>(stepWork.computeVirial); - if (useLjCombRule(nb->nbparam->vdwtype)) + if (useLjCombRule(nb->nbparam->vdwType)) { const auto kernelArgs = prepareGpuKernelArguments( kernel, config, &nbparams_params, &adat->xq, &adat->f, &adat->e_lj, &adat->e_el, diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp index 29989c8095..8197c72cf9 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp @@ -124,77 +124,6 @@ static void init_atomdata_first(cl_atomdata_t* ad, int ntypes, const DeviceConte ad->nalloc = -1; } -/*! \brief Returns the kinds of electrostatics and Vdw OpenCL - * kernels that will be used. - * - * Respectively, these values are from enum eelOcl and enum - * evdwOcl.
*/ -static void map_interaction_types_to_gpu_kernel_flavors(const interaction_const_t* ic, - int combRule, - int* gpu_eeltype, - int* gpu_vdwtype) -{ - if (ic->vdwtype == evdwCUT) - { - switch (ic->vdw_modifier) - { - case eintmodNONE: - case eintmodPOTSHIFT: - switch (combRule) - { - case ljcrNONE: *gpu_vdwtype = evdwTypeCUT; break; - case ljcrGEOM: *gpu_vdwtype = evdwTypeCUTCOMBGEOM; break; - case ljcrLB: *gpu_vdwtype = evdwTypeCUTCOMBLB; break; - default: - gmx_incons( - "The requested LJ combination rule is not implemented in the " - "OpenCL GPU accelerated kernels!"); - } - break; - case eintmodFORCESWITCH: *gpu_vdwtype = evdwTypeFSWITCH; break; - case eintmodPOTSWITCH: *gpu_vdwtype = evdwTypePSWITCH; break; - default: - gmx_incons( - "The requested VdW interaction modifier is not implemented in the GPU " - "accelerated kernels!"); - } - } - else if (ic->vdwtype == evdwPME) - { - if (ic->ljpme_comb_rule == ljcrGEOM) - { - *gpu_vdwtype = evdwTypeEWALDGEOM; - } - else - { - *gpu_vdwtype = evdwTypeEWALDLB; - } - } - else - { - gmx_incons("The requested VdW type is not implemented in the GPU accelerated kernels!"); - } - - if (ic->eeltype == eelCUT) - { - *gpu_eeltype = eelTypeCUT; - } - else if (EEL_RF(ic->eeltype)) - { - *gpu_eeltype = eelTypeRF; - } - else if ((EEL_PME(ic->eeltype) || ic->eeltype == eelEWALD)) - { - *gpu_eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic); - } - else - { - /* Shouldn't happen, as this is checked when choosing Verlet-scheme */ - gmx_incons( - "The requested electrostatics type is not implemented in the GPU accelerated " - "kernels!"); - } -} /*! \brief Initializes the nonbonded parameter data structure. 
*/ @@ -206,7 +135,8 @@ static void init_nbparam(NBParamGpu* nbp, { set_cutoff_parameters(nbp, ic, listParams); - map_interaction_types_to_gpu_kernel_flavors(ic, nbatParams.comb_rule, &(nbp->eeltype), &(nbp->vdwtype)); + nbp->vdwType = nbnxmGpuPickVdwKernelType(ic, nbatParams.comb_rule); + nbp->elecType = nbnxmGpuPickElectrostaticsKernelType(ic); if (ic->vdwtype == evdwPME) { @@ -221,7 +151,7 @@ static void init_nbparam(NBParamGpu* nbp, } /* generate table for PME */ nbp->coulomb_tab = nullptr; - if (nbp->eeltype == eelTypeEWALD_TAB || nbp->eeltype == eelTypeEWALD_TAB_TWIN) + if (nbp->elecType == ElecType::EwaldTab || nbp->elecType == ElecType::EwaldTabTwin) { GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables"); init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, deviceContext); @@ -517,7 +447,7 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat) allocateDeviceBuffer(&d_atdat->f, nalloc * DIM, deviceContext); allocateDeviceBuffer(&d_atdat->xq, nalloc * (DIM + 1), deviceContext); - if (useLjCombRule(nb->nbparam->vdwtype)) + if (useLjCombRule(nb->nbparam->vdwType)) { // Two Lennard-Jones parameters per atom allocateDeviceBuffer(&d_atdat->lj_comb, nalloc * 2, deviceContext); @@ -540,7 +470,7 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat) nbnxn_ocl_clear_f(nb, nalloc); } - if (useLjCombRule(nb->nbparam->vdwtype)) + if (useLjCombRule(nb->nbparam->vdwType)) { GMX_ASSERT(sizeof(float) == sizeof(*nbat->params().lj_comb.data()), "Size of the LJ parameters element should be equal to the size of float2."); diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp index 18c583937b..4717e1601c 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp @@ -99,18 +99,18 @@ static const char* kernel_VdW_family_definitions[] = { /*! 
\brief Returns a string with the compiler defines required to avoid all flavour generation * - * For example if flavour eelTypeRF with evdwTypeFSWITCH, the output will be such that the corresponding + * For example if flavour ElecType::RF with VdwType::FSwitch, the output will be such that the corresponding * kernel flavour is generated: * -DGMX_OCL_FASTGEN (will replace flavour generator nbnxn_ocl_kernels.clh with nbnxn_ocl_kernels_fastgen.clh) - * -DEL_RF (The eelTypeRF flavour) + * -DEL_RF (The ElecType::RF flavour) * -DEELNAME=_ElecRF (The first part of the generated kernel name ) - * -DLJ_EWALD_COMB_GEOM (The evdwTypeFSWITCH flavour) + * -DLJ_EWALD_COMB_GEOM (The VdwType::FSwitch flavour) * -DVDWNAME=_VdwLJEwCombGeom (The second part of the generated kernel name ) * * prune/energy are still generated as originally. It is only the flavour-level that has changed, so that * only the required flavour for the simulation is compiled. * - * If eeltype is single-range Ewald, then we need to add the + * If elecType is single-range Ewald, then we need to add the * twin-cutoff flavour kernels to the JIT, because PME tuning might * need it. This path sets -DGMX_OCL_FASTGEN_ADD_TWINCUT, which * triggers the use of nbnxn_ocl_kernels_fastgen_add_twincut.clh. This @@ -122,19 +122,22 @@ static const char* kernel_VdW_family_definitions[] = { * JIT defaults to compiling all kernel flavours. 
* * \param[in] bFastGen Whether FastGen should be used - * \param[in] eeltype Electrostatics kernel flavour for FastGen - * \param[in] vdwtype VDW kernel flavour for FastGen + * \param[in] elecType Electrostatics kernel flavour for FastGen + * \param[in] vdwType VDW kernel flavour for FastGen * \return String with the defines if FastGen is active * * \throws std::bad_alloc if out of memory */ -static std::string makeDefinesForKernelTypes(bool bFastGen, int eeltype, int vdwtype) +static std::string makeDefinesForKernelTypes(bool bFastGen, + enum Nbnxm::ElecType elecType, + enum Nbnxm::VdwType vdwType) { + using Nbnxm::ElecType; std::string defines_for_kernel_types; if (bFastGen) { - bool bIsEwaldSingleCutoff = (eeltype == eelTypeEWALD_TAB || eeltype == eelTypeEWALD_ANA); + bool bIsEwaldSingleCutoff = (elecType == ElecType::EwaldTab || elecType == ElecType::EwaldAna); if (bIsEwaldSingleCutoff) { @@ -146,8 +149,8 @@ static std::string makeDefinesForKernelTypes(bool bFastGen, int eeltype, int vdw nbnxn_ocl_kernels_fastgen.clh. 
*/ defines_for_kernel_types += "-DGMX_OCL_FASTGEN"; } - defines_for_kernel_types += kernel_electrostatic_family_definitions[eeltype]; - defines_for_kernel_types += kernel_VdW_family_definitions[vdwtype]; + defines_for_kernel_types += kernel_electrostatic_family_definitions[static_cast<int>(elecType)]; + defines_for_kernel_types += kernel_VdW_family_definitions[static_cast<int>(vdwType)]; } return defines_for_kernel_types; @@ -182,7 +185,7 @@ void nbnxn_gpu_compile_kernels(NbnxmGpu* nb) try { std::string extraDefines = - makeDefinesForKernelTypes(bFastGen, nb->nbparam->eeltype, nb->nbparam->vdwtype); + makeDefinesForKernelTypes(bFastGen, nb->nbparam->elecType, nb->nbparam->vdwType); /* Here we pass macros and static const/constexpr int variables defined * in include files outside the opencl as macros, to avoid diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_utils.clh b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_utils.clh index 46c08c2d71..d0482aa7c8 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_utils.clh +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_utils.clh @@ -156,10 +156,10 @@ typedef struct typedef struct cl_nbparam_params { - //! type of electrostatics, takes values from #eelCu - int eeltype; - //! type of VdW impl., takes values from #evdwCu - int vdwtype; + //! type of electrostatics, takes values from #ElecType + int elecType; + //! type of VdW impl., takes values from #VdwType + int vdwType; //! charge multiplication factor float epsfac; diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h b/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h index a517c2fd04..d1adddb205 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h @@ -153,10 +153,10 @@ typedef struct cl_atomdata typedef struct cl_nbparam_params { - //! type of electrostatics, takes values from #eelType - int eeltype; - //! type of VdW impl., takes values from #evdwType - int vdwtype; + //!
type of electrostatics + enum Nbnxm::ElecType elecType; + //! type of VdW impl. + enum Nbnxm::VdwType vdwType; //! charge multiplication factor float epsfac; @@ -166,7 +166,7 @@ typedef struct cl_nbparam_params float two_k_rf; //! Ewald/PME parameter float ewald_beta; - //! Ewald/PME correction term substracted from the direct-space potential + //! Ewald/PME correction term subtracted from the direct-space potential float sh_ewald; //! LJ-Ewald/PME correction term added to the correction potential float sh_lj_ewald; @@ -219,10 +219,10 @@ struct NbnxmGpu /**< Pointers to non-bonded kernel functions * organized similar with nb_kfunc_xxx arrays in nbnxn_ocl.cpp */ ///@{ - cl_kernel kernel_noener_noprune_ptr[eelTypeNR][evdwTypeNR] = { { nullptr } }; - cl_kernel kernel_ener_noprune_ptr[eelTypeNR][evdwTypeNR] = { { nullptr } }; - cl_kernel kernel_noener_prune_ptr[eelTypeNR][evdwTypeNR] = { { nullptr } }; - cl_kernel kernel_ener_prune_ptr[eelTypeNR][evdwTypeNR] = { { nullptr } }; + cl_kernel kernel_noener_noprune_ptr[Nbnxm::c_numElecTypes][Nbnxm::c_numVdwTypes] = { { nullptr } }; + cl_kernel kernel_ener_noprune_ptr[Nbnxm::c_numElecTypes][Nbnxm::c_numVdwTypes] = { { nullptr } }; + cl_kernel kernel_noener_prune_ptr[Nbnxm::c_numElecTypes][Nbnxm::c_numVdwTypes] = { { nullptr } }; + cl_kernel kernel_ener_prune_ptr[Nbnxm::c_numElecTypes][Nbnxm::c_numVdwTypes] = { { nullptr } }; ///@} //! prune kernels, ePruneKind defined the kernel kinds cl_kernel kernel_pruneonly[ePruneNR] = { nullptr }; -- 2.22.0