- Make enums eelType and evdwType scoped, explicitly type relevant arguments.
- Remove code duplication between OpenCL and CUDA related to choosing the proper value of these enums.
- Remove declarations of two never defined functions from `src/gromacs/nbnxm/nbnxm.h`.
- Add names for ljcr (LJ comb. rules) enum values.
#include "md_enums.h"
-const char* enum_name(int index, int max_index, const char* names[])
+const char* enum_name(int index, int max_index, const char* const names[])
{
if (index < 0 || index >= max_index)
{
* \param[in] names The array
* \return the correct string or "no name defined"
*/
-const char* enum_name(int index, int max_index, const char* names[]);
+const char* enum_name(int index, int max_index, const char* const names[]);
//! Boolean strings no or yes
extern const char* yesno_names[BOOL_NR + 1];
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdtypes/forcerec.h" // only for GET_CGINFO_*
+#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/nbnxm/nbnxm.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/simd/simd.h"
using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
+const char* const c_ljcrNames[ljcrNR + 1] = { "none", "geometric", "Lorentz-Berthelot", nullptr };
+
void nbnxn_atomdata_t::resizeCoordinateBuffer(int numAtoms)
{
numAtoms_ = numAtoms;
}
else
{
- mesg = gmx::formatString(
- "Using %s Lennard-Jones combination rule",
- params->comb_rule == ljcrGEOM ? "geometric" : "Lorentz-Berthelot");
+ mesg = gmx::formatString("Using %s Lennard-Jones combination rule",
+ enum_name(params->comb_rule, ljcrNR, c_ljcrNames));
}
GMX_LOG(mdlog.info).asParagraph().appendText(mesg);
}
ljcrNR
};
+//! Names of the LJ combination rules, indexed by the ljcr* enum values
+extern const char* const c_ljcrNames[ljcrNR + 1];
+
/*! \internal
* \brief Struct that stores atom related data for the nbnxn module
*
*/
/*! Force-only kernel function pointers. */
-static const nbnxn_cu_kfunc_ptr_t nb_kfunc_noener_noprune_ptr[eelTypeNR][evdwTypeNR] = {
+static const nbnxn_cu_kfunc_ptr_t nb_kfunc_noener_noprune_ptr[c_numElecTypes][c_numVdwTypes] = {
{ nbnxn_kernel_ElecCut_VdwLJ_F_cuda, nbnxn_kernel_ElecCut_VdwLJCombGeom_F_cuda,
nbnxn_kernel_ElecCut_VdwLJCombLB_F_cuda, nbnxn_kernel_ElecCut_VdwLJFsw_F_cuda,
nbnxn_kernel_ElecCut_VdwLJPsw_F_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombGeom_F_cuda,
};
/*! Force + energy kernel function pointers. */
-static const nbnxn_cu_kfunc_ptr_t nb_kfunc_ener_noprune_ptr[eelTypeNR][evdwTypeNR] = {
+static const nbnxn_cu_kfunc_ptr_t nb_kfunc_ener_noprune_ptr[c_numElecTypes][c_numVdwTypes] = {
{ nbnxn_kernel_ElecCut_VdwLJ_VF_cuda, nbnxn_kernel_ElecCut_VdwLJCombGeom_VF_cuda,
nbnxn_kernel_ElecCut_VdwLJCombLB_VF_cuda, nbnxn_kernel_ElecCut_VdwLJFsw_VF_cuda,
nbnxn_kernel_ElecCut_VdwLJPsw_VF_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombGeom_VF_cuda,
};
/*! Force + pruning kernel function pointers. */
-static const nbnxn_cu_kfunc_ptr_t nb_kfunc_noener_prune_ptr[eelTypeNR][evdwTypeNR] = {
+static const nbnxn_cu_kfunc_ptr_t nb_kfunc_noener_prune_ptr[c_numElecTypes][c_numVdwTypes] = {
{ nbnxn_kernel_ElecCut_VdwLJ_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJCombGeom_F_prune_cuda,
nbnxn_kernel_ElecCut_VdwLJCombLB_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJFsw_F_prune_cuda,
nbnxn_kernel_ElecCut_VdwLJPsw_F_prune_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombGeom_F_prune_cuda,
};
/*! Force + energy + pruning kernel function pointers. */
-static const nbnxn_cu_kfunc_ptr_t nb_kfunc_ener_prune_ptr[eelTypeNR][evdwTypeNR] = {
+static const nbnxn_cu_kfunc_ptr_t nb_kfunc_ener_prune_ptr[c_numElecTypes][c_numVdwTypes] = {
{ nbnxn_kernel_ElecCut_VdwLJ_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJCombGeom_VF_prune_cuda,
nbnxn_kernel_ElecCut_VdwLJCombLB_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJFsw_VF_prune_cuda,
nbnxn_kernel_ElecCut_VdwLJPsw_VF_prune_cuda, nbnxn_kernel_ElecCut_VdwLJEwCombGeom_VF_prune_cuda,
};
/*! Return a pointer to the kernel version to be executed at the current step. */
-static inline nbnxn_cu_kfunc_ptr_t select_nbnxn_kernel(int eeltype,
- int evdwtype,
+static inline nbnxn_cu_kfunc_ptr_t select_nbnxn_kernel(enum ElecType elecType,
+ enum VdwType vdwType,
bool bDoEne,
bool bDoPrune,
const DeviceInformation gmx_unused* deviceInfo)
{
- nbnxn_cu_kfunc_ptr_t res;
+ const int elecTypeIdx = static_cast<int>(elecType);
+ const int vdwTypeIdx = static_cast<int>(vdwType);
- GMX_ASSERT(eeltype < eelTypeNR,
+ GMX_ASSERT(elecTypeIdx < c_numElecTypes,
"The electrostatics type requested is not implemented in the CUDA kernels.");
- GMX_ASSERT(evdwtype < evdwTypeNR,
+ GMX_ASSERT(vdwTypeIdx < c_numVdwTypes,
"The VdW type requested is not implemented in the CUDA kernels.");
/* assert assumptions made by the kernels */
{
if (bDoPrune)
{
- res = nb_kfunc_ener_prune_ptr[eeltype][evdwtype];
+ return nb_kfunc_ener_prune_ptr[elecTypeIdx][vdwTypeIdx];
}
else
{
- res = nb_kfunc_ener_noprune_ptr[eeltype][evdwtype];
+ return nb_kfunc_ener_noprune_ptr[elecTypeIdx][vdwTypeIdx];
}
}
else
{
if (bDoPrune)
{
- res = nb_kfunc_noener_prune_ptr[eeltype][evdwtype];
+ return nb_kfunc_noener_prune_ptr[elecTypeIdx][vdwTypeIdx];
}
else
{
- res = nb_kfunc_noener_noprune_ptr[eeltype][evdwtype];
+ return nb_kfunc_noener_noprune_ptr[elecTypeIdx][vdwTypeIdx];
}
}
-
- return res;
}
/*! \brief Calculates the amount of shared memory required by the nonbonded kernel in use. */
/* cj in shared memory, for each warp separately */
shmem += num_threads_z * c_nbnxnGpuClusterpairSplit * c_nbnxnGpuJgroupSize * sizeof(int);
- if (nbp->vdwtype == evdwTypeCUTCOMBGEOM || nbp->vdwtype == evdwTypeCUTCOMBLB)
+ if (nbp->vdwType == VdwType::CutCombGeom || nbp->vdwType == VdwType::CutCombLB)
{
/* i-atom LJ combination parameters in shared memory */
shmem += c_nbnxnGpuNumClusterPerSupercluster * c_clSize * sizeof(float2);
auto* timingEvent = bDoTime ? t->interaction[iloc].nb_k.fetchNextEvent() : nullptr;
const auto kernel =
- select_nbnxn_kernel(nbp->eeltype, nbp->vdwtype, stepWork.computeEnergy,
+ select_nbnxn_kernel(nbp->elecType, nbp->vdwType, stepWork.computeEnergy,
(plist->haveFreshList && !nb->timers->interaction[iloc].didPrune),
&nb->deviceContext_->deviceInfo());
const auto kernelArgs =
{
cudaError_t stat;
- for (int i = 0; i < eelTypeNR; i++)
+ for (int i = 0; i < c_numElecTypes; i++)
{
- for (int j = 0; j < evdwTypeNR; j++)
+ for (int j = 0; j < c_numVdwTypes; j++)
{
/* Default kernel 32/32 kB Shared/L1 */
cudaFuncSetCacheConfig(nb_kfunc_ener_prune_ptr[i][j], cudaFuncCachePreferEqual);
const nbnxn_atomdata_t::Params& nbatParams,
const DeviceContext& deviceContext)
{
- int ntypes;
-
- ntypes = nbatParams.numTypes;
+ const int ntypes = nbatParams.numTypes;
set_cutoff_parameters(nbp, ic, listParams);
* combination is rarely used. LJ force-switch with LB rule is more common,
* but gives only 1% speed-up.
*/
- if (ic->vdwtype == evdwCUT)
- {
- switch (ic->vdw_modifier)
- {
- case eintmodNONE:
- case eintmodPOTSHIFT:
- switch (nbatParams.comb_rule)
- {
- case ljcrNONE: nbp->vdwtype = evdwTypeCUT; break;
- case ljcrGEOM: nbp->vdwtype = evdwTypeCUTCOMBGEOM; break;
- case ljcrLB: nbp->vdwtype = evdwTypeCUTCOMBLB; break;
- default:
- gmx_incons(
- "The requested LJ combination rule is not implemented in the CUDA "
- "GPU accelerated kernels!");
- }
- break;
- case eintmodFORCESWITCH: nbp->vdwtype = evdwTypeFSWITCH; break;
- case eintmodPOTSWITCH: nbp->vdwtype = evdwTypePSWITCH; break;
- default:
- gmx_incons(
- "The requested VdW interaction modifier is not implemented in the CUDA GPU "
- "accelerated kernels!");
- }
- }
- else if (ic->vdwtype == evdwPME)
- {
- if (ic->ljpme_comb_rule == ljcrGEOM)
- {
- assert(nbatParams.comb_rule == ljcrGEOM);
- nbp->vdwtype = evdwTypeEWALDGEOM;
- }
- else
- {
- assert(nbatParams.comb_rule == ljcrLB);
- nbp->vdwtype = evdwTypeEWALDLB;
- }
- }
- else
- {
- gmx_incons(
- "The requested VdW type is not implemented in the CUDA GPU accelerated kernels!");
- }
-
- if (ic->eeltype == eelCUT)
- {
- nbp->eeltype = eelTypeCUT;
- }
- else if (EEL_RF(ic->eeltype))
- {
- nbp->eeltype = eelTypeRF;
- }
- else if ((EEL_PME(ic->eeltype) || ic->eeltype == eelEWALD))
- {
- nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic);
- }
- else
- {
- /* Shouldn't happen, as this is checked when choosing Verlet-scheme */
- gmx_incons(
- "The requested electrostatics type is not implemented in the CUDA GPU accelerated "
- "kernels!");
- }
+ nbp->vdwType = nbnxmGpuPickVdwKernelType(ic, nbatParams.comb_rule);
+ nbp->elecType = nbnxmGpuPickElectrostaticsKernelType(ic);
/* generate table for PME */
nbp->coulomb_tab = nullptr;
- if (nbp->eeltype == eelTypeEWALD_TAB || nbp->eeltype == eelTypeEWALD_TAB_TWIN)
+ if (nbp->elecType == ElecType::EwaldTab || nbp->elecType == ElecType::EwaldTabTwin)
{
GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables");
init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, deviceContext);
}
/* set up LJ parameter lookup table */
- if (!useLjCombRule(nbp->vdwtype))
+ if (!useLjCombRule(nbp->vdwType))
{
initParamLookupTable(&nbp->nbfp, &nbp->nbfp_texobj, nbatParams.nbfp.data(),
2 * ntypes * ntypes, deviceContext);
allocateDeviceBuffer(&d_atdat->f, nalloc, deviceContext);
allocateDeviceBuffer(&d_atdat->xq, nalloc, deviceContext);
- if (useLjCombRule(nb->nbparam->vdwtype))
+ if (useLjCombRule(nb->nbparam->vdwType))
{
allocateDeviceBuffer(&d_atdat->lj_comb, nalloc, deviceContext);
}
nbnxn_cuda_clear_f(nb, nalloc);
}
- if (useLjCombRule(nb->nbparam->vdwtype))
+ if (useLjCombRule(nb->nbparam->vdwType))
{
static_assert(sizeof(d_atdat->lj_comb[0]) == sizeof(float2),
"Size of the LJ parameters element should be equal to the size of float2.");
nbparam = nb->nbparam;
if ((!nbparam->coulomb_tab)
- && (nbparam->eeltype == eelTypeEWALD_TAB || nbparam->eeltype == eelTypeEWALD_TAB_TWIN))
+ && (nbparam->elecType == ElecType::EwaldTab || nbparam->elecType == ElecType::EwaldTabTwin))
{
destroyParamLookupTable(&nbparam->coulomb_tab, nbparam->coulomb_tab_texobj);
}
delete nb->timers;
- if (!useLjCombRule(nb->nbparam->vdwtype))
+ if (!useLjCombRule(nb->nbparam->vdwType))
{
destroyParamLookupTable(&nbparam->nbfp, nbparam->nbfp_texobj);
}
- if (nbparam->vdwtype == evdwTypeEWALDGEOM || nbparam->vdwtype == evdwTypeEWALDLB)
+ if (nbparam->vdwType == VdwType::EwaldGeom || nbparam->vdwType == VdwType::EwaldLB)
{
destroyParamLookupTable(&nbparam->nbfp_comb, nbparam->nbfp_comb_texobj);
}
#include "gromacs/gpu_utils/gpu_macros.h"
#include "gromacs/mdtypes/locality.h"
+#include "nbnxm.h"
+
struct NbnxmGpu;
struct DeviceInformation;
struct gmx_wallclock_gpu_nbnxn_t;
GPU_FUNC_QUALIFIER
bool gpu_is_kernel_ewald_analytical(const NbnxmGpu gmx_unused* nb) GPU_FUNC_TERM_WITH_RETURN(FALSE);
+/** Return the enum value of electrostatics kernel type for given interaction parameters \p ic. */
+GPU_FUNC_QUALIFIER
+enum ElecType nbnxmGpuPickElectrostaticsKernelType(const interaction_const_t gmx_unused* ic)
+ GPU_FUNC_TERM_WITH_RETURN(ElecType::Count);
+
+/** Return the enum value of VdW kernel type for given \p ic and \p combRule. */
+GPU_FUNC_QUALIFIER
+enum VdwType nbnxmGpuPickVdwKernelType(const interaction_const_t gmx_unused* ic, int gmx_unused combRule)
+ GPU_FUNC_TERM_WITH_RETURN(VdwType::Count);
+
/** Returns an opaque pointer to the GPU command stream
* Note: CUDA only.
*/
#include "gromacs/mdtypes/locality.h"
#include "gromacs/utility/enumerationhelpers.h"
+#include "nbnxm.h"
#include "pairlist.h"
#if GMX_GPU_OPENCL
struct NBParamGpu
{
- //! type of electrostatics, takes values from #eelType
- int eeltype;
- //! type of VdW impl., takes values from #evdwType
- int vdwtype;
+ //! type of electrostatics
+ enum Nbnxm::ElecType elecType;
+ //! type of VdW impl.
+ enum Nbnxm::VdwType vdwType;
//! charge multiplication factor
float epsfac;
class UpdateGroupsCog;
} // namespace gmx
+//! Namespace for non-bonded kernels
namespace Nbnxm
{
enum class KernelType;
-}
-namespace Nbnxm
+/*! \brief Nbnxm electrostatic GPU kernel flavors.
+ *
+ * Types of electrostatics implementations available in the GPU non-bonded
+ * force kernels. These represent both the electrostatics types implemented
+ * by the kernels (cut-off, RF, and Ewald - a subset of what's defined in
+ * enums.h) as well as encode implementation details analytical/tabulated
+ * and single or twin cut-off (for Ewald kernels).
+ * Note that the cut-off and RF kernels have only analytical flavor and unlike
+ * in the CPU kernels, the tabulated kernels are ATM Ewald-only.
+ *
+ * The row-order of pointers to different electrostatic kernels defined in
+ * nbnxn_cuda.cu by the nb_*_kfunc_ptr function pointer table
+ * should match the order of enumerated types below.
+ */
+enum class ElecType : int
+{
+ Cut, //!< Plain cut-off
+ RF, //!< Reaction field
+ EwaldTab, //!< Tabulated Ewald with single cut-off
+ EwaldTabTwin, //!< Tabulated Ewald with twin cut-off
+ EwaldAna, //!< Analytical Ewald with single cut-off
+ EwaldAnaTwin, //!< Analytical Ewald with twin cut-off
+ Count //!< Number of valid values
+};
+
+//! Number of possible \ref ElecType values.
+constexpr int c_numElecTypes = static_cast<int>(ElecType::Count);
+
+/*! \brief Nbnxm VdW GPU kernel flavors.
+ *
+ * The enumerated values correspond to the LJ implementations in the GPU non-bonded
+ * kernels.
+ *
+ * The column-order of pointers to different VdW kernels defined in
+ * nbnxn_cuda_ocl.cpp/.cu by the nb_*_kfunc_ptr function pointer table
+ * should match the order of enumerated types below.
+ */
+enum class VdwType : int
{
+ Cut, //!< Plain cut-off
+ CutCombGeom, //!< Cut-off with geometric combination rules
+ CutCombLB, //!< Cut-off with Lorentz-Berthelot combination rules
+ FSwitch, //!< Smooth force switch
+ PSwitch, //!< Smooth potential switch
+ EwaldGeom, //!< Ewald with geometric combination rules
+ EwaldLB, //!< Ewald with Lorentz-Berthelot combination rules
+ Count //!< Number of valid values
+};
+
+//! Number of possible \ref VdwType values.
+constexpr int c_numVdwTypes = static_cast<int>(VdwType::Count);
/*! \brief Nonbonded NxN kernel types: plain C, CPU SIMD, GPU, GPU emulation */
enum class KernelType : int
//! Return whether the pairlist is of simple, CPU type
bool pairlistIsSimple() const { return !useGpu() && !emulateGpu(); }
- //! Initialize the pair list sets, TODO this should be private
- void initPairlistSets(bool haveMultipleDomains);
//! Returns the order of the local atoms on the grid
gmx::ArrayRef<const int> getLocalAtomOrder() const;
*/
void atomdata_add_nbat_f_to_f(gmx::AtomLocality locality, gmx::ArrayRef<gmx::RVec> force);
- /*! \brief Add the forces stored in nbat to total force using GPU buffer opse
- *
- * \param [in] locality Local or non-local
- * \param [in,out] totalForcesDevice Force to be added to
- * \param [in] forcesPmeDevice Device buffer with PME forces
- * \param[in] dependencyList List of synchronizers that represent the dependencies the reduction task needs to sync on.
- * \param [in] useGpuFPmeReduction Whether PME forces should be added
- * \param [in] accumulateForce If the total force buffer already contains data
- */
- void atomdata_add_nbat_f_to_f_gpu(gmx::AtomLocality locality,
- DeviceBuffer<gmx::RVec> totalForcesDevice,
- void* forcesPmeDevice,
- gmx::ArrayRef<GpuEventSynchronizer* const> dependencyList,
- bool useGpuFPmeReduction,
- bool accumulateForce);
-
/*! \brief Get the number of atoms for a given locality
*
* \param [in] locality Local or non-local
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/real.h"
+#include "nbnxm.h"
+
struct interaction_const_t;
struct nbnxn_atomdata_t;
struct gmx_wallcycle;
class StepWorkload;
} // namespace gmx
-/*! \brief Nbnxm electrostatic GPU kernel flavors.
- *
- * Types of electrostatics implementations available in the GPU non-bonded
- * force kernels. These represent both the electrostatics types implemented
- * by the kernels (cut-off, RF, and Ewald - a subset of what's defined in
- * enums.h) as well as encode implementation details analytical/tabulated
- * and single or twin cut-off (for Ewald kernels).
- * Note that the cut-off and RF kernels have only analytical flavor and unlike
- * in the CPU kernels, the tabulated kernels are ATM Ewald-only.
- *
- * The row-order of pointers to different electrostatic kernels defined in
- * nbnxn_cuda.cu by the nb_*_kfunc_ptr function pointer table
- * should match the order of enumerated types below.
- */
-enum eelType : int
-{
- eelTypeCUT,
- eelTypeRF,
- eelTypeEWALD_TAB,
- eelTypeEWALD_TAB_TWIN,
- eelTypeEWALD_ANA,
- eelTypeEWALD_ANA_TWIN,
- eelTypeNR
-};
-
-/*! \brief Nbnxm VdW GPU kernel flavors.
- *
- * The enumerates values correspond to the LJ implementations in the GPU non-bonded
- * kernels.
- *
- * The column-order of pointers to different electrostatic kernels defined in
- * nbnxn_cuda_ocl.cpp/.cu by the nb_*_kfunc_ptr function pointer table
- * should match the order of enumerated types below.
- */
-enum evdwType : int
-{
- evdwTypeCUT,
- evdwTypeCUTCOMBGEOM,
- evdwTypeCUTCOMBLB,
- evdwTypeFSWITCH,
- evdwTypePSWITCH,
- evdwTypeEWALDGEOM,
- evdwTypeEWALDLB,
- evdwTypeNR
-};
-
namespace Nbnxm
{
/*! \brief Returns true if LJ combination rules are used in the non-bonded kernels.
*
- * \param[in] vdwType The VdW interaction/implementation type as defined by evdwType
+ * \param[in] vdwType The VdW interaction/implementation type as defined by VdwType
* enumeration.
*
* \returns Whether combination rules are used by the run.
*/
-static inline bool useLjCombRule(const int vdwType)
+static inline bool useLjCombRule(const enum VdwType vdwType)
{
- return (vdwType == evdwTypeCUTCOMBGEOM || vdwType == evdwTypeCUTCOMBLB);
+ return (vdwType == VdwType::CutCombGeom || vdwType == VdwType::CutCombLB);
}
/*! \brief
#include "nbnxm_gpu_data_mgmt.h"
+#include "gromacs/mdtypes/interaction_const.h"
#include "gromacs/nbnxm/gpu_data_mgmt.h"
#include "gromacs/timing/gpu_timing.h"
#include "gromacs/utility/cstringutil.h"
}
}
-int nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t& ic)
+enum ElecType nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t& ic)
{
bool bTwinCut = (ic.rcoulomb != ic.rvdw);
- int kernel_type;
/* Benchmarking/development environment variables to force the use of
analytical or tabulated Ewald kernel. */
forces it (use it for debugging/benchmarking only). */
if (!bTwinCut && ((getenv("GMX_GPU_NB_EWALD_TWINCUT") == nullptr) || forceTwinCutoffEwaldLegacy))
{
- kernel_type = bUseAnalyticalEwald ? eelTypeEWALD_ANA : eelTypeEWALD_TAB;
+ return bUseAnalyticalEwald ? ElecType::EwaldAna : ElecType::EwaldTab;
}
else
{
- kernel_type = bUseAnalyticalEwald ? eelTypeEWALD_ANA_TWIN : eelTypeEWALD_TAB_TWIN;
+ return bUseAnalyticalEwald ? ElecType::EwaldAnaTwin : ElecType::EwaldTabTwin;
}
-
- return kernel_type;
}
void set_cutoff_parameters(NBParamGpu* nbp, const interaction_const_t* ic, const PairlistParams& listParams)
set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params());
- nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic);
+ nbp->elecType = nbnxn_gpu_pick_ewald_kernel_type(*ic);
GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables");
init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, *nb->deviceContext_);
bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb)
{
- return ((nb->nbparam->eeltype == eelTypeEWALD_ANA) || (nb->nbparam->eeltype == eelTypeEWALD_ANA_TWIN));
+ return ((nb->nbparam->elecType == ElecType::EwaldAna)
+ || (nb->nbparam->elecType == ElecType::EwaldAnaTwin));
+}
+
+enum ElecType nbnxmGpuPickElectrostaticsKernelType(const interaction_const_t* ic)
+{
+ if (ic->eeltype == eelCUT) // plain cut-off electrostatics
+ {
+ return ElecType::Cut;
+ }
+ else if (EEL_RF(ic->eeltype)) // types matched by EEL_RF() map to the reaction-field kernels
+ {
+ return ElecType::RF;
+ }
+ else if ((EEL_PME(ic->eeltype) || ic->eeltype == eelEWALD)) // PME-type or plain Ewald
+ {
+ return nbnxn_gpu_pick_ewald_kernel_type(*ic); // analytical/tabulated, single/twin cut-off
+ }
+ else
+ {
+ /* Shouldn't happen, as this is checked when choosing Verlet-scheme */
+ GMX_THROW(gmx::InconsistentInputError(
+ gmx::formatString("The requested electrostatics type %s (%d) is not implemented in "
+ "the GPU accelerated kernels!",
+ EELTYPE(ic->eeltype), ic->eeltype)));
+ }
+}
+
+
+enum VdwType nbnxmGpuPickVdwKernelType(const interaction_const_t* ic, int combRule)
+{
+ if (ic->vdwtype == evdwCUT) // cut-off VdW: the flavour depends on ic->vdw_modifier
+ {
+ switch (ic->vdw_modifier)
+ {
+ case eintmodNONE:
+ case eintmodPOTSHIFT: // no modifier and potential-shift are handled identically
+ switch (combRule) // choose the kernel flavour matching the LJ combination rule
+ {
+ case ljcrNONE: return VdwType::Cut;
+ case ljcrGEOM: return VdwType::CutCombGeom;
+ case ljcrLB: return VdwType::CutCombLB;
+ default:
+ GMX_THROW(gmx::InconsistentInputError(gmx::formatString(
+ "The requested LJ combination rule %s (%d) is not implemented in "
+ "the GPU accelerated kernels!",
+ enum_name(combRule, ljcrNR, c_ljcrNames), combRule)));
+ } // every inner case returns or GMX_THROWs, so control does not reach the next label
+ case eintmodFORCESWITCH: return VdwType::FSwitch;
+ case eintmodPOTSWITCH: return VdwType::PSwitch;
+ default:
+ GMX_THROW(gmx::InconsistentInputError(
+ gmx::formatString("The requested VdW interaction modifier %s (%d) is not "
+ "implemented in the GPU accelerated kernels!",
+ INTMODIFIER(ic->vdw_modifier), ic->vdw_modifier)));
+ }
+ }
+ else if (ic->vdwtype == evdwPME) // LJ-PME: the flavour follows ic->ljpme_comb_rule
+ {
+ if (ic->ljpme_comb_rule == ljcrGEOM)
+ {
+ assert(combRule == ljcrGEOM); // caller-supplied rule is expected to agree with ic
+ return VdwType::EwaldGeom;
+ }
+ else // any non-geometric LJ-PME rule is treated as Lorentz-Berthelot
+ {
+ assert(combRule == ljcrLB); // caller-supplied rule is expected to agree with ic
+ return VdwType::EwaldLB;
+ }
+ }
+ else
+ {
+ GMX_THROW(gmx::InconsistentInputError(gmx::formatString(
+ "The requested VdW type %s (%d) is not implemented in the GPU accelerated kernels!",
+ EVDWTYPE(ic->vdwtype), ic->vdwtype)));
+ }
}
} // namespace Nbnxm
const DeviceContext& deviceContext);
/*! \brief Selects the Ewald kernel type, analytical or tabulated, single or twin cut-off. */
-int nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t gmx_unused& ic);
+enum ElecType nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t gmx_unused& ic);
/*! \brief Copies all parameters related to the cut-off from ic to nbp
*/
*/
/*! \brief Force-only kernel function names. */
-static const char* nb_kfunc_noener_noprune_ptr[eelTypeNR][evdwTypeNR] = {
+static const char* nb_kfunc_noener_noprune_ptr[c_numElecTypes][c_numVdwTypes] = {
{ "nbnxn_kernel_ElecCut_VdwLJ_F_opencl", "nbnxn_kernel_ElecCut_VdwLJCombGeom_F_opencl",
"nbnxn_kernel_ElecCut_VdwLJCombLB_F_opencl", "nbnxn_kernel_ElecCut_VdwLJFsw_F_opencl",
"nbnxn_kernel_ElecCut_VdwLJPsw_F_opencl", "nbnxn_kernel_ElecCut_VdwLJEwCombGeom_F_opencl",
};
/*! \brief Force + energy kernel function pointers. */
-static const char* nb_kfunc_ener_noprune_ptr[eelTypeNR][evdwTypeNR] = {
+static const char* nb_kfunc_ener_noprune_ptr[c_numElecTypes][c_numVdwTypes] = {
{ "nbnxn_kernel_ElecCut_VdwLJ_VF_opencl", "nbnxn_kernel_ElecCut_VdwLJCombGeom_VF_opencl",
"nbnxn_kernel_ElecCut_VdwLJCombLB_VF_opencl", "nbnxn_kernel_ElecCut_VdwLJFsw_VF_opencl",
"nbnxn_kernel_ElecCut_VdwLJPsw_VF_opencl", "nbnxn_kernel_ElecCut_VdwLJEwCombGeom_VF_opencl",
};
/*! \brief Force + pruning kernel function pointers. */
-static const char* nb_kfunc_noener_prune_ptr[eelTypeNR][evdwTypeNR] = {
+static const char* nb_kfunc_noener_prune_ptr[c_numElecTypes][c_numVdwTypes] = {
{ "nbnxn_kernel_ElecCut_VdwLJ_F_prune_opencl",
"nbnxn_kernel_ElecCut_VdwLJCombGeom_F_prune_opencl",
"nbnxn_kernel_ElecCut_VdwLJCombLB_F_prune_opencl",
};
/*! \brief Force + energy + pruning kernel function pointers. */
-static const char* nb_kfunc_ener_prune_ptr[eelTypeNR][evdwTypeNR] = {
+static const char* nb_kfunc_ener_prune_ptr[c_numElecTypes][c_numVdwTypes] = {
{ "nbnxn_kernel_ElecCut_VdwLJ_VF_prune_opencl",
"nbnxn_kernel_ElecCut_VdwLJCombGeom_VF_prune_opencl",
"nbnxn_kernel_ElecCut_VdwLJCombLB_VF_prune_opencl",
* OpenCL kernel objects are cached in nb. If the requested kernel is not
* found in the cache, it will be created and the cache will be updated.
*/
-static inline cl_kernel select_nbnxn_kernel(NbnxmGpu* nb, int eeltype, int evdwtype, bool bDoEne, bool bDoPrune)
+static inline cl_kernel
+select_nbnxn_kernel(NbnxmGpu* nb, enum ElecType elecType, enum VdwType vdwType, bool bDoEne, bool bDoPrune)
{
const char* kernel_name_to_run;
cl_kernel* kernel_ptr;
cl_int cl_error;
- GMX_ASSERT(eeltype < eelTypeNR,
+ const int elecTypeIdx = static_cast<int>(elecType);
+ const int vdwTypeIdx = static_cast<int>(vdwType);
+
+ GMX_ASSERT(elecTypeIdx < c_numElecTypes,
"The electrostatics type requested is not implemented in the OpenCL kernels.");
- GMX_ASSERT(evdwtype < evdwTypeNR,
+ GMX_ASSERT(vdwTypeIdx < c_numVdwTypes,
"The VdW type requested is not implemented in the OpenCL kernels.");
if (bDoEne)
{
if (bDoPrune)
{
- kernel_name_to_run = nb_kfunc_ener_prune_ptr[eeltype][evdwtype];
- kernel_ptr = &(nb->kernel_ener_prune_ptr[eeltype][evdwtype]);
+ kernel_name_to_run = nb_kfunc_ener_prune_ptr[elecTypeIdx][vdwTypeIdx];
+ kernel_ptr = &(nb->kernel_ener_prune_ptr[elecTypeIdx][vdwTypeIdx]);
}
else
{
- kernel_name_to_run = nb_kfunc_ener_noprune_ptr[eeltype][evdwtype];
- kernel_ptr = &(nb->kernel_ener_noprune_ptr[eeltype][evdwtype]);
+ kernel_name_to_run = nb_kfunc_ener_noprune_ptr[elecTypeIdx][vdwTypeIdx];
+ kernel_ptr = &(nb->kernel_ener_noprune_ptr[elecTypeIdx][vdwTypeIdx]);
}
}
else
{
if (bDoPrune)
{
- kernel_name_to_run = nb_kfunc_noener_prune_ptr[eeltype][evdwtype];
- kernel_ptr = &(nb->kernel_noener_prune_ptr[eeltype][evdwtype]);
+ kernel_name_to_run = nb_kfunc_noener_prune_ptr[elecTypeIdx][vdwTypeIdx];
+ kernel_ptr = &(nb->kernel_noener_prune_ptr[elecTypeIdx][vdwTypeIdx]);
}
else
{
- kernel_name_to_run = nb_kfunc_noener_noprune_ptr[eeltype][evdwtype];
- kernel_ptr = &(nb->kernel_noener_noprune_ptr[eeltype][evdwtype]);
+ kernel_name_to_run = nb_kfunc_noener_noprune_ptr[elecTypeIdx][vdwTypeIdx];
+ kernel_ptr = &(nb->kernel_noener_noprune_ptr[elecTypeIdx][vdwTypeIdx]);
}
}
/*! \brief Calculates the amount of shared memory required by the nonbonded kernel in use.
*/
-static inline int calc_shmem_required_nonbonded(int vdwType, bool bPrefetchLjParam)
+static inline int calc_shmem_required_nonbonded(enum VdwType vdwType, bool bPrefetchLjParam)
{
int shmem;
nbparams_params->coulomb_tab_scale = nbp->coulomb_tab_scale;
nbparams_params->c_rf = nbp->c_rf;
nbparams_params->dispersion_shift = nbp->dispersion_shift;
- nbparams_params->eeltype = nbp->eeltype;
+ nbparams_params->elecType = nbp->elecType;
nbparams_params->epsfac = nbp->epsfac;
nbparams_params->ewaldcoeff_lj = nbp->ewaldcoeff_lj;
nbparams_params->ewald_beta = nbp->ewald_beta;
nbparams_params->sh_ewald = nbp->sh_ewald;
nbparams_params->sh_lj_ewald = nbp->sh_lj_ewald;
nbparams_params->two_k_rf = nbp->two_k_rf;
- nbparams_params->vdwtype = nbp->vdwtype;
+ nbparams_params->vdwType = nbp->vdwType;
nbparams_params->vdw_switch = nbp->vdw_switch;
}
/* kernel launch config */
KernelLaunchConfig config;
- config.sharedMemorySize = calc_shmem_required_nonbonded(nbp->vdwtype, nb->bPrefetchLjParam);
+ config.sharedMemorySize = calc_shmem_required_nonbonded(nbp->vdwType, nb->bPrefetchLjParam);
config.blockSize[0] = c_clSize;
config.blockSize[1] = c_clSize;
config.gridSize[0] = plist->nsci;
auto* timingEvent = bDoTime ? t->interaction[iloc].nb_k.fetchNextEvent() : nullptr;
constexpr char kernelName[] = "k_calc_nb";
const auto kernel =
- select_nbnxn_kernel(nb, nbp->eeltype, nbp->vdwtype, stepWork.computeEnergy,
+ select_nbnxn_kernel(nb, nbp->elecType, nbp->vdwType, stepWork.computeEnergy,
(plist->haveFreshList && !nb->timers->interaction[iloc].didPrune));
// The OpenCL kernel takes int as second to last argument because bool is
// not supported as a kernel argument type (sizeof(bool) is implementation defined).
const int computeFshift = static_cast<int>(stepWork.computeVirial);
- if (useLjCombRule(nb->nbparam->vdwtype))
+ if (useLjCombRule(nb->nbparam->vdwType))
{
const auto kernelArgs = prepareGpuKernelArguments(
kernel, config, &nbparams_params, &adat->xq, &adat->f, &adat->e_lj, &adat->e_el,
ad->nalloc = -1;
}
-/*! \brief Returns the kinds of electrostatics and Vdw OpenCL
- * kernels that will be used.
- *
- * Respectively, these values are from enum eelOcl and enum
- * evdwOcl. */
-static void map_interaction_types_to_gpu_kernel_flavors(const interaction_const_t* ic,
- int combRule,
- int* gpu_eeltype,
- int* gpu_vdwtype)
-{
- if (ic->vdwtype == evdwCUT)
- {
- switch (ic->vdw_modifier)
- {
- case eintmodNONE:
- case eintmodPOTSHIFT:
- switch (combRule)
- {
- case ljcrNONE: *gpu_vdwtype = evdwTypeCUT; break;
- case ljcrGEOM: *gpu_vdwtype = evdwTypeCUTCOMBGEOM; break;
- case ljcrLB: *gpu_vdwtype = evdwTypeCUTCOMBLB; break;
- default:
- gmx_incons(
- "The requested LJ combination rule is not implemented in the "
- "OpenCL GPU accelerated kernels!");
- }
- break;
- case eintmodFORCESWITCH: *gpu_vdwtype = evdwTypeFSWITCH; break;
- case eintmodPOTSWITCH: *gpu_vdwtype = evdwTypePSWITCH; break;
- default:
- gmx_incons(
- "The requested VdW interaction modifier is not implemented in the GPU "
- "accelerated kernels!");
- }
- }
- else if (ic->vdwtype == evdwPME)
- {
- if (ic->ljpme_comb_rule == ljcrGEOM)
- {
- *gpu_vdwtype = evdwTypeEWALDGEOM;
- }
- else
- {
- *gpu_vdwtype = evdwTypeEWALDLB;
- }
- }
- else
- {
- gmx_incons("The requested VdW type is not implemented in the GPU accelerated kernels!");
- }
-
- if (ic->eeltype == eelCUT)
- {
- *gpu_eeltype = eelTypeCUT;
- }
- else if (EEL_RF(ic->eeltype))
- {
- *gpu_eeltype = eelTypeRF;
- }
- else if ((EEL_PME(ic->eeltype) || ic->eeltype == eelEWALD))
- {
- *gpu_eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic);
- }
- else
- {
- /* Shouldn't happen, as this is checked when choosing Verlet-scheme */
- gmx_incons(
- "The requested electrostatics type is not implemented in the GPU accelerated "
- "kernels!");
- }
-}
/*! \brief Initializes the nonbonded parameter data structure.
*/
{
set_cutoff_parameters(nbp, ic, listParams);
- map_interaction_types_to_gpu_kernel_flavors(ic, nbatParams.comb_rule, &(nbp->eeltype), &(nbp->vdwtype));
+ nbp->vdwType = nbnxmGpuPickVdwKernelType(ic, nbatParams.comb_rule);
+ nbp->elecType = nbnxmGpuPickElectrostaticsKernelType(ic);
if (ic->vdwtype == evdwPME)
{
}
/* generate table for PME */
nbp->coulomb_tab = nullptr;
- if (nbp->eeltype == eelTypeEWALD_TAB || nbp->eeltype == eelTypeEWALD_TAB_TWIN)
+ if (nbp->elecType == ElecType::EwaldTab || nbp->elecType == ElecType::EwaldTabTwin)
{
GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables");
init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, deviceContext);
allocateDeviceBuffer(&d_atdat->f, nalloc * DIM, deviceContext);
allocateDeviceBuffer(&d_atdat->xq, nalloc * (DIM + 1), deviceContext);
- if (useLjCombRule(nb->nbparam->vdwtype))
+ if (useLjCombRule(nb->nbparam->vdwType))
{
// Two Lennard-Jones parameters per atom
allocateDeviceBuffer(&d_atdat->lj_comb, nalloc * 2, deviceContext);
nbnxn_ocl_clear_f(nb, nalloc);
}
- if (useLjCombRule(nb->nbparam->vdwtype))
+ if (useLjCombRule(nb->nbparam->vdwType))
{
GMX_ASSERT(sizeof(float) == sizeof(*nbat->params().lj_comb.data()),
"Size of the LJ parameters element should be equal to the size of float2.");
/*! \brief Returns a string with the compiler defines required to avoid all flavour generation
*
- * For example if flavour eelTypeRF with evdwTypeFSWITCH, the output will be such that the corresponding
+ * For example if flavour ElecType::RF with VdwType::EwaldGeom, the output will be such that the corresponding
* kernel flavour is generated:
* -DGMX_OCL_FASTGEN (will replace flavour generator nbnxn_ocl_kernels.clh with nbnxn_ocl_kernels_fastgen.clh)
- * -DEL_RF (The eelTypeRF flavour)
+ * -DEL_RF (The ElecType::RF flavour)
* -DEELNAME=_ElecRF (The first part of the generated kernel name )
- * -DLJ_EWALD_COMB_GEOM (The evdwTypeFSWITCH flavour)
+ * -DLJ_EWALD_COMB_GEOM (The VdwType::EwaldGeom flavour)
* -DVDWNAME=_VdwLJEwCombGeom (The second part of the generated kernel name )
*
* prune/energy are still generated as originally. It is only the flavour-level that has changed, so that
* only the required flavour for the simulation is compiled.
*
- * If eeltype is single-range Ewald, then we need to add the
+ * If elecType is single-range Ewald, then we need to add the
* twin-cutoff flavour kernels to the JIT, because PME tuning might
* need it. This path sets -DGMX_OCL_FASTGEN_ADD_TWINCUT, which
* triggers the use of nbnxn_ocl_kernels_fastgen_add_twincut.clh. This
* JIT defaults to compiling all kernel flavours.
*
* \param[in] bFastGen Whether FastGen should be used
- * \param[in] eeltype Electrostatics kernel flavour for FastGen
- * \param[in] vdwtype VDW kernel flavour for FastGen
+ * \param[in] elecType Electrostatics kernel flavour for FastGen
+ * \param[in] vdwType VDW kernel flavour for FastGen
* \return String with the defines if FastGen is active
*
* \throws std::bad_alloc if out of memory
*/
-static std::string makeDefinesForKernelTypes(bool bFastGen, int eeltype, int vdwtype)
+static std::string makeDefinesForKernelTypes(bool bFastGen,
+ enum Nbnxm::ElecType elecType,
+ enum Nbnxm::VdwType vdwType)
{
+ using Nbnxm::ElecType;
std::string defines_for_kernel_types;
if (bFastGen)
{
- bool bIsEwaldSingleCutoff = (eeltype == eelTypeEWALD_TAB || eeltype == eelTypeEWALD_ANA);
+ bool bIsEwaldSingleCutoff = (elecType == ElecType::EwaldTab || elecType == ElecType::EwaldAna);
if (bIsEwaldSingleCutoff)
{
nbnxn_ocl_kernels_fastgen.clh. */
defines_for_kernel_types += "-DGMX_OCL_FASTGEN";
}
- defines_for_kernel_types += kernel_electrostatic_family_definitions[eeltype];
- defines_for_kernel_types += kernel_VdW_family_definitions[vdwtype];
+ defines_for_kernel_types += kernel_electrostatic_family_definitions[static_cast<int>(elecType)];
+ defines_for_kernel_types += kernel_VdW_family_definitions[static_cast<int>(vdwType)];
}
return defines_for_kernel_types;
try
{
std::string extraDefines =
- makeDefinesForKernelTypes(bFastGen, nb->nbparam->eeltype, nb->nbparam->vdwtype);
+ makeDefinesForKernelTypes(bFastGen, nb->nbparam->elecType, nb->nbparam->vdwType);
/* Here we pass macros and static const/constexpr int variables defined
* in include files outside the opencl as macros, to avoid
typedef struct cl_nbparam_params
{
- //! type of electrostatics, takes values from #eelCu
- int eeltype;
- //! type of VdW impl., takes values from #evdwCu
- int vdwtype;
+ //! type of electrostatics, takes values from #ElecType
+ int elecType;
+ //! type of VdW impl., takes values from #VdwType
+ int vdwType;
//! charge multiplication factor
float epsfac;
typedef struct cl_nbparam_params
{
- //! type of electrostatics, takes values from #eelType
- int eeltype;
- //! type of VdW impl., takes values from #evdwType
- int vdwtype;
+ //! type of electrostatics
+ enum Nbnxm::ElecType elecType;
+ //! type of VdW impl.
+ enum Nbnxm::VdwType vdwType;
//! charge multiplication factor
float epsfac;
float two_k_rf;
//! Ewald/PME parameter
float ewald_beta;
- //! Ewald/PME correction term substracted from the direct-space potential
+ //! Ewald/PME correction term subtracted from the direct-space potential
float sh_ewald;
//! LJ-Ewald/PME correction term added to the correction potential
float sh_lj_ewald;
/**< Pointers to non-bonded kernel functions
* organized similar with nb_kfunc_xxx arrays in nbnxn_ocl.cpp */
///@{
- cl_kernel kernel_noener_noprune_ptr[eelTypeNR][evdwTypeNR] = { { nullptr } };
- cl_kernel kernel_ener_noprune_ptr[eelTypeNR][evdwTypeNR] = { { nullptr } };
- cl_kernel kernel_noener_prune_ptr[eelTypeNR][evdwTypeNR] = { { nullptr } };
- cl_kernel kernel_ener_prune_ptr[eelTypeNR][evdwTypeNR] = { { nullptr } };
+ cl_kernel kernel_noener_noprune_ptr[Nbnxm::c_numElecTypes][Nbnxm::c_numVdwTypes] = { { nullptr } };
+ cl_kernel kernel_ener_noprune_ptr[Nbnxm::c_numElecTypes][Nbnxm::c_numVdwTypes] = { { nullptr } };
+ cl_kernel kernel_noener_prune_ptr[Nbnxm::c_numElecTypes][Nbnxm::c_numVdwTypes] = { { nullptr } };
+ cl_kernel kernel_ener_prune_ptr[Nbnxm::c_numElecTypes][Nbnxm::c_numVdwTypes] = { { nullptr } };
///@}
//! prune kernels, ePruneKind defined the kernel kinds
cl_kernel kernel_pruneonly[ePruneNR] = { nullptr };