Converted the nbnxm kernel type and Ewald exclusion enums to enum classes.
Added resource, kernel setup and pairlist type enums.
Also made pairlistSets_ private.
Change-Id: Ic8ff8c59cf8a72d3462b8a0f7382cd1636136c40
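For orientation, a minimal hypothetical snippet (not part of the change itself) showing the effect of the conversion: the scoped enumerators no longer convert implicitly to int, so the compiler now catches mix-ups the old constants allowed.

    // Illustration only: scoped enumerators replace the old int-valued constants.
    Nbnxm::KernelType kernelType = Nbnxm::KernelType::Cpu4xN_Simd_4xN;
    // int width = kernelType;            // error: no implicit conversion to int
    // kernelType = nbnxnk4xN_SIMD_4xN;   // error: the old unscoped constants are gone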
real md3; // -V''' at the cutoff
};
-VerletbufListSetup verletbufGetListSetup(int nbnxnKernelType)
+VerletbufListSetup verletbufGetListSetup(Nbnxm::KernelType nbnxnKernelType)
{
/* Note that the current buffer estimation code only handles clusters
* of size 1, 2 or 4, so for 4x8 or 8x8 we use the estimate for 4x4.
*/
VerletbufListSetup listSetup;
- listSetup.cluster_size_i = nbnxn_kernel_to_cluster_i_size(nbnxnKernelType);
- listSetup.cluster_size_j = nbnxn_kernel_to_cluster_j_size(nbnxnKernelType);
+ listSetup.cluster_size_i = Nbnxm::IClusterSizePerKernelType[nbnxnKernelType];
+ listSetup.cluster_size_j = Nbnxm::JClusterSizePerKernelType[nbnxnKernelType];
- if (nbnxnKernelType == nbnxnk8x8x8_GPU ||
- nbnxnKernelType == nbnxnk8x8x8_PlainC)
+ if (!Nbnxm::kernelTypeUsesSimplePairlist(nbnxnKernelType))
{
/* The GPU kernels (except for OpenCL) split the j-clusters in two halves */
listSetup.cluster_size_j /= 2;
* i- and j-cluster sizes, so we potentially overestimate, but never
* underestimate, the buffer drift.
*/
- int nbnxnKernelType;
+ Nbnxm::KernelType nbnxnKernelType;
if (listType == ListSetupType::Gpu)
{
- nbnxnKernelType = nbnxnk8x8x8_GPU;
+ nbnxnKernelType = Nbnxm::KernelType::Gpu8x8x8;
}
else if (GMX_SIMD && listType == ListSetupType::CpuSimdWhenSupported)
{
#ifdef GMX_NBNXN_SIMD_2XNN
/* We use the smallest cluster size to be on the safe side */
- nbnxnKernelType = nbnxnk4xN_SIMD_2xNN;
+ nbnxnKernelType = Nbnxm::KernelType::Cpu4xN_Simd_2xNN;
#else
- nbnxnKernelType = nbnxnk4xN_SIMD_4xN;
+ nbnxnKernelType = Nbnxm::KernelType::Cpu4xN_Simd_4xN;
#endif
}
else
{
- nbnxnKernelType = nbnxnk4x4_PlainC;
+ nbnxnKernelType = Nbnxm::KernelType::Cpu4x4_PlainC;
}
return verletbufGetListSetup(nbnxnKernelType);
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
class RangePartitioning;
} // namespace gmx
+namespace Nbnxm
+{
+enum class KernelType;
+} // namespace Nbnxm
+
+
struct VerletbufListSetup
{
int cluster_size_i; /* Cluster pair-list i-cluster size atom count */
/* Returns the pair-list setup for the given nbnxn kernel type.
*/
-VerletbufListSetup verletbufGetListSetup(int nbnxnKernelType);
+VerletbufListSetup verletbufGetListSetup(Nbnxm::KernelType nbnxnKernelType);
/* Enum for choosing the list type for verletbufGetSafeListSetup() */
enum class ListSetupType
* the current coordinates of the atoms.
*/
wallcycle_sub_start(wcycle, ewcsNONBONDED_PRUNING);
- NbnxnDispatchPruneKernel(nbv, ilocality, fr->shift_vec);
+ nbv->dispatchPruneKernel(ilocality, fr->shift_vec);
wallcycle_sub_stop(wcycle, ewcsNONBONDED_PRUNING);
}
}
}
-static void do_nb_verlet_fep(nbnxn_pairlist_set_t *nbl_lists,
- t_forcerec *fr,
- rvec x[],
- rvec f[],
- const t_mdatoms *mdatoms,
- t_lambda *fepvals,
- real *lambda,
- gmx_enerdata_t *enerd,
- int flags,
- t_nrnb *nrnb,
- gmx_wallcycle_t wcycle)
+static void do_nb_verlet_fep(const nonbonded_verlet_t &nbv,
+ const Nbnxm::InteractionLocality iLocality,
+ t_forcerec *fr,
+ rvec x[],
+ rvec f[],
+ const t_mdatoms *mdatoms,
+ t_lambda *fepvals,
+ real *lambda,
+ gmx_enerdata_t *enerd,
+ int flags,
+ t_nrnb *nrnb,
+ gmx_wallcycle_t wcycle)
{
int donb_flags;
nb_kernel_data_t kernel_data;
real lam_i[efptNR];
real dvdl_nb[efptNR];
- int th;
int i, j;
donb_flags = 0;
dvdl_nb[i] = 0;
}
- GMX_ASSERT(gmx_omp_nthreads_get(emntNonbonded) == nbl_lists->nnbl, "Number of lists should be same as number of NB threads");
+ const gmx::ArrayRef<t_nblist const * const > nbl_fep = nbv.freeEnergyPairlistSet(iLocality);
+
+ GMX_ASSERT(gmx_omp_nthreads_get(emntNonbonded) == nbl_fep.ssize(), "Number of lists should be same as number of NB threads");
wallcycle_sub_start(wcycle, ewcsNONBONDED);
-#pragma omp parallel for schedule(static) num_threads(nbl_lists->nnbl)
- for (th = 0; th < nbl_lists->nnbl; th++)
+#pragma omp parallel for schedule(static) num_threads(nbl_fep.ssize())
+ for (int th = 0; th < nbl_fep.ssize(); th++)
{
try
{
- gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th],
+ gmx_nb_free_energy_kernel(nbl_fep[th],
x, f, fr, mdatoms, &kernel_data, nrnb);
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
}
reset_foreign_enerdata(enerd);
-#pragma omp parallel for schedule(static) num_threads(nbl_lists->nnbl)
- for (th = 0; th < nbl_lists->nnbl; th++)
+#pragma omp parallel for schedule(static) num_threads(nbl_fep.ssize())
+ for (int th = 0; th < nbl_fep.ssize(); th++)
{
try
{
- gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th],
+ gmx_nb_free_energy_kernel(nbl_fep[th],
x, f, fr, mdatoms, &kernel_data, nrnb);
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
wallcycle_start_nocount(wcycle, ewcNS);
wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL);
/* Note that with a GPU the launch overhead of the list transfer is not timed separately */
- nbnxn_make_pairlist(nbv, Nbnxm::InteractionLocality::Local,
- &top->excls, step, nrnb);
+ nbv->constructPairlist(Nbnxm::InteractionLocality::Local,
+ &top->excls, step, nrnb);
wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL);
wallcycle_stop(wcycle, ewcNS);
}
wallcycle_start_nocount(wcycle, ewcNS);
wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL);
/* Note that with a GPU the launch overhead of the list transfer is not timed separately */
- nbnxn_make_pairlist(nbv, Nbnxm::InteractionLocality::NonLocal,
- &top->excls, step, nrnb);
+ nbv->constructPairlist(Nbnxm::InteractionLocality::NonLocal,
+ &top->excls, step, nrnb);
wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL);
wallcycle_stop(wcycle, ewcNS);
}
/* Calculate the local and non-local free energy interactions here.
* Happens here on the CPU both with and without GPU.
*/
- if (fr->nbv->pairlistSets[Nbnxm::InteractionLocality::Local].nbl_fep[0]->nrj > 0)
+ if (fr->nbv->freeEnergyPairlistSet(Nbnxm::InteractionLocality::Local)[0]->nrj > 0)
{
- do_nb_verlet_fep(&fr->nbv->pairlistSets[Nbnxm::InteractionLocality::Local],
+ do_nb_verlet_fep(*nbv, Nbnxm::InteractionLocality::Local,
fr, as_rvec_array(x.unpaddedArrayRef().data()), f, mdatoms,
inputrec->fepvals, lambda,
enerd, flags, nrnb, wcycle);
}
- if (DOMAINDECOMP(cr) &&
- fr->nbv->pairlistSets[Nbnxm::InteractionLocality::NonLocal].nbl_fep[0]->nrj > 0)
+ if (havePPDomainDecomposition(cr) &&
+ fr->nbv->freeEnergyPairlistSet(Nbnxm::InteractionLocality::NonLocal)[0]->nrj > 0)
{
- do_nb_verlet_fep(&fr->nbv->pairlistSets[Nbnxm::InteractionLocality::NonLocal],
+ do_nb_verlet_fep(*nbv, Nbnxm::InteractionLocality::NonLocal,
fr, as_rvec_array(x.unpaddedArrayRef().data()), f, mdatoms,
inputrec->fepvals, lambda,
enerd, flags, nrnb, wcycle);
/* if there are multiple fshift output buffers reduce them */
if ((flags & GMX_FORCE_VIRIAL) &&
- nbv->pairlistSets[iloc].nnbl > 1)
+ nbv->pairlistSet(iloc).nnbl > 1)
{
/* This is not in a subcounter because it takes a
negligible and constant-sized amount of time */
}
/* skip the reduction if there was no non-local work to do */
- if (!nbv->pairlistSets[Nbnxm::InteractionLocality::NonLocal].nblGpu[0]->sci.empty())
+ if (!nbv->pairlistSet(Nbnxm::InteractionLocality::NonLocal).nblGpu[0]->sci.empty())
{
nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs.get(), Nbnxm::AtomLocality::NonLocal,
nbv->nbat, f, wcycle);
}
/* Initializes an nbnxn_atomdata_output_t data structure */
-nbnxn_atomdata_output_t::nbnxn_atomdata_output_t(int nb_kernel_type,
+nbnxn_atomdata_output_t::nbnxn_atomdata_output_t(Nbnxm::KernelType kernelType,
int numEnergyGroups,
int simdEnergyBufferStride,
gmx::PinningPolicy pinningPolicy) :
Vvdw.resize(numEnergyGroups*numEnergyGroups);
Vc.resize(numEnergyGroups*numEnergyGroups);
- if (nb_kernel_type == nbnxnk4xN_SIMD_4xN ||
- nb_kernel_type == nbnxnk4xN_SIMD_2xNN)
+ if (Nbnxm::kernelTypeIsSimd(kernelType))
{
- int cj_size = nbnxn_kernel_to_cluster_j_size(nb_kernel_type);
+ int cj_size = Nbnxm::JClusterSizePerKernelType[kernelType];
int numElements = numEnergyGroups*numEnergyGroups*simdEnergyBufferStride*(cj_size/2)*cj_size;
VSvdw.resize(numElements);
VSc.resize(numElements);
/* Initializes an nbnxn_atomdata_t::Params data structure */
static void nbnxn_atomdata_params_init(const gmx::MDLogger &mdlog,
nbnxn_atomdata_t::Params *params,
- int nb_kernel_type,
+ const Nbnxm::KernelType kernelType,
int enbnxninitcombrule,
int ntype, const real *nbfp,
int n_energygroups)
gmx::boolToString(bCombGeom), gmx::boolToString(bCombLB));
}
- simple = nbnxn_kernel_pairlist_simple(nb_kernel_type);
+ simple = Nbnxm::kernelTypeUsesSimplePairlist(kernelType);
switch (enbnxninitcombrule)
{
gmx_incons("Unknown enbnxninitcombrule");
}
- bSIMD = (nb_kernel_type == nbnxnk4xN_SIMD_4xN ||
- nb_kernel_type == nbnxnk4xN_SIMD_2xNN);
+ bSIMD = Nbnxm::kernelTypeIsSimd(kernelType);
set_lj_parameter_data(params, bSIMD);
/* Initializes an nbnxn_atomdata_t data structure */
void nbnxn_atomdata_init(const gmx::MDLogger &mdlog,
nbnxn_atomdata_t *nbat,
- int nb_kernel_type,
+ const Nbnxm::KernelType kernelType,
int enbnxninitcombrule,
int ntype, const real *nbfp,
int n_energygroups,
int nout)
{
- nbnxn_atomdata_params_init(mdlog, &nbat->paramsDeprecated(), nb_kernel_type,
+ nbnxn_atomdata_params_init(mdlog, &nbat->paramsDeprecated(), kernelType,
enbnxninitcombrule, ntype, nbfp, n_energygroups);
- const gmx_bool simple = nbnxn_kernel_pairlist_simple(nb_kernel_type);
- const gmx_bool bSIMD = (nb_kernel_type == nbnxnk4xN_SIMD_4xN ||
- nb_kernel_type == nbnxnk4xN_SIMD_2xNN);
+ const bool simple = Nbnxm::kernelTypeUsesSimplePairlist(kernelType);
+ const bool bSIMD = Nbnxm::kernelTypeIsSimd(kernelType);
if (simple)
{
if (bSIMD)
{
pack_x = std::max(c_nbnxnCpuIClusterSize,
- nbnxn_kernel_to_cluster_j_size(nb_kernel_type));
+ Nbnxm::JClusterSizePerKernelType[kernelType]);
switch (pack_x)
{
case 4:
for (int i = 0; i < nout; i++)
{
const auto &pinningPolicy = nbat->params().type.get_allocator().pinningPolicy();
- nbat->out.emplace_back(nb_kernel_type, nbat->params().nenergrp, 1 << nbat->params().neg_2log,
+ nbat->out.emplace_back(kernelType, nbat->params().nenergrp, 1 << nbat->params().neg_2log,
pinningPolicy);
}
struct t_mdatoms;
struct gmx_wallcycle;
+namespace Nbnxm
+{
+enum class KernelType;
+}
+
/* Reallocate the nbnxn_atomdata_t for a size of n atoms */
void nbnxn_atomdata_realloc(nbnxn_atomdata_t *nbat, int n);
*/
void nbnxn_atomdata_init(const gmx::MDLogger &mdlog,
nbnxn_atomdata_t *nbat,
- int nb_kernel_type,
+ Nbnxm::KernelType kernelType,
int enbnxninitcombrule,
int ntype, const real *nbfp,
int n_energygroups,
grid->bSimple = nbv->pairlistIsSimple();
- grid->na_c = nbnxn_kernel_to_cluster_i_size(nbv->kernelType_);
- grid->na_cj = nbnxn_kernel_to_cluster_j_size(nbv->kernelType_);
+ grid->na_c = IClusterSizePerListType[nbv->listParams->pairlistType];
+ grid->na_cj = JClusterSizePerListType[nbv->listParams->pairlistType];
grid->na_sc = (grid->bSimple ? 1 : c_gpuNumClusterPerCell)*grid->na_c;
grid->na_c_2log = get_2log(grid->na_c);
* within this function.
*
* \param[in] pairlistSet Pairlists with local or non-local interactions to compute
- * \param[in] kernel_type The non-bonded kernel type
- * \param[in] ewald_excl The Ewald exclusion treatment
+ * \param[in] kernelSetup The non-bonded kernel setup
* \param[in,out] nbat The atomdata for the interactions
* \param[in] ic Non-bonded interaction constants
* \param[in] shiftVectors The PBC shift vectors
*/
static void
nbnxn_kernel_cpu(const nbnxn_pairlist_set_t &pairlistSet,
- const int kernel_type,
- const int ewald_excl,
+ const Nbnxm::KernelSetup &kernelSetup,
nbnxn_atomdata_t *nbat,
const interaction_const_t &ic,
rvec *shiftVectors,
}
else
{
- if (ewald_excl == ewaldexclTable)
+ if (kernelSetup.ewaldExclusionType == Nbnxm::EwaldExclusionType::Table)
{
if (ic.rcoulomb == ic.rvdw)
{
{
vdwkt = vdwktLJEWALDCOMBLB;
/* At setup we (should have) selected the C reference kernel */
- GMX_RELEASE_ASSERT(kernel_type == nbnxnk4x4_PlainC, "Only the C reference nbnxn SIMD kernel supports LJ-PME with LB combination rules");
+ GMX_RELEASE_ASSERT(kernelSetup.kernelType == Nbnxm::KernelType::Cpu4x4_PlainC, "Only the plain-C reference nbnxn kernel supports LJ-PME with LB combination rules");
}
}
else
if (!(forceFlags & GMX_FORCE_ENERGY))
{
/* Don't calculate energies */
- switch (kernel_type)
+ switch (kernelSetup.kernelType)
{
- case nbnxnk4x4_PlainC:
+ case Nbnxm::KernelType::Cpu4x4_PlainC:
nbnxn_kernel_noener_ref[coulkt][vdwkt](nbl[nb], nbat,
&ic,
shiftVectors,
fshift_p);
break;
#ifdef GMX_NBNXN_SIMD_2XNN
- case nbnxnk4xN_SIMD_2xNN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
nbnxm_kernel_noener_simd_2xmm[coulkt][vdwkt](nbl[nb], nbat,
&ic,
shiftVectors,
break;
#endif
#ifdef GMX_NBNXN_SIMD_4XN
- case nbnxnk4xN_SIMD_4xN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
nbnxm_kernel_noener_simd_4xm[coulkt][vdwkt](nbl[nb], nbat,
&ic,
shiftVectors,
out->Vvdw[0] = 0;
out->Vc[0] = 0;
- switch (kernel_type)
+ switch (kernelSetup.kernelType)
{
- case nbnxnk4x4_PlainC:
+ case Nbnxm::KernelType::Cpu4x4_PlainC:
nbnxn_kernel_ener_ref[coulkt][vdwkt](nbl[nb], nbat,
&ic,
shiftVectors,
out->Vc.data());
break;
#ifdef GMX_NBNXN_SIMD_2XNN
- case nbnxnk4xN_SIMD_2xNN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
nbnxm_kernel_ener_simd_2xmm[coulkt][vdwkt](nbl[nb], nbat,
&ic,
shiftVectors,
break;
#endif
#ifdef GMX_NBNXN_SIMD_4XN
- case nbnxnk4xN_SIMD_4xN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
nbnxm_kernel_ener_simd_4xm[coulkt][vdwkt](nbl[nb], nbat,
&ic,
shiftVectors,
int unrollj = 0;
- switch (kernel_type)
+ switch (kernelSetup.kernelType)
{
- case nbnxnk4x4_PlainC:
+ case Nbnxm::KernelType::Cpu4x4_PlainC:
unrollj = c_nbnxnCpuIClusterSize;
nbnxn_kernel_energrp_ref[coulkt][vdwkt](nbl[nb], nbat,
&ic,
out->Vc.data());
break;
#ifdef GMX_NBNXN_SIMD_2XNN
- case nbnxnk4xN_SIMD_2xNN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
unrollj = GMX_SIMD_REAL_WIDTH/2;
nbnxm_kernel_energrp_simd_2xmm[coulkt][vdwkt](nbl[nb], nbat,
&ic,
break;
#endif
#ifdef GMX_NBNXN_SIMD_4XN
- case nbnxnk4xN_SIMD_4xN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
unrollj = GMX_SIMD_REAL_WIDTH;
nbnxm_kernel_energrp_simd_4xm[coulkt][vdwkt](nbl[nb], nbat,
&ic,
GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
}
- if (kernel_type != nbnxnk4x4_PlainC)
+ if (kernelSetup.kernelType != Nbnxm::KernelType::Cpu4x4_PlainC)
{
switch (unrollj)
{
const interaction_const_t &ic,
const int forceFlags)
{
- const nbnxn_pairlist_set_t &pairlistSet = nbv.pairlistSets[iLocality];
+ const nbnxn_pairlist_set_t &pairlistSet = nbv.pairlistSet(iLocality);
const bool usingGpuKernels = nbv.useGpu();
int enr_nbnxn_kernel_ljc;
{
enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_RF;
}
- else if ((!usingGpuKernels && nbv.ewaldExclusionType_ == ewaldexclAnalytical) ||
+ else if ((!usingGpuKernels && nbv.kernelSetup().ewaldExclusionType == Nbnxm::EwaldExclusionType::Analytical) ||
(usingGpuKernels && Nbnxm::gpu_is_kernel_ewald_analytical(nbv.gpu_nbv)))
{
enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_EWALD;
gmx_enerdata_t *enerd,
t_nrnb *nrnb)
{
- const nbnxn_pairlist_set_t &pairlistSet = nbv->pairlistSets[iLocality];
+ const nbnxn_pairlist_set_t &pairlistSet = nbv->pairlistSet(iLocality);
- switch (nbv->kernelType_)
+ switch (nbv->kernelSetup().kernelType)
{
- case nbnxnk4x4_PlainC:
- case nbnxnk4xN_SIMD_4xN:
- case nbnxnk4xN_SIMD_2xNN:
+ case Nbnxm::KernelType::Cpu4x4_PlainC:
+ case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
nbnxn_kernel_cpu(pairlistSet,
- nbv->kernelType_,
- nbv->ewaldExclusionType_,
+ nbv->kernelSetup(),
nbv->nbat,
ic,
fr->shift_vec,
enerd->grpp.ener[egLJSR]);
break;
- case nbnxnk8x8x8_GPU:
+ case Nbnxm::KernelType::Gpu8x8x8:
Nbnxm::gpu_launch_kernel(nbv->gpu_nbv, forceFlags, iLocality);
break;
- case nbnxnk8x8x8_PlainC:
+ case Nbnxm::KernelType::Cpu8x8x8_PlainC:
nbnxn_kernel_gpu_ref(pairlistSet.nblGpu[0],
nbv->nbat, &ic,
fr->shift_vec,
struct gmx_mtop_t;
struct interaction_const_t;
struct nbnxn_pairlist_set_t;
+struct nonbonded_verlet_t;
struct t_blocka;
struct t_commrec;
struct t_nrnb;
class UpdateGroupsCog;
}
-//! Help pass GPU-emulation parameters with type safety.
-enum class EmulateGpuNonbonded : bool
+/*! \brief Resources on which the non-bonded kernels can be executed */
+enum class NonbondedResource : int
{
- //! Do not emulate GPUs.
- No,
- //! Do emulate GPUs.
- Yes
+ Cpu,
+ Gpu,
+ EmulateGpu
};
+namespace Nbnxm
+{
/*! \brief Nonbonded NxN kernel types: plain C, CPU SIMD, GPU, GPU emulation */
-typedef enum
+enum class KernelType : int
{
- nbnxnkNotSet = 0,
- nbnxnk4x4_PlainC,
- nbnxnk4xN_SIMD_4xN,
- nbnxnk4xN_SIMD_2xNN,
- nbnxnk8x8x8_GPU,
- nbnxnk8x8x8_PlainC,
- nbnxnkNR
-} nbnxn_kernel_type;
+ NotSet = 0,
+ Cpu4x4_PlainC,
+ Cpu4xN_Simd_4xN,
+ Cpu4xN_Simd_2xNN,
+ Gpu8x8x8,
+ Cpu8x8x8_PlainC,
+ Count
+};
-namespace Nbnxm
+/*! \brief Ewald exclusion types */
+enum class EwaldExclusionType : int
+{
+ NotSet = 0,
+ Table,
+ Analytical,
+ DecidedByGpuModule
+};
+
+/*! \brief The non-bonded kernel setup, which also affects the pairlist construction kernel */
+struct KernelSetup
{
+ //! The non-bonded kernel type, which also affects the pairlist construction kernel
+ KernelType kernelType = KernelType::NotSet;
+ //! Ewald exclusion computation handling type, currently only used for CPU
+ EwaldExclusionType ewaldExclusionType = EwaldExclusionType::NotSet;
+};
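A rough usage sketch, assuming the kernelSetup() accessor added to nonbonded_verlet_t further down in this change; the helper name is hypothetical.

    // Sketch only: dispatch code branches on the combined setup rather than on
    // two separate int members.
    static bool usesTabulatedEwaldExclusions(const nonbonded_verlet_t &nbv)
    {
        const Nbnxm::KernelSetup &setup = nbv.kernelSetup();
        return (setup.kernelType != Nbnxm::KernelType::Gpu8x8x8 &&
                setup.ewaldExclusionType == Nbnxm::EwaldExclusionType::Table);
    }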
/*! \brief Return a string identifying the kernel type.
*
- * \param [in] kernel_type nonbonded kernel types, takes values from the nbnxn_kernel_type enum
- * \returns a string identifying the kernel corresponding to the type passed as argument
+ * \param [in] kernelType nonbonded kernel type, takes values from the KernelType enum class
+ * \returns a string identifying the kernel corresponding to the type passed as argument
*/
-const char *lookup_kernel_name(int kernel_type);
+const char *lookup_kernel_name(Nbnxm::KernelType kernelType);
} // namespace Nbnxm
-/*! \brief Ewald exclusion types */
-enum {
- ewaldexclTable, ewaldexclAnalytical
-};
-
/*! \brief Flag to tell the nonbonded kernels whether to clear the force output buffers */
enum {
enbvClearFNo, enbvClearFYes
};
+/*! \brief Generates a pair-list for the given locality.
+ *
+ * With perturbed particles, a group-scheme style nbl_fep list is also made.
+ */
+void nbnxn_make_pairlist(nonbonded_verlet_t *nbv,
+ Nbnxm::InteractionLocality iLocality,
+ nbnxn_pairlist_set_t *pairlistSet,
+ const t_blocka *excl,
+ int64_t step,
+ t_nrnb *nrnb);
+
+/*! \brief Prune all pair-lists with the given locality (currently CPU only)
+ *
+ * For all pair-lists with the given locality, takes the outer list, prunes out
+ * pairs beyond the pairlist inner radius, and writes the result to a list that is
+ * to be consumed by the non-bonded kernel.
+ */
+void NbnxnDispatchPruneKernel(nbnxn_pairlist_set_t *pairlistSet,
+ Nbnxm::KernelType kernelType,
+ const nbnxn_atomdata_t *nbat,
+ const rvec *shift_vec);
+
/*! \libinternal
* \brief Top-level non-bonded data structure for the Verlet-type cut-off scheme. */
struct nonbonded_verlet_t
{
- //! Returns whether a GPU is used for the non-bonded calculations
- bool useGpu() const
- {
- return kernelType_ == nbnxnk8x8x8_GPU;
- }
-
- //! Returns whether a GPU is emulated for the non-bonded calculations
- bool emulateGpu() const
- {
- return kernelType_ == nbnxnk8x8x8_PlainC;
- }
-
- //! Return whether the pairlist is of simple, CPU type
- bool pairlistIsSimple() const
- {
- return !useGpu() && !emulateGpu();
- }
-
- std::unique_ptr<NbnxnListParameters> listParams; /**< Parameters for the search and list pruning setup */
- std::unique_ptr<nbnxn_search> nbs; /**< n vs n atom pair searching data */
- int ngrp; /**< number of interaction groups */
- //! Local and non-local pairlist sets
- gmx::EnumerationArray<Nbnxm::InteractionLocality, nbnxn_pairlist_set_t> pairlistSets;
- //! Atom data
- nbnxn_atomdata_t *nbat;
-
- //! Non-bonded kernel - see enum above
- int kernelType_;
- //! Ewald exclusion - see enum above
- int ewaldExclusionType_;
-
- gmx_nbnxn_gpu_t *gpu_nbv; /**< pointer to GPU nb verlet data */
- int min_ci_balanced; /**< pair list balancing parameter used for the 8x8x8 GPU kernels */
+ public:
+ //! Returns whether a GPU is used for the non-bonded calculations
+ bool useGpu() const
+ {
+ return kernelSetup_.kernelType == Nbnxm::KernelType::Gpu8x8x8;
+ }
+
+ //! Returns whether a GPU is emulated for the non-bonded calculations
+ bool emulateGpu() const
+ {
+ return kernelSetup_.kernelType == Nbnxm::KernelType::Cpu8x8x8_PlainC;
+ }
+
+ //! Return whether the pairlist is of simple, CPU type
+ bool pairlistIsSimple() const
+ {
+ return !useGpu() && !emulateGpu();
+ }
+
+ //! Initializes the pair list sets; TODO: this should be private
+ void initPairlistSets(bool haveMultipleDomains);
+
+ //! Returns a reference to the pairlist set for the requested locality
+ const nbnxn_pairlist_set_t &pairlistSet(Nbnxm::InteractionLocality iLocality) const
+ {
+ GMX_ASSERT(static_cast<size_t>(iLocality) < pairlistSets_.size(),
+ "The requested locality should be in the list");
+ return pairlistSets_[static_cast<int>(iLocality)];
+ }
+
+ //! Constructs the pairlist for the given locality
+ void constructPairlist(Nbnxm::InteractionLocality iLocality,
+ const t_blocka *excl,
+ int64_t step,
+ t_nrnb *nrnb)
+ {
+ nbnxn_make_pairlist(this, iLocality, &pairlistSets_[static_cast<int>(iLocality)], excl, step, nrnb);
+ }
+
+ //! Dispatches the dynamic pruning kernel for the given locality
+ void dispatchPruneKernel(Nbnxm::InteractionLocality iLocality,
+ const rvec *shift_vec)
+ {
+ GMX_ASSERT(static_cast<size_t>(iLocality) < pairlistSets_.size(),
+ "The requested locality should be in the list");
+ NbnxnDispatchPruneKernel(&pairlistSets_[static_cast<int>(iLocality)],
+ kernelSetup_.kernelType, nbat, shift_vec);
+ }
+
+ //! Return the kernel setup
+ const Nbnxm::KernelSetup &kernelSetup() const
+ {
+ return kernelSetup_;
+ }
+
+ //! Sets the kernel setup, TODO: make private
+ void setKernelSetup(const Nbnxm::KernelSetup &kernelSetup)
+ {
+ kernelSetup_ = kernelSetup;
+ }
+
+ //! Returns the list of free-energy pairlists for the given locality
+ const gmx::ArrayRef<t_nblist const * const>
+ freeEnergyPairlistSet(Nbnxm::InteractionLocality iLocality) const
+ {
+ return pairlistSet(iLocality).nbl_fep;
+ }
+
+ //! Parameters for the search and list pruning setup
+ std::unique_ptr<NbnxnListParameters> listParams;
+ //! Working data for constructing the pairlists
+ std::unique_ptr<nbnxn_search> nbs;
+ private:
+ //! Local and, optionally, non-local pairlist sets
+ std::vector<nbnxn_pairlist_set_t> pairlistSets_;
+ public:
+ //! Atom data
+ nbnxn_atomdata_t *nbat;
+
+ private:
+ //! The non-bonded setup, also affects the pairlist construction kernel
+ Nbnxm::KernelSetup kernelSetup_;
+ public:
+
+ gmx_nbnxn_gpu_t *gpu_nbv; /**< pointer to GPU nb verlet data */
+ int min_ci_balanced; /**< pair list balancing parameter used for the 8x8x8 GPU kernels */
};
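Since pairlistSets_ is now private, callers go through the member functions instead of indexing the array directly; a sketch of the intended call sites (the helper and its parameter list are hypothetical, the member functions are the ones declared above).

    // Sketch only: construction, pruning and read-only access all go through
    // nonbonded_verlet_t; in mdrun these calls happen on different steps.
    static void exerciseLocalPairlist(nonbonded_verlet_t *nbv,
                                      const t_blocka     *excl,
                                      int64_t             step,
                                      t_nrnb             *nrnb,
                                      const rvec         *shiftVec)
    {
        nbv->constructPairlist(Nbnxm::InteractionLocality::Local, excl, step, nrnb);
        nbv->dispatchPruneKernel(Nbnxm::InteractionLocality::Local, shiftVec);
        const nbnxn_pairlist_set_t &lists = nbv->pairlistSet(Nbnxm::InteractionLocality::Local);
        GMX_UNUSED_VALUE(lists);
    }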
namespace Nbnxm
/*! \brief Returns the index position of the atoms on the pairlist search grid */
gmx::ArrayRef<const int> nbnxn_get_gridindices(const nbnxn_search* nbs);
-/*! \brief Generates a pair-list for the given locality.
- *
- * With perturbed particles, also a group scheme style nbl_fep list is made.
- */
-void nbnxn_make_pairlist(nonbonded_verlet_t *nbv,
- Nbnxm::InteractionLocality iLocality,
- const t_blocka *excl,
- int64_t step,
- t_nrnb *nrnb);
-
/*! \brief Returns the number of steps performed with the current pair list */
int nbnxnNumStepsWithPairlist(const nonbonded_verlet_t &nbv,
Nbnxm::InteractionLocality ilocality,
Nbnxm::InteractionLocality ilocality,
int64_t step);
-/*! \brief Prune all pair-lists with given locality (currently CPU only)
- *
- * For all pair-lists with given locality, takes the outer list and prunes out
- * pairs beyond the pairlist inner radius and writes the result to a list that is
- * to be consumed by the non-bonded kernel.
- */
-void NbnxnDispatchPruneKernel(nonbonded_verlet_t *nbv,
- Nbnxm::InteractionLocality iLocality,
- const rvec *shift_vec);
-
/*! \brief Executes the non-bonded kernel of the GPU or launches it on the GPU */
void NbnxnDispatchKernel(nonbonded_verlet_t *nbv,
Nbnxm::InteractionLocality iLocality,
#include "gromacs/nbnxm/nbnxm.h"
#include "gromacs/nbnxm/pairlist.h"
-#include "gromacs/simd/simd.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/real.h"
-bool nbnxn_kernel_pairlist_simple(int nb_kernel_type)
-{
- if (nb_kernel_type == nbnxnkNotSet)
- {
- gmx_fatal(FARGS, "Non-bonded kernel type not set for Verlet-style pair-list.");
- }
-
- switch (nb_kernel_type)
- {
- case nbnxnk8x8x8_GPU:
- case nbnxnk8x8x8_PlainC:
- return false;
-
- case nbnxnk4x4_PlainC:
- case nbnxnk4xN_SIMD_4xN:
- case nbnxnk4xN_SIMD_2xNN:
- return true;
-
- default:
- gmx_incons("Invalid nonbonded kernel type passed!");
- return false;
- }
-}
-
-int nbnxn_kernel_to_cluster_i_size(int nb_kernel_type)
-{
- switch (nb_kernel_type)
- {
- case nbnxnk4x4_PlainC:
- case nbnxnk4xN_SIMD_4xN:
- case nbnxnk4xN_SIMD_2xNN:
- return c_nbnxnCpuIClusterSize;
- case nbnxnk8x8x8_GPU:
- case nbnxnk8x8x8_PlainC:
- /* The cluster size for super/sub lists is only set here.
- * Any value should work for the pair-search and atomdata code.
- * The kernels, of course, might require a particular value.
- */
- return c_nbnxnGpuClusterSize;
- default:
- gmx_incons("unknown kernel type");
- }
-}
-
-int nbnxn_kernel_to_cluster_j_size(int nb_kernel_type)
-{
- int nbnxn_simd_width = 0;
- int cj_size = 0;
-
-#if GMX_SIMD
- nbnxn_simd_width = GMX_SIMD_REAL_WIDTH;
-#endif
-
- switch (nb_kernel_type)
- {
- case nbnxnk4x4_PlainC:
- cj_size = c_nbnxnCpuIClusterSize;
- break;
- case nbnxnk4xN_SIMD_4xN:
- cj_size = nbnxn_simd_width;
- break;
- case nbnxnk4xN_SIMD_2xNN:
- cj_size = nbnxn_simd_width/2;
- break;
- case nbnxnk8x8x8_GPU:
- case nbnxnk8x8x8_PlainC:
- cj_size = nbnxn_kernel_to_cluster_i_size(nb_kernel_type);
- break;
- default:
- gmx_incons("unknown kernel type");
- }
-
- return cj_size;
-}
-
/* Clusters at the cut-off only increase rlist by 60% of their size */
static constexpr real c_nbnxnRlistIncreaseOutsideFactor = 0.6;
#define GMX_NBNXM_NBNXM_GEOMETRY_H
#include "gromacs/math/vectypes.h"
+#include "gromacs/nbnxm/nbnxm.h"
+#include "gromacs/nbnxm/pairlist.h"
+#include "gromacs/simd/simd.h"
#include "gromacs/utility/fatalerror.h"
/* Returns the base-2 log of n.
return log2;
}
+namespace Nbnxm
+{
+
+/* The nbnxn i-cluster size in atoms for each nbnxn kernel type */
+static constexpr gmx::EnumerationArray<KernelType, int> IClusterSizePerKernelType =
+{
+ 0,
+ c_nbnxnCpuIClusterSize,
+ c_nbnxnCpuIClusterSize,
+ c_nbnxnCpuIClusterSize,
+ c_nbnxnGpuClusterSize,
+ c_nbnxnGpuClusterSize
+};
+
+/* The nbnxn j-cluster size in atoms for each nbnxn kernel type */
+static constexpr gmx::EnumerationArray<KernelType, int> JClusterSizePerKernelType =
+{
+ 0,
+ c_nbnxnCpuIClusterSize,
+#if GMX_SIMD
+ GMX_SIMD_REAL_WIDTH,
+ GMX_SIMD_REAL_WIDTH/2,
+#else
+ 0,
+ 0,
+#endif
+ c_nbnxnGpuClusterSize,
+ c_nbnxnGpuClusterSize
+};
+
/* Returns whether the pair-list corresponding to nb_kernel_type is simple */
-bool nbnxn_kernel_pairlist_simple(int nb_kernel_type);
+static inline bool kernelTypeUsesSimplePairlist(const KernelType kernelType)
+{
+ return (kernelType == KernelType::Cpu4x4_PlainC ||
+ kernelType == KernelType::Cpu4xN_Simd_4xN ||
+ kernelType == KernelType::Cpu4xN_Simd_2xNN);
+}
-/* Returns the nbnxn i-cluster size in atoms for the nbnxn kernel type */
-int nbnxn_kernel_to_cluster_i_size(int nb_kernel_type);
+/* Returns whether the kernel type is a CPU SIMD kernel */
+static inline bool kernelTypeIsSimd(const KernelType kernelType)
+{
+ return (kernelType == KernelType::Cpu4xN_Simd_4xN ||
+ kernelType == KernelType::Cpu4xN_Simd_2xNN);
+}
-/* Returns the nbnxn i-cluster size in atoms for the nbnxn kernel type */
-int nbnxn_kernel_to_cluster_j_size(int nb_kernel_type);
+} // namespace Nbnxm
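A small sketch of the lookups that replace the removed nbnxn_kernel_to_cluster_*_size() helpers; the kernel type value is chosen only for illustration.

    // Sketch only: cluster sizes are table lookups on the scoped kernel type.
    const Nbnxm::KernelType kernelType   = Nbnxm::KernelType::Cpu4x4_PlainC;
    const int               clusterSizeI = Nbnxm::IClusterSizePerKernelType[kernelType];
    const int               clusterSizeJ = Nbnxm::JClusterSizePerKernelType[kernelType];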
/* Returns the effective list radius of the pair-list
*
#include "gromacs/nbnxm/nbnxm.h"
#include "gromacs/nbnxm/nbnxm_geometry.h"
#include "gromacs/nbnxm/nbnxm_simd.h"
+#include "gromacs/nbnxm/pairlist.h"
#include "gromacs/nbnxm/pairlist_tuning.h"
#include "gromacs/nbnxm/pairlistset.h"
#include "gromacs/simd/simd.h"
}
/*! \brief Returns the most suitable CPU kernel type and Ewald handling */
-static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
- int *kernel_type,
- int *ewald_excl,
- const gmx_hw_info_t gmx_unused &hardwareInfo)
+static KernelSetup
+pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
+ const gmx_hw_info_t gmx_unused &hardwareInfo)
{
- *kernel_type = nbnxnk4x4_PlainC;
- *ewald_excl = ewaldexclTable;
+ KernelSetup kernelSetup;
-#if GMX_SIMD
+ if (!GMX_SIMD)
+ {
+ kernelSetup.kernelType = KernelType::Cpu4x4_PlainC;
+ kernelSetup.ewaldExclusionType = EwaldExclusionType::Table;
+ }
+ else
{
#ifdef GMX_NBNXN_SIMD_4XN
- *kernel_type = nbnxnk4xN_SIMD_4xN;
+ kernelSetup.kernelType = KernelType::Cpu4xN_Simd_4xN;
#endif
#ifdef GMX_NBNXN_SIMD_2XNN
- *kernel_type = nbnxnk4xN_SIMD_2xNN;
+ kernelSetup.kernelType = KernelType::Cpu4xN_Simd_2xNN;
#endif
#if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
* use of HT, use 4x8 to avoid a potential performance hit.
* On Intel Haswell 4x8 is always faster.
*/
- *kernel_type = nbnxnk4xN_SIMD_4xN;
+ kernelSetup.kernelType = KernelType::Cpu4xN_Simd_4xN;
-#if !GMX_SIMD_HAVE_FMA
- if (EEL_PME_EWALD(ir->coulombtype) ||
- EVDW_PME(ir->vdwtype))
+ if (!GMX_SIMD_HAVE_FMA && (EEL_PME_EWALD(ir->coulombtype) ||
+ EVDW_PME(ir->vdwtype)))
{
/* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
* There are enough instructions to make 2x(4+4) efficient.
*/
- *kernel_type = nbnxnk4xN_SIMD_2xNN;
+ kernelSetup.kernelType = KernelType::Cpu4xN_Simd_2xNN;
}
-#endif
+
if (hardwareInfo.haveAmdZenCpu)
{
/* One 256-bit FMA per cycle makes 2xNN faster */
- *kernel_type = nbnxnk4xN_SIMD_2xNN;
+ kernelSetup.kernelType = KernelType::Cpu4xN_Simd_2xNN;
}
#endif /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
if (getenv("GMX_NBNXN_SIMD_4XN") != nullptr)
{
#ifdef GMX_NBNXN_SIMD_4XN
- *kernel_type = nbnxnk4xN_SIMD_4xN;
+ kernelSetup.kernelType = KernelType::Cpu4xN_Simd_4xN;
#else
gmx_fatal(FARGS, "SIMD 4xN kernels requested, but GROMACS has been compiled without support for these kernels");
#endif
if (getenv("GMX_NBNXN_SIMD_2XNN") != nullptr)
{
#ifdef GMX_NBNXN_SIMD_2XNN
- *kernel_type = nbnxnk4xN_SIMD_2xNN;
+ kernelSetup.kernelType = KernelType::Cpu4xN_Simd_2xNN;
#else
gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but GROMACS has been compiled without support for these kernels");
#endif
* will probably always be faster for a SIMD width of 8 or more.
* With FMA analytical is sometimes faster for a width of 4 as well.
* In single precision, this is faster on Bulldozer.
- */
-#if GMX_SIMD_REAL_WIDTH >= 8 || \
- (GMX_SIMD_REAL_WIDTH >= 4 && GMX_SIMD_HAVE_FMA && !GMX_DOUBLE)
- /* On AMD Zen, tabulated Ewald kernels are faster on all 4 combinations
+ * On AMD Zen, tabulated Ewald kernels are faster on all 4 combinations
* of single or double precision and 128 or 256-bit AVX2.
*/
- if (!hardwareInfo.haveAmdZenCpu)
+ if (
+#if GMX_SIMD
+ (GMX_SIMD_REAL_WIDTH >= 8 ||
+ (GMX_SIMD_REAL_WIDTH >= 4 && GMX_SIMD_HAVE_FMA && !GMX_DOUBLE)) &&
+#endif
+ !hardwareInfo.haveAmdZenCpu)
{
- *ewald_excl = ewaldexclAnalytical;
+ kernelSetup.ewaldExclusionType = EwaldExclusionType::Analytical;
+ }
+ else
+ {
+ kernelSetup.ewaldExclusionType = EwaldExclusionType::Table;
}
-#endif
if (getenv("GMX_NBNXN_EWALD_TABLE") != nullptr)
{
- *ewald_excl = ewaldexclTable;
+ kernelSetup.ewaldExclusionType = EwaldExclusionType::Table;
}
if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != nullptr)
{
- *ewald_excl = ewaldexclAnalytical;
+ kernelSetup.ewaldExclusionType = EwaldExclusionType::Analytical;
}
}
-#endif // GMX_SIMD
+
+ return kernelSetup;
}
-const char *lookup_kernel_name(int kernel_type)
+const char *lookup_kernel_name(const KernelType kernelType)
{
const char *returnvalue = nullptr;
- switch (kernel_type)
+ switch (kernelType)
{
- case nbnxnkNotSet:
+ case KernelType::NotSet:
returnvalue = "not set";
break;
- case nbnxnk4x4_PlainC:
+ case KernelType::Cpu4x4_PlainC:
returnvalue = "plain C";
break;
- case nbnxnk4xN_SIMD_4xN:
- case nbnxnk4xN_SIMD_2xNN:
+ case KernelType::Cpu4xN_Simd_4xN:
+ case KernelType::Cpu4xN_Simd_2xNN:
#if GMX_SIMD
returnvalue = "SIMD";
#else // GMX_SIMD
returnvalue = "not available";
#endif // GMX_SIMD
break;
- case nbnxnk8x8x8_GPU: returnvalue = "GPU"; break;
- case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
+ case KernelType::Gpu8x8x8: returnvalue = "GPU"; break;
+ case KernelType::Cpu8x8x8_PlainC: returnvalue = "plain C"; break;
- case nbnxnkNR:
default:
gmx_fatal(FARGS, "Illegal kernel type selected");
}
};
/*! \brief Returns the most suitable kernel type and Ewald handling */
-static void pick_nbnxn_kernel(const gmx::MDLogger &mdlog,
- gmx_bool use_simd_kernels,
- const gmx_hw_info_t &hardwareInfo,
- bool useGpu,
- EmulateGpuNonbonded emulateGpu,
- const t_inputrec *ir,
- int *kernel_type,
- int *ewald_excl,
- gmx_bool bDoNonbonded)
+static KernelSetup
+pick_nbnxn_kernel(const gmx::MDLogger &mdlog,
+ gmx_bool use_simd_kernels,
+ const gmx_hw_info_t &hardwareInfo,
+ const NonbondedResource &nonbondedResource,
+ const t_inputrec *ir,
+ gmx_bool bDoNonbonded)
{
- GMX_RELEASE_ASSERT(kernel_type, "Need a valid kernel_type pointer");
+ KernelSetup kernelSetup;
- *kernel_type = nbnxnkNotSet;
- *ewald_excl = ewaldexclTable;
-
- if (emulateGpu == EmulateGpuNonbonded::Yes)
+ if (nonbondedResource == NonbondedResource::EmulateGpu)
{
- *kernel_type = nbnxnk8x8x8_PlainC;
+ kernelSetup.kernelType = KernelType::Cpu8x8x8_PlainC;
+ kernelSetup.ewaldExclusionType = EwaldExclusionType::DecidedByGpuModule;
if (bDoNonbonded)
{
GMX_LOG(mdlog.warning).asParagraph().appendText("Emulating a GPU run on the CPU (slow)");
}
}
- else if (useGpu)
+ else if (nonbondedResource == NonbondedResource::Gpu)
{
- *kernel_type = nbnxnk8x8x8_GPU;
+ kernelSetup.kernelType = KernelType::Gpu8x8x8;
+ kernelSetup.ewaldExclusionType = EwaldExclusionType::DecidedByGpuModule;
}
-
- if (*kernel_type == nbnxnkNotSet)
+ else
{
if (use_simd_kernels &&
nbnxn_simd_supported(mdlog, ir))
{
- pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl, hardwareInfo);
+ kernelSetup = pick_nbnxn_kernel_cpu(ir, hardwareInfo);
}
else
{
- *kernel_type = nbnxnk4x4_PlainC;
+ kernelSetup.kernelType = KernelType::Cpu4x4_PlainC;
+ kernelSetup.ewaldExclusionType = EwaldExclusionType::Analytical;
}
}
{
GMX_LOG(mdlog.info).asParagraph().appendTextFormatted(
"Using %s %dx%d nonbonded short-range kernels",
- lookup_kernel_name(*kernel_type),
- nbnxn_kernel_to_cluster_i_size(*kernel_type),
- nbnxn_kernel_to_cluster_j_size(*kernel_type));
+ lookup_kernel_name(kernelSetup.kernelType),
+ IClusterSizePerKernelType[kernelSetup.kernelType],
+ JClusterSizePerKernelType[kernelSetup.kernelType]);
- if (nbnxnk4x4_PlainC == *kernel_type ||
- nbnxnk8x8x8_PlainC == *kernel_type)
+ if (KernelType::Cpu4x4_PlainC == kernelSetup.kernelType ||
+ KernelType::Cpu8x8x8_PlainC == kernelSetup.kernelType)
{
GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
"WARNING: Using the slow %s kernels. This should\n"
"not happen during routine usage on supported platforms.",
- lookup_kernel_name(*kernel_type));
+ lookup_kernel_name(kernelSetup.kernelType));
}
}
+
+ GMX_RELEASE_ASSERT(kernelSetup.kernelType != KernelType::NotSet &&
+ kernelSetup.ewaldExclusionType != EwaldExclusionType::NotSet,
+ "All kernel setup parameters should be set here");
+
+ return kernelSetup;
}
+} // namespace Nbnxm
+
+void nonbonded_verlet_t::initPairlistSets(const bool haveMultipleDomains)
+{
+ pairlistSets_.emplace_back(*listParams);
+ if (haveMultipleDomains)
+ {
+ pairlistSets_.emplace_back(*listParams);
+ }
+}
+
+namespace Nbnxm
+{
+
void init_nb_verlet(const gmx::MDLogger &mdlog,
nonbonded_verlet_t **nb_verlet,
gmx_bool bFEP_NonBonded,
const gmx_mtop_t *mtop,
matrix box)
{
- nonbonded_verlet_t *nbv = new nonbonded_verlet_t();
+ nonbonded_verlet_t *nbv = new nonbonded_verlet_t();
- const EmulateGpuNonbonded emulateGpu =
- ((getenv("GMX_EMULATE_GPU") != nullptr) ? EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No);
- bool useGpu = deviceInfo != nullptr;
+ const bool emulateGpu = (getenv("GMX_EMULATE_GPU") != nullptr);
+ const bool useGpu = deviceInfo != nullptr;
- GMX_RELEASE_ASSERT(!(emulateGpu == EmulateGpuNonbonded::Yes && useGpu), "When GPU emulation is active, there cannot be a GPU assignment");
+ GMX_RELEASE_ASSERT(!(emulateGpu && useGpu), "When GPU emulation is active, there cannot be a GPU assignment");
+
+ NonbondedResource nonbondedResource;
+ if (useGpu)
+ {
+ nonbondedResource = NonbondedResource::Gpu;
+ }
+ else if (emulateGpu)
+ {
+ nonbondedResource = NonbondedResource::EmulateGpu;
+ }
+ else
+ {
+ nonbondedResource = NonbondedResource::Cpu;
+ }
nbv->nbs = nullptr;
- pick_nbnxn_kernel(mdlog, fr->use_simd_kernels, hardwareInfo,
- useGpu, emulateGpu, ir,
- &nbv->kernelType_,
- &nbv->ewaldExclusionType_,
- fr->bNonbonded);
+ nbv->setKernelSetup(pick_nbnxn_kernel(mdlog, fr->use_simd_kernels, hardwareInfo,
+ nonbondedResource, ir,
+ fr->bNonbonded));
const bool haveMultipleDomains = (DOMAINDECOMP(cr) && cr->dd->nnodes > 1);
- const bool pairlistIsSimple = nbv->pairlistIsSimple();
- for (nbnxn_pairlist_set_t &pairlistSet : nbv->pairlistSets)
- {
- // TODO Change this to a constructor
- /* The second parameter tells whether lists should be combined,
- * this is currently only and always done for GPU lists.
- */
- nbnxn_init_pairlist_set(&pairlistSet, pairlistIsSimple, !pairlistIsSimple);
- }
+ nbv->listParams = std::make_unique<NbnxnListParameters>(nbv->kernelSetup().kernelType,
+ ir->rlist);
+ nbv->initPairlistSets(haveMultipleDomains);
nbv->min_ci_balanced = 0;
- nbv->listParams = std::make_unique<NbnxnListParameters>(ir->rlist);
- setupDynamicPairlistPruning(mdlog, ir, mtop, box, nbv->kernelType_, fr->ic,
+ setupDynamicPairlistPruning(mdlog, ir, mtop, box, fr->ic,
nbv->listParams.get());
nbv->nbs = std::make_unique<nbnxn_search>(ir->ePBC,
}
nbnxn_atomdata_init(mdlog,
nbv->nbat,
- nbv->kernelType_,
+ nbv->kernelSetup().kernelType,
enbnxninitcombrule,
fr->ntype, fr->nbfp,
mimimumNumEnergyGroupNonbonded,
- pairlistIsSimple ? gmx_omp_nthreads_get(emntNonbonded) : 1);
+ nbv->pairlistIsSimple() ? gmx_omp_nthreads_get(emntNonbonded) : 1);
if (useGpu)
{
work = new NbnxnPairlistGpuWork();
}
-void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list,
- gmx_bool bSimple, gmx_bool bCombined)
+void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list)
{
- GMX_RELEASE_ASSERT(!bSimple || !bCombined, "Can only combine non-simple lists");
-
- nbl_list->bSimple = bSimple;
- nbl_list->bCombined = bCombined;
+ nbl_list->bSimple =
+ (nbl_list->params.pairlistType == PairlistType::Simple4x2 ||
+ nbl_list->params.pairlistType == PairlistType::Simple4x4 ||
+ nbl_list->params.pairlistType == PairlistType::Simple4x8);
+ // Currently GPU lists are always combined
+ nbl_list->bCombined = !nbl_list->bSimple;
nbl_list->nnbl = gmx_omp_nthreads_get(emntNonbonded);
nbl_list->nnbl, NBNXN_BUFFERFLAG_MAX_THREADS, NBNXN_BUFFERFLAG_MAX_THREADS);
}
- if (bSimple)
+ if (nbl_list->bSimple)
{
snew(nbl_list->nbl, nbl_list->nnbl);
if (nbl_list->nnbl > 1)
{
snew(nbl_list->nblGpu, nbl_list->nnbl);
}
- snew(nbl_list->nbl_fep, nbl_list->nnbl);
+ nbl_list->nbl_fep.resize(nbl_list->nnbl);
/* Execute in order to avoid memory interleaving between threads */
#pragma omp parallel for num_threads(nbl_list->nnbl) schedule(static)
for (int i = 0; i < nbl_list->nnbl; i++)
/* Allocate the nblist data structure locally on each thread
* to optimize memory access for NUMA architectures.
*/
- if (bSimple)
+ if (nbl_list->bSimple)
{
nbl_list->nbl[i] = new NbnxnPairlistCpu();
static void icell_set_x(int ci,
real shx, real shy, real shz,
int stride, const real *x,
- int nb_kernel_type,
+ const Nbnxm::KernelType kernelType,
NbnxnPairlistCpuWork *work)
{
- switch (nb_kernel_type)
+ switch (kernelType)
{
#if GMX_SIMD
#ifdef GMX_NBNXN_SIMD_4XN
- case nbnxnk4xN_SIMD_4xN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
icell_set_x_simd_4xn(ci, shx, shy, shz, stride, x, work);
break;
#endif
#ifdef GMX_NBNXN_SIMD_2XNN
- case nbnxnk4xN_SIMD_2xNN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
icell_set_x_simd_2xnn(ci, shx, shy, shz, stride, x, work);
break;
#endif
#endif
- case nbnxnk4x4_PlainC:
+ case Nbnxm::KernelType::Cpu4x4_PlainC:
icell_set_x_simple(ci, shx, shy, shz, stride, x, &work->iClusterData);
break;
default:
static void icell_set_x(int ci,
real shx, real shy, real shz,
int stride, const real *x,
- int gmx_unused nb_kernel_type,
+ Nbnxm::KernelType gmx_unused kernelType,
NbnxnPairlistGpuWork *work)
{
#if !GMX_SIMD4_HAVE_REAL
const nbnxn_atomdata_t *nbat,
const real rlist2,
const real rbb2,
- const int nb_kernel_type,
+ const Nbnxm::KernelType kernelType,
int *numDistanceChecks)
{
- switch (nb_kernel_type)
+ switch (kernelType)
{
- case nbnxnk4x4_PlainC:
+ case Nbnxm::KernelType::Cpu4x4_PlainC:
makeClusterListSimple(jGrid,
nbl, ci, firstCell, lastCell,
excludeSubDiagonal,
numDistanceChecks);
break;
#ifdef GMX_NBNXN_SIMD_4XN
- case nbnxnk4xN_SIMD_4xN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
makeClusterListSimd4xn(jGrid,
nbl, ci, firstCell, lastCell,
excludeSubDiagonal,
break;
#endif
#ifdef GMX_NBNXN_SIMD_2XNN
- case nbnxnk4xN_SIMD_2xNN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
makeClusterListSimd2xnn(jGrid,
nbl, ci, firstCell, lastCell,
excludeSubDiagonal,
numDistanceChecks);
break;
#endif
+ default:
+ GMX_ASSERT(false, "Unhandled kernel type");
}
}
const nbnxn_atomdata_t *nbat,
const real rlist2,
const real rbb2,
- const int gmx_unused nb_kernel_type,
+ Nbnxm::KernelType gmx_unused kernelType,
int *numDistanceChecks)
{
for (int cj = firstCell; cj <= lastCell; cj++)
const nbnxn_atomdata_t *nbat,
const t_blocka &exclusions,
real rlist,
- int nb_kernel_type,
+ const Nbnxm::KernelType kernelType,
int ci_block,
gmx_bool bFBufferFlag,
int nsubpair_max,
sync_work(nbl);
GMX_ASSERT(nbl->na_ci == jGrid.na_c, "The cluster sizes in the list and grid should match");
- nbl->na_cj = nbnxn_kernel_to_cluster_j_size(nb_kernel_type);
+ nbl->na_cj = Nbnxm::JClusterSizePerKernelType[kernelType];
na_cj_2log = get_2log(nbl->na_cj);
nbl->rlist = rlist;
icell_set_x(cell0_i+ci, shx, shy, shz,
nbat->xstride, nbat->x().data(),
- nb_kernel_type,
+ kernelType,
nbl->work);
for (int cx = cxf; cx <= cxl; cx++)
excludeSubDiagonal,
nbat,
rlist2, rbb2,
- nb_kernel_type,
+ kernelType,
&numDistanceChecks);
if (bFBufferFlag)
void nbnxn_make_pairlist(nonbonded_verlet_t *nbv,
const InteractionLocality iLocality,
+ nbnxn_pairlist_set_t *nbl_list,
const t_blocka *excl,
const int64_t step,
t_nrnb *nrnb)
nbnxn_search *nbs = nbv->nbs.get();
nbnxn_atomdata_t *nbat = nbv->nbat;
const real rlist = nbv->listParams->rlistOuter;
- nbnxn_pairlist_set_t *nbl_list = &nbv->pairlistSets[iLocality];
int nsubpair_target;
float nsubpair_tot_est;
nbnxn_make_pairlist_part(nbs, iGrid, jGrid,
&nbs->work[th], nbat, *excl,
rlist,
- nbv->kernelType_,
+ nbv->kernelSetup().kernelType,
ci_block,
nbat->bUseBufferFlags,
nsubpair_target,
nbnxn_make_pairlist_part(nbs, iGrid, jGrid,
&nbs->work[th], nbat, *excl,
rlist,
- nbv->kernelType_,
+ nbv->kernelSetup().kernelType,
ci_block,
nbat->bUseBufferFlags,
nsubpair_target,
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/bitmask.h"
#include "gromacs/utility/defaultinitializationallocator.h"
+#include "gromacs/utility/enumerationhelpers.h"
#include "gromacs/utility/real.h"
// This file with constants is separate from this file to be able
struct NbnxnPairlistGpuWork;
struct tMPI_Atomic;
+namespace Nbnxm
+{
+enum class KernelType;
+}
+
/* Convenience type for vector with aligned memory */
template<typename T>
using AlignedVector = std::vector < T, gmx::AlignedAllocator < T>>;
template<typename T>
using FastVector = std::vector < T, gmx::DefaultInitializationAllocator < T>>;
+enum class PairlistType : int
+{
+ Simple4x2,
+ Simple4x4,
+ Simple4x8,
+ Hierarchical8x8,
+ Count
+};
+
+static constexpr gmx::EnumerationArray<PairlistType, int> IClusterSizePerListType = { 4, 4, 4, 8 };
+static constexpr gmx::EnumerationArray<PairlistType, int> JClusterSizePerListType = { 2, 4, 8, 8 };
+
/*! \cond INTERNAL */
/*! \brief The setup for generating and pruning the nbnxn pair list.
{
/*! \brief Constructor producing a struct with dynamic pruning disabled
*/
- NbnxnListParameters(real rlist) :
- useDynamicPruning(false),
- nstlistPrune(-1),
- rlistOuter(rlist),
- rlistInner(rlist),
- numRollingParts(1)
- {
- }
-
- bool useDynamicPruning; //!< Are we using dynamic pair-list pruning
- int nstlistPrune; //!< Pair-list dynamic pruning interval
- real rlistOuter; //!< Cut-off of the larger, outer pair-list
- real rlistInner; //!< Cut-off of the smaller, inner pair-list
- int numRollingParts; //!< The number parts to divide the pair-list into for rolling pruning, a value of 1 gives no rolling pruning
+ NbnxnListParameters(Nbnxm::KernelType kernelType,
+ real rlist);
+
+ PairlistType pairlistType; //!< The type of cluster-pair list
+ bool useDynamicPruning; //!< Are we using dynamic pair-list pruning
+ int nstlistPrune; //!< Pair-list dynamic pruning interval
+ real rlistOuter; //!< Cut-off of the larger, outer pair-list
+ real rlistInner; //!< Cut-off of the smaller, inner pair-list
+ int numRollingParts; //!< The number of parts to divide the pair-list into for rolling pruning; a value of 1 gives no rolling pruning
};
/*! \endcond */
struct nbnxn_pairlist_set_t
{
- int nnbl; /* number of lists */
- NbnxnPairlistCpu **nbl; /* lists for CPU */
- NbnxnPairlistCpu **nbl_work; /* work space for rebalancing lists */
- NbnxnPairlistGpu **nblGpu; /* lists for GPU */
- gmx_bool bCombined; /* TRUE if lists get combined into one (the 1st) */
- gmx_bool bSimple; /* TRUE if the list of of type "simple"
+ nbnxn_pairlist_set_t(const NbnxnListParameters &listParams);
+
+ int nnbl; /* number of lists */
+ NbnxnPairlistCpu **nbl; /* lists for CPU */
+ NbnxnPairlistCpu **nbl_work; /* work space for rebalancing lists */
+ NbnxnPairlistGpu **nblGpu; /* lists for GPU */
+ const NbnxnListParameters &params; /* Pairlist parameters describing setup and ranges */
+ gmx_bool bCombined; /* TRUE if lists get combined into one (the 1st) */
+ gmx_bool bSimple; /* TRUE if the list is of type "simple"
(na_sc=na_s, no super-clusters used) */
- int natpair_ljq; /* Total number of atom pairs for LJ+Q kernel */
- int natpair_lj; /* Total number of atom pairs for LJ kernel */
- int natpair_q; /* Total number of atom pairs for Q kernel */
- t_nblist **nbl_fep; /* List of free-energy atom pair interactions */
- int64_t outerListCreationStep; /* Step at which the outer list was created */
+
+ /* Counts for debug printing */
+ int natpair_ljq; /* Total number of atom pairs for LJ+Q kernel */
+ int natpair_lj; /* Total number of atom pairs for LJ kernel */
+ int natpair_q; /* Total number of atom pairs for Q kernel */
+ std::vector<t_nblist *> nbl_fep; /* List of free-energy atom pair interactions */
+ int64_t outerListCreationStep; /* Step at which the outer list was created */
};
enum {
{
/* Constructor
*
- * \param[in] nb_kernel_type Type of non-bonded kernel
+ * \param[in] kernelType Type of non-bonded kernel
* \param[in] numEnergyGroups The number of energy groups
* \param[in] simdEnergyBufferStride Stride for entries in the energy buffers for SIMD kernels
* \param[in] pinningPolicy Sets the pinning policy for all buffers used on the GPU
*/
- nbnxn_atomdata_output_t(int nb_kernel_type,
+ nbnxn_atomdata_output_t(Nbnxm::KernelType kernelType,
int numEnergyGroups,
int simdEnergyBufferStride,
gmx::PinningPolicy pinningPolicy);
const t_inputrec *ir,
const gmx_mtop_t *mtop,
matrix box,
- int nbnxnKernelType,
const interaction_const_t *ic,
NbnxnListParameters *listParams)
{
/* Initialize the parameters to no dynamic list pruning */
listParams->useDynamicPruning = false;
- const VerletbufListSetup ls = verletbufGetListSetup(nbnxnKernelType);
+ const VerletbufListSetup ls =
+ {
+ IClusterSizePerListType[listParams->pairlistType],
+ JClusterSizePerListType[listParams->pairlistType]
+ };
/* Currently emulation mode does not support dual pair-lists */
- const bool useGpu = (nbnxnKernelType == nbnxnk8x8x8_GPU);
+ const bool useGpu = (listParams->pairlistType == PairlistType::Hierarchical8x8);
if (supportsDynamicPairlistGenerationInterval(*ir) &&
getenv("GMX_DISABLE_DYNAMICPRUNING") == nullptr)
* \param[in] ir The input parameter record
* \param[in] mtop The global topology
* \param[in] box The unit cell
- * \param[in] nbnxnKernelType The type of nbnxn kernel used
* \param[in] ic The nonbonded interactions constants
* \param[in,out] listParams The list setup parameters
*/
const t_inputrec *ir,
const gmx_mtop_t *mtop,
matrix box,
- int nbnxnKernelType,
const interaction_const_t *ic,
NbnxnListParameters *listParams);
* the research papers on the package. Check out http://www.gromacs.org.
*/
+/*! \internal \file
+ * \brief
+ * Implements functionality for nbnxn_pairlist_set_t.
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \ingroup module_nbnxm
+ */
+
#include "gmxpre.h"
+#include "pairlistset.h"
+
#include "gromacs/nbnxm/nbnxm.h"
+#include "gromacs/nbnxm/nbnxm_geometry.h"
#include "gromacs/nbnxm/pairlist.h"
+#include "gromacs/utility/gmxassert.h"
+
+/*! \cond INTERNAL */
+
+NbnxnListParameters::NbnxnListParameters(const Nbnxm::KernelType kernelType,
+ const real rlist) :
+ useDynamicPruning(false),
+ nstlistPrune(-1),
+ rlistOuter(rlist),
+ rlistInner(rlist),
+ numRollingParts(1)
+{
+ if (!Nbnxm::kernelTypeUsesSimplePairlist(kernelType))
+ {
+ pairlistType = PairlistType::Hierarchical8x8;
+ }
+ else
+ {
+ switch (Nbnxm::JClusterSizePerKernelType[kernelType])
+ {
+ case 2:
+ pairlistType = PairlistType::Simple4x2;
+ break;
+ case 4:
+ pairlistType = PairlistType::Simple4x4;
+ break;
+ case 8:
+ pairlistType = PairlistType::Simple4x8;
+ break;
+ default:
+ GMX_RELEASE_ASSERT(false, "Kernel type does not have a pairlist type");
+ }
+ }
+}
+
+nbnxn_pairlist_set_t::nbnxn_pairlist_set_t(const NbnxnListParameters &listParams) :
+ params(listParams)
+{
+ // TODO: move the body of nbnxn_init_pairlist_set() into this constructor
+ nbnxn_init_pairlist_set(this);
+}
int nbnxnNumStepsWithPairlist(const nonbonded_verlet_t &nbv,
const Nbnxm::InteractionLocality iLocality,
const int64_t step)
{
- return step - nbv.pairlistSets[iLocality].outerListCreationStep;
+ return step - nbv.pairlistSet(iLocality).outerListCreationStep;
}
bool nbnxnIsDynamicPairlistPruningStep(const nonbonded_verlet_t &nbv,
{
return nbnxnNumStepsWithPairlist(nbv, iLocality, step) % nbv.listParams->nstlistPrune == 0;
}
+
+/*! \endcond */
gmx_bool bFEP,
int nthread_max);
-/* Initializes a set of pair lists stored in nbnxn_pairlist_set_t */
-void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list,
- gmx_bool simple, gmx_bool combined);
+/* Initializes a set of pair lists stored in nbnxn_pairlist_set_t
+ *
+ * TODO: Merge into the constructor
+ */
+void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list);
/*! \brief Prepare the list-set produced by the search for dynamic pruning
*
#include "kernels_simd_4xm/kernel_prune.h"
-void NbnxnDispatchPruneKernel(nonbonded_verlet_t *nbv,
- const Nbnxm::InteractionLocality ilocality,
- const rvec *shift_vec)
+void NbnxnDispatchPruneKernel(nbnxn_pairlist_set_t *nbl_lists,
+ const Nbnxm::KernelType kernelType,
+ const nbnxn_atomdata_t *nbat,
+ const rvec *shift_vec)
{
- nbnxn_pairlist_set_t *nbl_lists = &nbv->pairlistSets[ilocality];
- const nbnxn_atomdata_t *nbat = nbv->nbat;
- const real rlistInner = nbv->listParams->rlistInner;
+ const real rlistInner = nbl_lists->params.rlistInner;
GMX_ASSERT(nbl_lists->nbl[0]->ciOuter.size() >= nbl_lists->nbl[0]->ci.size(),
"Here we should either have an empty ci list or ciOuter should be >= ci");
{
NbnxnPairlistCpu *nbl = nbl_lists->nbl[i];
- switch (nbv->kernelType_)
+ switch (kernelType)
{
- case nbnxnk4xN_SIMD_4xN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
nbnxn_kernel_prune_4xn(nbl, nbat, shift_vec, rlistInner);
break;
- case nbnxnk4xN_SIMD_2xNN:
+ case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
nbnxn_kernel_prune_2xnn(nbl, nbat, shift_vec, rlistInner);
break;
- case nbnxnk4x4_PlainC:
+ case Nbnxm::KernelType::Cpu4x4_PlainC:
nbnxn_kernel_prune_ref(nbl, nbat, shift_vec, rlistInner);
break;
default:
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
-#include "gromacs/nbnxm/nbnxm.h"
#include "gromacs/taskassignment/taskassignment.h"
#include "gromacs/topology/topology.h"
#include "gromacs/utility/baseversion.h"
struct gmx_mtop_t;
struct t_inputrec;
-enum class EmulateGpuNonbonded : bool;
-
namespace gmx
{
Gpu
};
+//! Help pass GPU-emulation parameters with type safety.
+enum class EmulateGpuNonbonded : bool
+{
+ //! Do not emulate GPUs.
+ No,
+ //! Do emulate GPUs.
+ Yes
+};
+
/*! \brief Decide whether this thread-MPI simulation will run
* nonbonded tasks on GPUs.
*