set = &pme_lb->setup[pme_lb->cur];
- NbnxnListParameters *listParams = nbv->listParams.get();
-
ic->rcoulomb = set->rcut_coulomb;
- listParams->rlistOuter = set->rlistOuter;
- listParams->rlistInner = set->rlistInner;
+ nbv->pairlistSets_->changeRadii(set->rlistOuter, set->rlistInner);
ic->ewaldcoeff_q = set->ewaldcoeff_q;
/* TODO: centralize the code that sets the potentials shifts */
if (ic->coulomb_modifier == eintmodPOTSHIFT)
/* We always re-initialize the tables whether they are used or not */
init_interaction_const_tables(nullptr, ic, rtab);
- Nbnxm::gpu_pme_loadbal_update_param(nbv, ic, listParams);
+ Nbnxm::gpu_pme_loadbal_update_param(nbv, ic, &nbv->pairlistSets().params());
if (!pme_lb->bSepPMERanks)
{
* This also ensures that we won't disable the currently
* optimal setting during a second round of PME balancing.
*/
- set_dd_dlb_max_cutoff(cr, fr->nbv->listParams->rlistOuter);
+ set_dd_dlb_max_cutoff(cr, fr->nbv->pairlistSets().params().rlistOuter);
}
}
step);
/* Update deprecated rlist in forcerec to stay in sync with fr->nbv */
- fr->rlist = fr->nbv->listParams->rlistOuter;
+ fr->rlist = fr->nbv->pairlistSets().params().rlistOuter;
if (ir.eDispCorr != edispcNO)
{
/* When dynamic pair-list pruning is requested, we need to prune
* at nstlistPrune steps.
*/
- if (nbv->listParams->useDynamicPruning &&
- nbnxnIsDynamicPairlistPruningStep(*nbv, ilocality, step))
+ if (nbv->pairlistSets().isDynamicPairlistPruningStep(step))
{
/* Prune the pair-list beyond fr->ic->rlistPrune using
* the current coordinates of the atoms.
wallcycle_sub_start(wcycle, ewcsNONBONDED);
}
- NbnxnDispatchKernel(nbv, ilocality, *ic, flags, clearF, fr, enerd, nrnb);
+ nbv->dispatchNonbondedKernel(ilocality, *ic, flags, clearF, fr, enerd, nrnb);
if (!nbv->useGpu())
{
wallcycle_start(wcycle, ewcWAIT_GPU_NB_L);
wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L);
- nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs.get(), Nbnxm::AtomLocality::Local,
- nbv->nbat, as_rvec_array(force->unpaddedArrayRef().data()), wcycle);
+ nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::Local,
+ as_rvec_array(force->unpaddedArrayRef().data()), wcycle);
}
}
}
* With domain decomposition we alternate local and non-local
* pruning at even and odd steps.
*/
- int numRollingParts = nbv->listParams->numRollingParts;
- GMX_ASSERT(numRollingParts == nbv->listParams->nstlistPrune/2, "Since we alternate local/non-local at even/odd steps, we need numRollingParts<=nstlistPrune/2 for correctness and == for efficiency");
- int stepWithCurrentList = nbnxnNumStepsWithPairlist(*nbv, Nbnxm::InteractionLocality::Local, step);
+ int numRollingParts = nbv->pairlistSets().params().numRollingParts;
+ GMX_ASSERT(numRollingParts == nbv->pairlistSets().params().nstlistPrune/2,
+ "Since we alternate local/non-local at even/odd steps, "
+ "we need numRollingParts<=nstlistPrune/2 for correctness and == for efficiency");
+ int stepWithCurrentList = nbv->pairlistSets().numStepsWithPairlist(step);
bool stepIsEven = ((stepWithCurrentList & 1) == 0);
if (stepWithCurrentList > 0 &&
stepWithCurrentList < inputrec->nstlist - 1 &&
step, nrnb, wcycle);
}
- const Nbnxm::InteractionLocality iloc =
- (!bUseOrEmulGPU ? Nbnxm::InteractionLocality::Local : Nbnxm::InteractionLocality::NonLocal);
-
/* Add all the non-bonded force to the normal force array.
* This can be split into a local and a non-local part when overlapping
* communication with calculation with domain decomposition.
*/
wallcycle_stop(wcycle, ewcFORCE);
- nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs.get(), Nbnxm::AtomLocality::All, nbv->nbat, f, wcycle);
+ nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::All, f, wcycle);
wallcycle_start_nocount(wcycle, ewcFORCE);
- /* if there are multiple fshift output buffers reduce them */
- if ((flags & GMX_FORCE_VIRIAL) &&
- nbv->pairlistSet(iloc).nnbl > 1)
+ /* If there are multiple fshift output buffers we need to reduce them */
+ if (flags & GMX_FORCE_VIRIAL)
{
/* This is not in a subcounter because it takes a
negligible and constant-sized amount of time */
wallcycle_stop(wcycle, ewcFORCE);
}
- /* skip the reduction if there was no non-local work to do */
- if (!nbv->pairlistSet(Nbnxm::InteractionLocality::NonLocal).nblGpu[0]->sci.empty())
- {
- nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs.get(), Nbnxm::AtomLocality::NonLocal,
- nbv->nbat, f, wcycle);
- }
+ nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::NonLocal,
+ f, wcycle);
}
}
Nbnxm::gpu_clear_outputs(nbv->gpu_nbv, flags);
/* Is dynamic pair-list pruning activated? */
- if (nbv->listParams->useDynamicPruning)
+ if (nbv->pairlistSets().params().useDynamicPruning)
{
launchGpuRollingPruning(cr, nbv, inputrec, step);
}
* on the non-alternating path. */
if (bUseOrEmulGPU && !alternateGpuWait)
{
- nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs.get(), Nbnxm::AtomLocality::Local,
- nbv->nbat, f, wcycle);
+ nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::Local,
+ f, wcycle);
}
if (DOMAINDECOMP(cr))
{
if (bPMETune)
{
pme_loadbal_init(&pme_loadbal, cr, mdlog, *ir, state->box,
- *fr->ic, *fr->nbv->listParams, fr->pmedata, use_GPU(fr->nbv),
+ *fr->ic, fr->nbv->pairlistSets().params(), fr->pmedata, use_GPU(fr->nbv),
&bPMETunePrinting);
}
#include "gromacs/mdtypes/mdatom.h"
#include "gromacs/nbnxm/nbnxm.h"
#include "gromacs/nbnxm/nbnxm_geometry.h"
+#include "gromacs/nbnxm/pairlist.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/simd/simd.h"
#include "gromacs/timing/wallcycle.h"
static void
nbnxn_atomdata_add_nbat_f_to_f_part(const nbnxn_search *nbs,
const nbnxn_atomdata_t *nbat,
- gmx::ArrayRef<nbnxn_atomdata_output_t> out,
+ gmx::ArrayRef<const nbnxn_atomdata_output_t> out,
int nfa,
int a0, int a1,
rvec *f)
}
/* Add the force array(s) from nbnxn_atomdata_t to f */
-void nbnxn_atomdata_add_nbat_f_to_f(nbnxn_search *nbs,
- const Nbnxm::AtomLocality locality,
- nbnxn_atomdata_t *nbat,
- rvec *f,
- gmx_wallcycle *wcycle)
+void
+nonbonded_verlet_t::atomdata_add_nbat_f_to_f(const Nbnxm::AtomLocality locality,
+ rvec *f,
+ gmx_wallcycle *wcycle)
{
+ /* Skip the non-local reduction if there was no non-local work to do */
+ if (locality == Nbnxm::AtomLocality::NonLocal &&
+ pairlistSets().pairlistSet(Nbnxm::InteractionLocality::NonLocal).nblGpu[0]->sci.empty())
+ {
+ return;
+ }
+
wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
{
try
{
- nbnxn_atomdata_add_nbat_f_to_f_part(nbs, nbat,
+ nbnxn_atomdata_add_nbat_f_to_f_part(nbs.get(), nbat,
nbat->out,
1,
a0+((th+0)*na)/nth,
{
gmx::ArrayRef<const nbnxn_atomdata_output_t> outputBuffers = nbat->out;
+ if (outputBuffers.size() == 1)
+ {
+ /* When there is a single output object, with CPU or GPU, shift forces
+ * have been written directly to the main buffer instead of to the
+ * (single) thread local output object. There is nothing to reduce.
+ */
+ return;
+ }
+
for (int s = 0; s < SHIFTS; s++)
{
rvec sum;
struct nbnxn_atomdata_t;
struct nbnxn_search;
+struct nonbonded_verlet_t;
struct t_mdatoms;
struct gmx_wallcycle;
nbnxn_atomdata_t *nbat,
gmx_wallcycle *wcycle);
-/* Add the forces stored in nbat to f, zeros the forces in nbat */
-void nbnxn_atomdata_add_nbat_f_to_f(nbnxn_search *nbs,
- Nbnxm::AtomLocality locality,
- nbnxn_atomdata_t *nbat,
- rvec *f,
- gmx_wallcycle *wcycle);
-
/* Add the fshift force stored in nbat to fshift */
void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t *nbat,
rvec *fshift);
grid->bSimple = nbv->pairlistIsSimple();
- grid->na_c = IClusterSizePerListType[nbv->listParams->pairlistType];
- grid->na_cj = JClusterSizePerListType[nbv->listParams->pairlistType];
+ grid->na_c = IClusterSizePerListType[nbv->pairlistSets().params().pairlistType];
+ grid->na_cj = JClusterSizePerListType[nbv->pairlistSets().params().pairlistType];
grid->na_sc = (grid->bSimple ? 1 : c_gpuNumClusterPerCell)*grid->na_c;
grid->na_c_2log = get_2log(grid->na_c);
}
static void accountFlops(t_nrnb *nrnb,
+ const nbnxn_pairlist_set_t &pairlistSet,
const nonbonded_verlet_t &nbv,
- const Nbnxm::InteractionLocality iLocality,
const interaction_const_t &ic,
const int forceFlags)
{
- const nbnxn_pairlist_set_t &pairlistSet = nbv.pairlistSet(iLocality);
- const bool usingGpuKernels = nbv.useGpu();
+ const bool usingGpuKernels = nbv.useGpu();
- int enr_nbnxn_kernel_ljc;
+ int enr_nbnxn_kernel_ljc;
if (EEL_RF(ic.eeltype) || ic.eeltype == eelCUT)
{
enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_RF;
}
}
-void NbnxnDispatchKernel(nonbonded_verlet_t *nbv,
- Nbnxm::InteractionLocality iLocality,
- const interaction_const_t &ic,
- int forceFlags,
- int clearF,
- t_forcerec *fr,
- gmx_enerdata_t *enerd,
- t_nrnb *nrnb)
+void
+nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality,
+ const interaction_const_t &ic,
+ int forceFlags,
+ int clearF,
+ t_forcerec *fr,
+ gmx_enerdata_t *enerd,
+ t_nrnb *nrnb)
{
- const nbnxn_pairlist_set_t &pairlistSet = nbv->pairlistSet(iLocality);
+ const nbnxn_pairlist_set_t &pairlistSet = pairlistSets().pairlistSet(iLocality);
- switch (nbv->kernelSetup().kernelType)
+ switch (kernelSetup().kernelType)
{
case Nbnxm::KernelType::Cpu4x4_PlainC:
case Nbnxm::KernelType::Cpu4xN_Simd_4xN:
case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
nbnxn_kernel_cpu(pairlistSet,
- nbv->kernelSetup(),
- nbv->nbat,
+ kernelSetup(),
+ nbat,
ic,
fr->shift_vec,
forceFlags,
break;
case Nbnxm::KernelType::Gpu8x8x8:
- Nbnxm::gpu_launch_kernel(nbv->gpu_nbv, forceFlags, iLocality);
+ Nbnxm::gpu_launch_kernel(gpu_nbv, forceFlags, iLocality);
break;
case Nbnxm::KernelType::Cpu8x8x8_PlainC:
nbnxn_kernel_gpu_ref(pairlistSet.nblGpu[0],
- nbv->nbat, &ic,
+ nbat, &ic,
fr->shift_vec,
forceFlags,
clearF,
- nbv->nbat->out[0].f,
+ nbat->out[0].f,
fr->fshift[0],
enerd->grpp.ener[egCOULSR],
fr->bBHAM ?
}
- accountFlops(nrnb, *nbv, iLocality, ic, forceFlags);
+ accountFlops(nrnb, pairlistSet, *this, ic, forceFlags);
}
void
const int forceFlags,
t_nrnb *nrnb)
{
- const gmx::ArrayRef<t_nblist const * const > nbl_fep = pairlistSet(iLocality).nbl_fep;
+ const gmx::ArrayRef<t_nblist const * const > nbl_fep = pairlistSets().pairlistSet(iLocality).nbl_fep;
/* When the first list is empty, all are empty and there is nothing to do */
if (nbl_fep[0]->nrj == 0)
#include <memory>
#include "gromacs/math/vectypes.h"
-#include "gromacs/nbnxm/pairlist.h"
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/enumerationhelpers.h"
#include "gromacs/utility/real.h"
struct gmx_enerdata_t;
struct gmx_hw_info_t;
struct gmx_mtop_t;
+struct gmx_wallcycle;
struct interaction_const_t;
struct nbnxn_pairlist_set_t;
struct nbnxn_search;
struct nonbonded_verlet_t;
+enum class PairlistType;
struct t_blocka;
struct t_commrec;
struct t_lambda;
class UpdateGroupsCog;
}
+namespace Nbnxm
+{
+enum class KernelType;
+}
+
+/*! \libinternal
+ * \brief The setup for generating and pruning the nbnxn pair list.
+ *
+ * Without dynamic pruning rlistOuter=rlistInner.
+ */
+struct NbnxnListParameters
+{
+ /*! \brief Constructor producing a struct with dynamic pruning disabled
+ *
+ * \param[in] kernelType The non-bonded kernel type (presumably used to select pairlistType; confirm in the definition)
+ * \param[in] rlist The pair-list cut-off (presumably assigned to both rlistOuter and rlistInner, as dynamic pruning is disabled; confirm in the definition)
+ */
+ NbnxnListParameters(Nbnxm::KernelType kernelType,
+ real rlist);
+
+ PairlistType pairlistType; //!< The type of cluster-pair list
+ bool useDynamicPruning; //!< Are we using dynamic pair-list pruning
+ int nstlistPrune; //!< Pair-list dynamic pruning interval
+ real rlistOuter; //!< Cut-off of the larger, outer pair-list
+ real rlistInner; //!< Cut-off of the smaller, inner pair-list
+ int numRollingParts; //!< The number of parts to divide the pair-list into for rolling pruning, a value of 1 gives no rolling pruning
+};
+
+
/*! \brief Resources that can be used to execute non-bonded kernels on */
enum class NonbondedResource : int
{
struct nonbonded_verlet_t
{
public:
+    /*! \brief Holds the pair-list parameters together with the local and,
+     * when there are multiple domains, the non-local pair-list set.
+     */
+    class PairlistSets
+    {
+        public:
+            /*! \brief Creates the local pair-list set and, with multiple domains, also the non-local set
+             *
+             * \param[in] listParams                        The pair-list parameters, a copy is stored
+             * \param[in] haveMultipleDomains               Whether a non-local set should be created
+             * \param[in] minimumIlistCountForGpuBalancing  Pair list balancing parameter for use with GPU
+             */
+            PairlistSets(const NbnxnListParameters &listParams,
+                         bool haveMultipleDomains,
+                         int minimumIlistCountForGpuBalancing);
+
+            //! Construct the pairlist set for the given locality
+            void construct(Nbnxm::InteractionLocality iLocality,
+                           nbnxn_search *nbs,
+                           nbnxn_atomdata_t *nbat,
+                           const t_blocka *excl,
+                           Nbnxm::KernelType kernelType,
+                           int64_t step,
+                           t_nrnb *nrnb);
+
+            //! Dispatches the dynamic pruning kernel for the given locality
+            void dispatchPruneKernel(Nbnxm::InteractionLocality iLocality,
+                                     const nbnxn_atomdata_t *nbat,
+                                     const rvec *shift_vec,
+                                     Nbnxm::KernelType kernelType);
+
+            //! Returns the pair list parameters
+            const NbnxnListParameters &params() const
+            {
+                return params_;
+            }
+
+            //! Returns the number of steps performed with the current pair list
+            int numStepsWithPairlist(int64_t step) const
+            {
+                /* The difference is computed in 64 bit and narrowed explicitly;
+                 * presumably it stays far below the int range, TODO confirm.
+                 */
+                return static_cast<int>(step - outerListCreationStep_);
+            }
+
+            //! Returns whether step is a dynamic list pruning step, for CPU lists only
+            bool isDynamicPairlistPruningStep(int64_t step) const
+            {
+                /* The flag is checked first, so the modulo is only evaluated
+                 * when dynamic pruning is in use.
+                 */
+                return (params_.useDynamicPruning &&
+                        numStepsWithPairlist(step) % params_.nstlistPrune == 0);
+            }
+
+            //! Changes the pair-list outer and inner radius
+            void changeRadii(real rlistOuter,
+                             real rlistInner)
+            {
+                params_.rlistOuter = rlistOuter;
+                params_.rlistInner = rlistInner;
+            }
+
+            //! Returns the pair-list set for the given locality
+            const nbnxn_pairlist_set_t &pairlistSet(Nbnxm::InteractionLocality iLocality) const
+            {
+                if (iLocality == Nbnxm::InteractionLocality::Local)
+                {
+                    return *localSet_;
+                }
+                else
+                {
+                    GMX_ASSERT(nonlocalSet_, "Need a non-local set when requesting access");
+                    return *nonlocalSet_;
+                }
+            }
+
+        private:
+            //! Returns the pair-list set for the given locality
+            nbnxn_pairlist_set_t &pairlistSet(Nbnxm::InteractionLocality iLocality)
+            {
+                if (iLocality == Nbnxm::InteractionLocality::Local)
+                {
+                    return *localSet_;
+                }
+                else
+                {
+                    GMX_ASSERT(nonlocalSet_, "Need a non-local set when requesting access");
+                    return *nonlocalSet_;
+                }
+            }
+
+            //! Parameters for the search and list pruning setup
+            NbnxnListParameters params_;
+            //! Pair list balancing parameter for use with GPU
+            int minimumIlistCountForGpuBalancing_;
+            //! Local pairlist set
+            std::unique_ptr<nbnxn_pairlist_set_t> localSet_;
+            //! Non-local pairlist set
+            std::unique_ptr<nbnxn_pairlist_set_t> nonlocalSet_;
+            /*! \brief MD step at which the outer lists in pairlistSets_ were created
+             *
+             * Initialized here so that numStepsWithPairlist() never reads an
+             * indeterminate value before the first local list has been constructed.
+             */
+            int64_t outerListCreationStep_ = 0;
+    };
+
+
//! Returns whether a GPU is use for the non-bonded calculations
bool useGpu() const
{
//! Initialize the pair list sets, TODO this should be private
void initPairlistSets(bool haveMultipleDomains);
- //! Returns a reference to the pairlist set for the requested locality
- const nbnxn_pairlist_set_t &pairlistSet(Nbnxm::InteractionLocality iLocality) const
- {
- GMX_ASSERT(static_cast<size_t>(iLocality) < pairlistSets_.size(),
- "The requested locality should be in the list");
- return pairlistSets_[static_cast<int>(iLocality)];
- }
-
//! Constructs the pairlist for the given locality
void constructPairlist(Nbnxm::InteractionLocality iLocality,
const t_blocka *excl,
int64_t step,
- t_nrnb *nrnb)
+ t_nrnb *nrnb);
+
+ //! Returns a reference to the pairlist sets
+ const PairlistSets &pairlistSets() const
{
- nbnxn_make_pairlist(this, iLocality, &pairlistSets_[static_cast<int>(iLocality)], excl, step, nrnb);
+ return *pairlistSets_;
}
//! Dispatches the dynamic pruning kernel for the given locality
void dispatchPruneKernel(Nbnxm::InteractionLocality iLocality,
- const rvec *shift_vec)
- {
- GMX_ASSERT(static_cast<size_t>(iLocality) < pairlistSets_.size(),
- "The requested locality should be in the list");
- NbnxnDispatchPruneKernel(&pairlistSets_[static_cast<int>(iLocality)],
- kernelSetup_.kernelType, nbat, shift_vec);
- }
-
- //! Dispatches the non-bonded free-energy kernel, always runs on the CPU
+ const rvec *shift_vec);
+
+ //! \brief Executes the non-bonded kernel on the CPU or launches it on the GPU
+ void dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality,
+ const interaction_const_t &ic,
+ int forceFlags,
+ int clearF,
+ t_forcerec *fr,
+ gmx_enerdata_t *enerd,
+ t_nrnb *nrnb);
+
+ //! Executes the non-bonded free-energy kernel, always runs on the CPU
void dispatchFreeEnergyKernel(Nbnxm::InteractionLocality iLocality,
t_forcerec *fr,
rvec x[],
int forceFlags,
t_nrnb *nrnb);
+ //! Add the forces stored in nbat to f, zeros the forces in nbat
+ void atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality locality,
+ rvec *f,
+ gmx_wallcycle *wcycle);
+
//! Return the kernel setup
const Nbnxm::KernelSetup &kernelSetup() const
{
kernelSetup_ = kernelSetup;
}
- //! Parameters for the search and list pruning setup
- std::unique_ptr<NbnxnListParameters> listParams;
+ // TODO: Make all data members private
+ public:
+ //! All data related to the pair lists
+ std::unique_ptr<PairlistSets> pairlistSets_;
//! Working data for constructing the pairlists
std::unique_ptr<nbnxn_search> nbs;
- private:
- //! Local and, optionally, non-local pairlist sets
- std::vector<nbnxn_pairlist_set_t> pairlistSets_;
- public:
//! Atom data
nbnxn_atomdata_t *nbat;
public:
gmx_nbnxn_gpu_t *gpu_nbv; /**< pointer to GPU nb verlet data */
- int min_ci_balanced; /**< pair list balancing parameter used for the 8x8x8 GPU kernels */
};
namespace Nbnxm
/*! \brief Returns the index position of the atoms on the pairlist search grid */
gmx::ArrayRef<const int> nbnxn_get_gridindices(const nbnxn_search* nbs);
-/*! \brief Returns the number of steps performed with the current pair list */
-int nbnxnNumStepsWithPairlist(const nonbonded_verlet_t &nbv,
- Nbnxm::InteractionLocality ilocality,
- int64_t step);
-
-/*! \brief Returns whether step is a dynamic list pruning step */
-bool nbnxnIsDynamicPairlistPruningStep(const nonbonded_verlet_t &nbv,
- Nbnxm::InteractionLocality ilocality,
- int64_t step);
-
-/*! \brief Executes the non-bonded kernel of the GPU or launches it on the GPU */
-void NbnxnDispatchKernel(nonbonded_verlet_t *nbv,
- Nbnxm::InteractionLocality iLocality,
- const interaction_const_t &ic,
- int forceFlags,
- int clearF,
- t_forcerec *fr,
- gmx_enerdata_t *enerd,
- t_nrnb *nrnb);
-
#endif // GMX_NBNXN_NBNXN_H
} // namespace Nbnxm
-void nonbonded_verlet_t::initPairlistSets(const bool haveMultipleDomains)
+nonbonded_verlet_t::PairlistSets::PairlistSets(const NbnxnListParameters &listParams,
+ const bool haveMultipleDomains,
+ const int minimumIlistCountForGpuBalancing) :
+ params_(listParams), // Stores a copy of the passed parameters
+ minimumIlistCountForGpuBalancing_(minimumIlistCountForGpuBalancing) // NOTE(review): outerListCreationStep_ is not set by this constructor
{
- pairlistSets_.emplace_back(*listParams);
+ localSet_ = std::make_unique<nbnxn_pairlist_set_t>(params_); // The local set is always created
+
if (haveMultipleDomains)
{
- pairlistSets_.emplace_back(*listParams);
+ nonlocalSet_ = std::make_unique<nbnxn_pairlist_set_t>(params_); // The non-local set only exists with multiple domains
}
}
namespace Nbnxm
{
+/*! \brief Gets and returns the minimum i-list count for balancing based on the GPU used or env.var. when set
+ *
+ * When the environment variable GMX_NB_MIN_CI is set, its value is parsed
+ * and returned. Otherwise the value of gpu_min_ci_balanced() is returned.
+ * In both cases the value is written to the debug file when debugging.
+ *
+ * Calls gmx_fatal() when GMX_NB_MIN_CI is set, but is empty, has trailing
+ * characters, is negative or does not fit in an int.
+ *
+ * \param[in] nbnxmGpu  The GPU non-bonded data that is passed to gpu_min_ci_balanced()
+ * \returns the minimum i-list count
+ */
+static int getMinimumIlistCountForGpuBalancing(gmx_nbnxn_gpu_t *nbnxmGpu)
+{
+    int minimumIlistCount;
+
+    if (const char *env = getenv("GMX_NB_MIN_CI"))
+    {
+        char *end;
+
+        /* Parse into a long first, so a value that does not fit in an int
+         * is detected below instead of being silently truncated.
+         */
+        const long parsedValue = strtol(env, &end, 10);
+        minimumIlistCount = static_cast<int>(parsedValue);
+
+        /* end == env means no digits were consumed, e.g. with an empty string.
+         * The round-trip comparison rejects values outside the int range.
+         */
+        if (end == env || (*end != 0) || parsedValue < 0 || minimumIlistCount != parsedValue)
+        {
+            gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, non-negative integer required", env);
+        }
+
+        if (debug)
+        {
+            fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
+                    minimumIlistCount);
+        }
+    }
+    else
+    {
+        minimumIlistCount = gpu_min_ci_balanced(nbnxmGpu);
+        if (debug)
+        {
+            fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
+                    minimumIlistCount);
+        }
+    }
+
+    return minimumIlistCount;
+}
+
+
void init_nb_verlet(const gmx::MDLogger &mdlog,
nonbonded_verlet_t **nb_verlet,
gmx_bool bFEP_NonBonded,
nonbondedResource, ir,
fr->bNonbonded));
- const bool haveMultipleDomains = (DOMAINDECOMP(cr) && cr->dd->nnodes > 1);
-
- nbv->listParams = std::make_unique<NbnxnListParameters>(nbv->kernelSetup().kernelType,
- ir->rlist);
- nbv->initPairlistSets(haveMultipleDomains);
+ const bool haveMultipleDomains = (DOMAINDECOMP(cr) && cr->dd->nnodes > 1);
- nbv->min_ci_balanced = 0;
+ NbnxnListParameters listParams(nbv->kernelSetup().kernelType, ir->rlist);
setupDynamicPairlistPruning(mdlog, ir, mtop, box, fr->ic,
- nbv->listParams.get());
-
- nbv->nbs = std::make_unique<nbnxn_search>(ir->ePBC,
- DOMAINDECOMP(cr) ? &cr->dd->nc : nullptr,
- DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : nullptr,
- bFEP_NonBonded,
- gmx_omp_nthreads_get(emntPairsearch));
+ &listParams);
int enbnxninitcombrule;
if (fr->ic->vdwtype == evdwCUT &&
mimimumNumEnergyGroupNonbonded,
nbv->pairlistIsSimple() ? gmx_omp_nthreads_get(emntNonbonded) : 1);
+ int minimumIlistCountForGpuBalancing = 0;
if (useGpu)
{
/* init the NxN GPU data; the last argument tells whether we'll have
gpu_init(&nbv->gpu_nbv,
deviceInfo,
fr->ic,
- nbv->listParams.get(),
+ &listParams,
nbv->nbat,
cr->nodeid,
haveMultipleDomains);
- if (const char *env = getenv("GMX_NB_MIN_CI"))
- {
- char *end;
-
- nbv->min_ci_balanced = strtol(env, &end, 10);
- if (!end || (*end != 0) || nbv->min_ci_balanced < 0)
- {
- gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, non-negative integer required", env);
- }
-
- if (debug)
- {
- fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
- nbv->min_ci_balanced);
- }
- }
- else
- {
- nbv->min_ci_balanced = gpu_min_ci_balanced(nbv->gpu_nbv);
- if (debug)
- {
- fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
- nbv->min_ci_balanced);
- }
- }
-
+ minimumIlistCountForGpuBalancing = getMinimumIlistCountForGpuBalancing(nbv->gpu_nbv);
}
+ nbv->pairlistSets_ =
+ std::make_unique<nonbonded_verlet_t::PairlistSets>(listParams,
+ haveMultipleDomains,
+ minimumIlistCountForGpuBalancing);
+
+ nbv->nbs = std::make_unique<nbnxn_search>(ir->ePBC,
+ DOMAINDECOMP(cr) ? &cr->dd->nc : nullptr,
+ DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : nullptr,
+ bFEP_NonBonded,
+ gmx_omp_nthreads_get(emntPairsearch));
+
*nb_verlet = nbv;
}
std::swap(nbl->sci, work.sci_sort);
}
-void nbnxn_make_pairlist(nonbonded_verlet_t *nbv,
- const InteractionLocality iLocality,
- nbnxn_pairlist_set_t *nbl_list,
- const t_blocka *excl,
- const int64_t step,
- t_nrnb *nrnb)
+void
+nonbonded_verlet_t::PairlistSets::construct(const InteractionLocality iLocality,
+ nbnxn_search *nbs,
+ nbnxn_atomdata_t *nbat,
+ const t_blocka *excl,
+ const Nbnxm::KernelType kernelType,
+ const int64_t step,
+ t_nrnb *nrnb)
{
- nbnxn_search *nbs = nbv->nbs.get();
- nbnxn_atomdata_t *nbat = nbv->nbat;
- const real rlist = nbv->listParams->rlistOuter;
+ nbnxn_pairlist_set_t *nbl_list = &pairlistSet(iLocality);
+
+ const real rlist = nbl_list->params.rlistOuter;
int nsubpair_target;
float nsubpair_tot_est;
nzi = nbs->zones->nizone;
}
- if (!nbl_list->bSimple && nbv->min_ci_balanced > 0)
+ if (!nbl_list->bSimple && minimumIlistCountForGpuBalancing_ > 0)
{
- get_nsubpair_target(nbs, iLocality, rlist, nbv->min_ci_balanced,
+ get_nsubpair_target(nbs, iLocality, rlist, minimumIlistCountForGpuBalancing_,
&nsubpair_target, &nsubpair_tot_est);
}
else
nbnxn_make_pairlist_part(nbs, iGrid, jGrid,
&nbs->work[th], nbat, *excl,
rlist,
- nbv->kernelSetup().kernelType,
+ kernelType,
ci_block,
nbat->bUseBufferFlags,
nsubpair_target,
nbnxn_make_pairlist_part(nbs, iGrid, jGrid,
&nbs->work[th], nbat, *excl,
rlist,
- nbv->kernelSetup().kernelType,
+ kernelType,
ci_block,
nbat->bUseBufferFlags,
nsubpair_target,
GMX_ASSERT(nbl_list->nbl[0]->ciOuter.empty(), "ciOuter is invalid so it should be empty");
}
- nbl_list->outerListCreationStep = step;
+ if (iLocality == Nbnxm::InteractionLocality::Local)
+ {
+ outerListCreationStep_ = step;
+ }
+ else
+ {
+ GMX_RELEASE_ASSERT(outerListCreationStep_ == step,
+ "Outer list should be created at the same step as the inner list");
+ }
/* Special performance logging stuff (env.var. GMX_NBNXN_CYCLE) */
if (iLocality == InteractionLocality::Local)
}
}
- if (nbv->listParams->useDynamicPruning && !nbv->useGpu())
+ if (params_.useDynamicPruning && nbl_list->bSimple)
{
nbnxnPrepareListForDynamicPruning(nbl_list);
}
+}
+
+void
+nonbonded_verlet_t::constructPairlist(const Nbnxm::InteractionLocality iLocality,
+ const t_blocka *excl,
+ int64_t step,
+ t_nrnb *nrnb)
+{
+ pairlistSets_->construct(iLocality, nbs.get(), nbat, excl,
+ kernelSetup_.kernelType,
+ step, nrnb);
- if (nbv->useGpu())
+ if (useGpu())
{
/* Launch the transfer of the pairlist to the GPU.
*
* NOTE: The launch overhead is currently not timed separately
*/
- Nbnxm::gpu_init_pairlist(nbv->gpu_nbv,
- nbl_list->nblGpu[0],
+ Nbnxm::gpu_init_pairlist(gpu_nbv,
+ pairlistSets().pairlistSet(iLocality).nblGpu[0],
iLocality);
}
}
// to include it during OpenCL jitting without including config.h
#include "gromacs/nbnxm/constants.h"
+struct NbnxnListParameters;
struct NbnxnPairlistCpuWork;
struct NbnxnPairlistGpuWork;
struct tMPI_Atomic;
static constexpr gmx::EnumerationArray<PairlistType, int> IClusterSizePerListType = { 4, 4, 4, 8 };
static constexpr gmx::EnumerationArray<PairlistType, int> JClusterSizePerListType = { 2, 4, 8, 8 };
-/*! \cond INTERNAL */
-
-/*! \brief The setup for generating and pruning the nbnxn pair list.
- *
- * Without dynamic pruning rlistOuter=rlistInner.
- */
-struct NbnxnListParameters
-{
- /*! \brief Constructor producing a struct with dynamic pruning disabled
- */
- NbnxnListParameters(Nbnxm::KernelType kernelType,
- real rlist);
-
- PairlistType pairlistType; //!< The type of cluster-pair list
- bool useDynamicPruning; //!< Are we using dynamic pair-list pruning
- int nstlistPrune; //!< Pair-list dynamic pruning interval
- real rlistOuter; //!< Cut-off of the larger, outer pair-list
- real rlistInner; //!< Cut-off of the smaller, inner pair-list
- int numRollingParts; //!< The number parts to divide the pair-list into for rolling pruning, a value of 1 gives no rolling pruning
-};
-
-/*! \endcond */
-
/* With CPU kernels the i-cluster size is always 4 atoms. */
static constexpr int c_nbnxnCpuIClusterSize = 4;
int natpair_lj; /* Total number of atom pairs for LJ kernel */
int natpair_q; /* Total number of atom pairs for Q kernel */
std::vector<t_nblist *> nbl_fep; /* List of free-energy atom pair interactions */
- int64_t outerListCreationStep; /* Step at which the outer list was created */
};
enum {
nbnxn_init_pairlist_set(this);
}
-int nbnxnNumStepsWithPairlist(const nonbonded_verlet_t &nbv,
- const Nbnxm::InteractionLocality iLocality,
- const int64_t step)
-{
- return step - nbv.pairlistSet(iLocality).outerListCreationStep;
-}
-
-bool nbnxnIsDynamicPairlistPruningStep(const nonbonded_verlet_t &nbv,
- const Nbnxm::InteractionLocality iLocality,
- const int64_t step)
-{
- return nbnxnNumStepsWithPairlist(nbv, iLocality, step) % nbv.listParams->nstlistPrune == 0;
-}
-
/*! \endcond */
#define GMX_NBNXM_PAIRLISTSET_H
#include "gromacs/math/vectypes.h"
+#include "gromacs/nbnxm/nbnxm.h"
+#include "gromacs/nbnxm/pairlist.h"
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/real.h"
#include "locality.h"
-struct nbnxn_pairlist_set_t;
-
/* Initializes a set of pair lists stored in nbnxn_pairlist_set_t
*
* TODO: Merge into the constructor
#include "kernels_simd_4xm/kernel_prune.h"
-void NbnxnDispatchPruneKernel(nbnxn_pairlist_set_t *nbl_lists,
- const Nbnxm::KernelType kernelType,
- const nbnxn_atomdata_t *nbat,
- const rvec *shift_vec)
+void
+nonbonded_verlet_t::PairlistSets::dispatchPruneKernel(const Nbnxm::InteractionLocality iLocality,
+ const nbnxn_atomdata_t *nbat,
+ const rvec *shift_vec,
+ const Nbnxm::KernelType kernelType)
{
- const real rlistInner = nbl_lists->params.rlistInner;
+ nbnxn_pairlist_set_t *nbl_lists = &pairlistSet(iLocality);
+
+ const real rlistInner = nbl_lists->params.rlistInner;
GMX_ASSERT(nbl_lists->nbl[0]->ciOuter.size() >= nbl_lists->nbl[0]->ci.size(),
"Here we should either have an empty ci list or ciOuter should be >= ci");
}
}
}
+
+/* Forwards the dynamic pruning for the given locality to the pair-list sets,
+ * passing along the atom data and the kernel type stored in this object.
+ */
+void nonbonded_verlet_t::dispatchPruneKernel(const Nbnxm::InteractionLocality iLocality,
+                                             const rvec *shift_vec)
+{
+    const Nbnxm::KernelType pruneKernelType = kernelSetup_.kernelType;
+
+    pairlistSets_->dispatchPruneKernel(iLocality, nbat, shift_vec, pruneKernelType);
+}