return;
}
- nonbonded_verlet_t *nbv = fr->nbv;
- nonbonded_verlet_group_t *nbvg = &nbv->grp[ilocality];
+ nonbonded_verlet_t *nbv = fr->nbv;
/* GPU kernel launch overhead is already timed separately */
if (fr->cutoff_scheme != ecutsVERLET)
gmx_incons("Invalid cut-off scheme passed!");
}
- bool bUsingGpuKernels = (nbvg->kernel_type == nbnxnk8x8x8_GPU);
-
- if (!bUsingGpuKernels)
+ if (!nbv->useGpu())
{
/* When dynamic pair-list pruning is requested, we need to prune
* at nstlistPrune steps.
*/
if (nbv->listParams->useDynamicPruning &&
- (step - nbvg->nbl_lists.outerListCreationStep) % nbv->listParams->nstlistPrune == 0)
+ nbnxnIsDynamicPairlistPruningStep(*nbv, ilocality, step))
{
/* Prune the pair-list beyond fr->ic->rlistPrune using
* the current coordinates of the atoms.
NbnxnDispatchKernel(nbv, ilocality, *ic, flags, clearF, fr, enerd, nrnb);
- if (!bUsingGpuKernels)
+ if (!nbv->useGpu())
{
wallcycle_sub_stop(wcycle, ewcsNONBONDED);
}
/* Null-safe query of whether the given non-bonded Verlet object uses a GPU.
 *
 * Evaluates to true only when nbv is not nullptr and the GPU-use query on
 * nbv succeeds; a null nbv yields false and nbv is not dereferenced in that
 * case because of the short-circuit &&.
 *
 * In this change the removed line read the bUseGPU data member directly,
 * while the added line calls the useGpu() member function instead.
 */
gmx_bool use_GPU(const nonbonded_verlet_t *nbv)
{
- return nbv != nullptr && nbv->bUseGPU;
+ return nbv != nullptr && nbv->useGpu();
}
static inline void clear_rvecs_omp(int n, rvec v[])
*/
int numRollingParts = nbv->listParams->numRollingParts;
GMX_ASSERT(numRollingParts == nbv->listParams->nstlistPrune/2, "Since we alternate local/non-local at even/odd steps, we need numRollingParts<=nstlistPrune/2 for correctness and == for efficiency");
- int stepWithCurrentList = step - nbv->grp[Nbnxm::InteractionLocality::Local].nbl_lists.outerListCreationStep;
+ int stepWithCurrentList = nbnxnNumStepsWithPairlist(*nbv, Nbnxm::InteractionLocality::Local, step);
bool stepIsEven = ((stepWithCurrentList & 1) == 0);
if (stepWithCurrentList > 0 &&
stepWithCurrentList < inputrec->nstlist - 1 &&
bFillGrid = (bNS && bStateChanged);
bCalcCGCM = (bFillGrid && !DOMAINDECOMP(cr));
bDoForces = ((flags & GMX_FORCE_FORCES) != 0);
- bUseGPU = fr->nbv->bUseGPU;
- bUseOrEmulGPU = bUseGPU || (fr->nbv->emulateGpu == EmulateGpuNonbonded::Yes);
+ bUseGPU = fr->nbv->useGpu();
+ bUseOrEmulGPU = bUseGPU || fr->nbv->emulateGpu();
const auto pmeRunMode = fr->pmedata ? pme_run_mode(fr->pmedata) : PmeRunMode::CPU;
// TODO slim this conditional down - inputrec and duty checks should mean the same in proper code!
if (!DOMAINDECOMP(cr))
{
wallcycle_sub_start(wcycle, ewcsNBS_GRID_LOCAL);
- nbnxn_put_on_grid(nbv->nbs.get(), fr->ePBC, box,
+ nbnxn_put_on_grid(nbv, box,
0, vzero, box_diag,
nullptr, 0, mdatoms->homenr, -1,
fr->cginfo, x.unpaddedArrayRef(),
- 0, nullptr,
- nbv->grp[Nbnxm::InteractionLocality::Local].kernel_type,
- nbv->nbat);
+ 0, nullptr);
wallcycle_sub_stop(wcycle, ewcsNBS_GRID_LOCAL);
}
else
{
wallcycle_sub_start(wcycle, ewcsNBS_GRID_NONLOCAL);
- nbnxn_put_on_grid_nonlocal(nbv->nbs.get(), domdec_zones(cr->dd),
- fr->cginfo, x.unpaddedArrayRef(),
- nbv->grp[Nbnxm::InteractionLocality::NonLocal].kernel_type,
- nbv->nbat);
+ nbnxn_put_on_grid_nonlocal(nbv, domdec_zones(cr->dd),
+ fr->cginfo, x.unpaddedArrayRef());
wallcycle_sub_stop(wcycle, ewcsNBS_GRID_NONLOCAL);
}
/* do local pair search */
if (bNS)
{
- nbnxn_pairlist_set_t &pairlistSet = nbv->grp[Nbnxm::InteractionLocality::Local].nbl_lists;
-
wallcycle_start_nocount(wcycle, ewcNS);
wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL);
- nbnxn_make_pairlist(nbv->nbs.get(), nbv->nbat,
- &top->excls,
- nbv->listParams->rlistOuter,
- nbv->min_ci_balanced,
- &pairlistSet,
- Nbnxm::InteractionLocality::Local,
- nbv->grp[Nbnxm::InteractionLocality::Local].kernel_type,
- nrnb);
- pairlistSet.outerListCreationStep = step;
- if (nbv->listParams->useDynamicPruning && !bUseGPU)
- {
- nbnxnPrepareListForDynamicPruning(&pairlistSet);
- }
+ /* Note that with a GPU the launch overhead of the list transfer is not timed separately */
+ nbnxn_make_pairlist(nbv, Nbnxm::InteractionLocality::Local,
+ &top->excls, step, nrnb);
wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL);
-
- if (bUseGPU)
- {
- /* initialize local pair-list on the GPU */
- Nbnxm::gpu_init_pairlist(nbv->gpu_nbv,
- pairlistSet.nblGpu[0],
- Nbnxm::InteractionLocality::Local);
- }
wallcycle_stop(wcycle, ewcNS);
}
else
do non-local pair search */
if (havePPDomainDecomposition(cr))
{
- nbnxn_pairlist_set_t &pairlistSet = nbv->grp[Nbnxm::InteractionLocality::NonLocal].nbl_lists;
-
if (bNS)
{
wallcycle_start_nocount(wcycle, ewcNS);
wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL);
-
- nbnxn_make_pairlist(nbv->nbs.get(), nbv->nbat,
- &top->excls,
- nbv->listParams->rlistOuter,
- nbv->min_ci_balanced,
- &pairlistSet,
- Nbnxm::InteractionLocality::NonLocal,
- nbv->grp[Nbnxm::InteractionLocality::NonLocal].kernel_type,
- nrnb);
- pairlistSet.outerListCreationStep = step;
- if (nbv->listParams->useDynamicPruning && !bUseGPU)
- {
- nbnxnPrepareListForDynamicPruning(&pairlistSet);
- }
+ /* Note that with a GPU the launch overhead of the list transfer is not timed separately */
+ nbnxn_make_pairlist(nbv, Nbnxm::InteractionLocality::NonLocal,
+ &top->excls, step, nrnb);
wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL);
-
- if (nbv->grp[Nbnxm::InteractionLocality::NonLocal].kernel_type == nbnxnk8x8x8_GPU)
- {
- /* initialize non-local pair-list on the GPU */
- Nbnxm::gpu_init_pairlist(nbv->gpu_nbv,
- pairlistSet.nblGpu[0],
- Nbnxm::InteractionLocality::NonLocal);
- }
wallcycle_stop(wcycle, ewcNS);
}
else
/* Calculate the local and non-local free energy interactions here.
* Happens here on the CPU both with and without GPU.
*/
- if (fr->nbv->grp[Nbnxm::InteractionLocality::Local].nbl_lists.nbl_fep[0]->nrj > 0)
+ if (fr->nbv->pairlistSets[Nbnxm::InteractionLocality::Local].nbl_fep[0]->nrj > 0)
{
- do_nb_verlet_fep(&fr->nbv->grp[Nbnxm::InteractionLocality::Local].nbl_lists,
+ do_nb_verlet_fep(&fr->nbv->pairlistSets[Nbnxm::InteractionLocality::Local],
fr, as_rvec_array(x.unpaddedArrayRef().data()), f, mdatoms,
inputrec->fepvals, lambda,
enerd, flags, nrnb, wcycle);
}
if (DOMAINDECOMP(cr) &&
- fr->nbv->grp[Nbnxm::InteractionLocality::NonLocal].nbl_lists.nbl_fep[0]->nrj > 0)
+ fr->nbv->pairlistSets[Nbnxm::InteractionLocality::NonLocal].nbl_fep[0]->nrj > 0)
{
- do_nb_verlet_fep(&fr->nbv->grp[Nbnxm::InteractionLocality::NonLocal].nbl_lists,
+ do_nb_verlet_fep(&fr->nbv->pairlistSets[Nbnxm::InteractionLocality::NonLocal],
fr, as_rvec_array(x.unpaddedArrayRef().data()), f, mdatoms,
inputrec->fepvals, lambda,
enerd, flags, nrnb, wcycle);
/* if there are multiple fshift output buffers reduce them */
if ((flags & GMX_FORCE_VIRIAL) &&
- nbv->grp[iloc].nbl_lists.nnbl > 1)
+ nbv->pairlistSets[iloc].nnbl > 1)
{
/* This is not in a subcounter because it takes a
negligible and constant-sized amount of time */
}
/* skip the reduction if there was no non-local work to do */
- if (!nbv->grp[Nbnxm::InteractionLocality::NonLocal].nbl_lists.nblGpu[0]->sci.empty())
+ if (!nbv->pairlistSets[Nbnxm::InteractionLocality::NonLocal].nblGpu[0]->sci.empty())
{
nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs.get(), Nbnxm::AtomLocality::NonLocal,
nbv->nbat, f, wcycle);
}
}
- if (fr->nbv->emulateGpu == EmulateGpuNonbonded::Yes)
+ if (fr->nbv->emulateGpu())
{
// NOTE: emulation kernel is not included in the balancing region,
// but emulation mode does not target performance anyway