case ecutsVERLET:
set_zones_size(dd, state_local->box, &ddbox, 0, 1, ncg_moved);
- nbnxn_put_on_grid(fr->nbv->nbs.get(), fr->ePBC, state_local->box,
+ nbnxn_put_on_grid(fr->nbv, state_local->box,
0,
comm->zones.size[0].bb_x0,
comm->zones.size[0].bb_x1,
comm->zones.dens_zone0,
fr->cginfo,
state_local->x,
- ncg_moved, bRedist ? comm->movedBuffer.data() : nullptr,
- fr->nbv->grp[Nbnxm::InteractionLocality::Local].kernel_type,
- fr->nbv->nbat);
+ ncg_moved, bRedist ? comm->movedBuffer.data() : nullptr);
nbnxn_get_ncells(fr->nbv->nbs.get(), &ncells_new[XX], &ncells_new[YY]);
break;
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/nbnxm/gpu_data_mgmt.h"
#include "gromacs/nbnxm/nbnxm.h"
+#include "gromacs/nbnxm/nbnxm_geometry.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/tables/forcetable.h"
}
}
-gmx_bool uses_simple_tables(int cutoff_scheme,
- nonbonded_verlet_t *nbv,
- int group)
+gmx_bool uses_simple_tables(int cutoff_scheme,
+ const nonbonded_verlet_t *nbv)
{
gmx_bool bUsesSimpleTables = TRUE;
- int grp_index;
switch (cutoff_scheme)
{
bUsesSimpleTables = TRUE;
break;
case ecutsVERLET:
- assert(nullptr != nbv);
- grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
- bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
+ GMX_RELEASE_ASSERT(nullptr != nbv, "A non-bonded verlet object is required with the Verlet cutoff-scheme");
+ bUsesSimpleTables = nbv->pairlistIsSimple();
break;
default:
gmx_incons("unimplemented");
void free_gpu_resources(t_forcerec *fr,
const gmx::PhysicalNodeCommunicator &physicalNodeCommunicator)
{
- bool isPPrankUsingGPU = (fr != nullptr) && (fr->nbv != nullptr) && fr->nbv->bUseGPU;
+ bool isPPrankUsingGPU = (fr != nullptr) && (fr->nbv != nullptr) && fr->nbv->useGpu();
/* stop the GPU profiler (only CUDA) */
stopGpuProfiler();
*/
void update_forcerec(t_forcerec *fr, matrix box);
-gmx_bool uses_simple_tables(int cutoff_scheme,
- nonbonded_verlet_t *nbv,
- int group);
+gmx_bool uses_simple_tables(int cutoff_scheme,
+ const nonbonded_verlet_t *nbv);
/* Returns whether simple tables (i.e. not for use with GPUs) are used
* with the type of kernel indicated.
*/
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
-#include "gromacs/nbnxm/nbnxm.h"
+#include "gromacs/nbnxm/nbnxm_geometry.h"
#include "gromacs/simd/simd.h"
#include "gromacs/topology/ifunc.h"
#include "gromacs/topology/topology.h"
return;
}
- nonbonded_verlet_t *nbv = fr->nbv;
- nonbonded_verlet_group_t *nbvg = &nbv->grp[ilocality];
+ nonbonded_verlet_t *nbv = fr->nbv;
/* GPU kernel launch overhead is already timed separately */
if (fr->cutoff_scheme != ecutsVERLET)
gmx_incons("Invalid cut-off scheme passed!");
}
- bool bUsingGpuKernels = (nbvg->kernel_type == nbnxnk8x8x8_GPU);
-
- if (!bUsingGpuKernels)
+ if (!nbv->useGpu())
{
/* When dynamic pair-list pruning is requested, we need to prune
* at nstlistPrune steps.
*/
if (nbv->listParams->useDynamicPruning &&
- (step - nbvg->nbl_lists.outerListCreationStep) % nbv->listParams->nstlistPrune == 0)
+ nbnxnIsDynamicPairlistPruningStep(*nbv, ilocality, step))
{
/* Prune the pair-list beyond fr->ic->rlistPrune using
* the current coordinates of the atoms.
NbnxnDispatchKernel(nbv, ilocality, *ic, flags, clearF, fr, enerd, nrnb);
- if (!bUsingGpuKernels)
+ if (!nbv->useGpu())
{
wallcycle_sub_stop(wcycle, ewcsNONBONDED);
}
/* Returns TRUE when nbv is non-null and configured to run the non-bonded
 * kernels on a physical GPU. This change replaces the raw bUseGPU flag
 * with the new useGpu() accessor (kernelType_ == nbnxnk8x8x8_GPU). */
gmx_bool use_GPU(const nonbonded_verlet_t *nbv)
{
- return nbv != nullptr && nbv->bUseGPU;
+ return nbv != nullptr && nbv->useGpu();
}
static inline void clear_rvecs_omp(int n, rvec v[])
*/
int numRollingParts = nbv->listParams->numRollingParts;
GMX_ASSERT(numRollingParts == nbv->listParams->nstlistPrune/2, "Since we alternate local/non-local at even/odd steps, we need numRollingParts<=nstlistPrune/2 for correctness and == for efficiency");
- int stepWithCurrentList = step - nbv->grp[Nbnxm::InteractionLocality::Local].nbl_lists.outerListCreationStep;
+ int stepWithCurrentList = nbnxnNumStepsWithPairlist(*nbv, Nbnxm::InteractionLocality::Local, step);
bool stepIsEven = ((stepWithCurrentList & 1) == 0);
if (stepWithCurrentList > 0 &&
stepWithCurrentList < inputrec->nstlist - 1 &&
bFillGrid = (bNS && bStateChanged);
bCalcCGCM = (bFillGrid && !DOMAINDECOMP(cr));
bDoForces = ((flags & GMX_FORCE_FORCES) != 0);
- bUseGPU = fr->nbv->bUseGPU;
- bUseOrEmulGPU = bUseGPU || (fr->nbv->emulateGpu == EmulateGpuNonbonded::Yes);
+ bUseGPU = fr->nbv->useGpu();
+ bUseOrEmulGPU = bUseGPU || fr->nbv->emulateGpu();
const auto pmeRunMode = fr->pmedata ? pme_run_mode(fr->pmedata) : PmeRunMode::CPU;
// TODO slim this conditional down - inputrec and duty checks should mean the same in proper code!
if (!DOMAINDECOMP(cr))
{
wallcycle_sub_start(wcycle, ewcsNBS_GRID_LOCAL);
- nbnxn_put_on_grid(nbv->nbs.get(), fr->ePBC, box,
+ nbnxn_put_on_grid(nbv, box,
0, vzero, box_diag,
nullptr, 0, mdatoms->homenr, -1,
fr->cginfo, x.unpaddedArrayRef(),
- 0, nullptr,
- nbv->grp[Nbnxm::InteractionLocality::Local].kernel_type,
- nbv->nbat);
+ 0, nullptr);
wallcycle_sub_stop(wcycle, ewcsNBS_GRID_LOCAL);
}
else
{
wallcycle_sub_start(wcycle, ewcsNBS_GRID_NONLOCAL);
- nbnxn_put_on_grid_nonlocal(nbv->nbs.get(), domdec_zones(cr->dd),
- fr->cginfo, x.unpaddedArrayRef(),
- nbv->grp[Nbnxm::InteractionLocality::NonLocal].kernel_type,
- nbv->nbat);
+ nbnxn_put_on_grid_nonlocal(nbv, domdec_zones(cr->dd),
+ fr->cginfo, x.unpaddedArrayRef());
wallcycle_sub_stop(wcycle, ewcsNBS_GRID_NONLOCAL);
}
/* do local pair search */
if (bNS)
{
- nbnxn_pairlist_set_t &pairlistSet = nbv->grp[Nbnxm::InteractionLocality::Local].nbl_lists;
-
wallcycle_start_nocount(wcycle, ewcNS);
wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL);
- nbnxn_make_pairlist(nbv->nbs.get(), nbv->nbat,
- &top->excls,
- nbv->listParams->rlistOuter,
- nbv->min_ci_balanced,
- &pairlistSet,
- Nbnxm::InteractionLocality::Local,
- nbv->grp[Nbnxm::InteractionLocality::Local].kernel_type,
- nrnb);
- pairlistSet.outerListCreationStep = step;
- if (nbv->listParams->useDynamicPruning && !bUseGPU)
- {
- nbnxnPrepareListForDynamicPruning(&pairlistSet);
- }
+ /* Note that with a GPU the launch overhead of the list transfer is not timed separately */
+ nbnxn_make_pairlist(nbv, Nbnxm::InteractionLocality::Local,
+ &top->excls, step, nrnb);
wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL);
-
- if (bUseGPU)
- {
- /* initialize local pair-list on the GPU */
- Nbnxm::gpu_init_pairlist(nbv->gpu_nbv,
- pairlistSet.nblGpu[0],
- Nbnxm::InteractionLocality::Local);
- }
wallcycle_stop(wcycle, ewcNS);
}
else
do non-local pair search */
if (havePPDomainDecomposition(cr))
{
- nbnxn_pairlist_set_t &pairlistSet = nbv->grp[Nbnxm::InteractionLocality::NonLocal].nbl_lists;
-
if (bNS)
{
wallcycle_start_nocount(wcycle, ewcNS);
wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL);
-
- nbnxn_make_pairlist(nbv->nbs.get(), nbv->nbat,
- &top->excls,
- nbv->listParams->rlistOuter,
- nbv->min_ci_balanced,
- &pairlistSet,
- Nbnxm::InteractionLocality::NonLocal,
- nbv->grp[Nbnxm::InteractionLocality::NonLocal].kernel_type,
- nrnb);
- pairlistSet.outerListCreationStep = step;
- if (nbv->listParams->useDynamicPruning && !bUseGPU)
- {
- nbnxnPrepareListForDynamicPruning(&pairlistSet);
- }
+ /* Note that with a GPU the launch overhead of the list transfer is not timed separately */
+ nbnxn_make_pairlist(nbv, Nbnxm::InteractionLocality::NonLocal,
+ &top->excls, step, nrnb);
wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL);
-
- if (nbv->grp[Nbnxm::InteractionLocality::NonLocal].kernel_type == nbnxnk8x8x8_GPU)
- {
- /* initialize non-local pair-list on the GPU */
- Nbnxm::gpu_init_pairlist(nbv->gpu_nbv,
- pairlistSet.nblGpu[0],
- Nbnxm::InteractionLocality::NonLocal);
- }
wallcycle_stop(wcycle, ewcNS);
}
else
/* Calculate the local and non-local free energy interactions here.
* Happens here on the CPU both with and without GPU.
*/
- if (fr->nbv->grp[Nbnxm::InteractionLocality::Local].nbl_lists.nbl_fep[0]->nrj > 0)
+ if (fr->nbv->pairlistSets[Nbnxm::InteractionLocality::Local].nbl_fep[0]->nrj > 0)
{
- do_nb_verlet_fep(&fr->nbv->grp[Nbnxm::InteractionLocality::Local].nbl_lists,
+ do_nb_verlet_fep(&fr->nbv->pairlistSets[Nbnxm::InteractionLocality::Local],
fr, as_rvec_array(x.unpaddedArrayRef().data()), f, mdatoms,
inputrec->fepvals, lambda,
enerd, flags, nrnb, wcycle);
}
if (DOMAINDECOMP(cr) &&
- fr->nbv->grp[Nbnxm::InteractionLocality::NonLocal].nbl_lists.nbl_fep[0]->nrj > 0)
+ fr->nbv->pairlistSets[Nbnxm::InteractionLocality::NonLocal].nbl_fep[0]->nrj > 0)
{
- do_nb_verlet_fep(&fr->nbv->grp[Nbnxm::InteractionLocality::NonLocal].nbl_lists,
+ do_nb_verlet_fep(&fr->nbv->pairlistSets[Nbnxm::InteractionLocality::NonLocal],
fr, as_rvec_array(x.unpaddedArrayRef().data()), f, mdatoms,
inputrec->fepvals, lambda,
enerd, flags, nrnb, wcycle);
/* if there are multiple fshift output buffers reduce them */
if ((flags & GMX_FORCE_VIRIAL) &&
- nbv->grp[iloc].nbl_lists.nnbl > 1)
+ nbv->pairlistSets[iloc].nnbl > 1)
{
/* This is not in a subcounter because it takes a
negligible and constant-sized amount of time */
}
/* skip the reduction if there was no non-local work to do */
- if (!nbv->grp[Nbnxm::InteractionLocality::NonLocal].nbl_lists.nblGpu[0]->sci.empty())
+ if (!nbv->pairlistSets[Nbnxm::InteractionLocality::NonLocal].nblGpu[0]->sci.empty())
{
nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs.get(), Nbnxm::AtomLocality::NonLocal,
nbv->nbat, f, wcycle);
}
}
- if (fr->nbv->emulateGpu == EmulateGpuNonbonded::Yes)
+ if (fr->nbv->emulateGpu())
{
// NOTE: emulation kernel is not included in the balancing region,
// but emulation mode does not target performance anyway
const interaction_const_t *ic,
const NbnxnListParameters *listParams)
{
- if (!nbv || nbv->grp[InteractionLocality::Local].kernel_type != nbnxnk8x8x8_GPU)
+ if (!nbv || !nbv->useGpu())
{
return;
}
* This function only operates on one domain of the domain decompostion.
* Note that without domain decomposition there is only one domain.
*/
-void nbnxn_put_on_grid(nbnxn_search_t nbs,
- int ePBC,
+void nbnxn_put_on_grid(nonbonded_verlet_t *nbv,
const matrix box,
int ddZone,
const rvec lowerCorner,
const int *atinfo,
gmx::ArrayRef<const gmx::RVec> x,
int numAtomsMoved,
- const int *move,
- int nb_kernel_type,
- nbnxn_atomdata_t *nbat)
+ const int *move)
{
+ nbnxn_search *nbs = nbv->nbs.get();
nbnxn_grid_t *grid = &nbs->grid[ddZone];
nbs_cycle_start(&nbs->cc[enbsCCgrid]);
- grid->bSimple = nbnxn_kernel_pairlist_simple(nb_kernel_type);
+ grid->bSimple = nbv->pairlistIsSimple();
- grid->na_c = nbnxn_kernel_to_cluster_i_size(nb_kernel_type);
- grid->na_cj = nbnxn_kernel_to_cluster_j_size(nb_kernel_type);
+ grid->na_c = nbnxn_kernel_to_cluster_i_size(nbv->kernelType_);
+ grid->na_cj = nbnxn_kernel_to_cluster_j_size(nbv->kernelType_);
grid->na_sc = (grid->bSimple ? 1 : c_gpuNumClusterPerCell)*grid->na_c;
grid->na_c_2log = get_2log(grid->na_c);
if (ddZone == 0)
{
- nbs->ePBC = ePBC;
copy_mat(box, nbs->box);
/* Avoid zero density */
lowerCorner, upperCorner,
nbs->grid[0].atom_density);
+ nbnxn_atomdata_t *nbat = nbv->nbat;
+
calc_cell_indices(nbs, ddZone, grid, updateGroupsCog, atomStart, atomEnd, atinfo, x, numAtomsMoved, move, nbat);
if (ddZone == 0)
}
/* Calls nbnxn_put_on_grid for all non-local domains */
-void nbnxn_put_on_grid_nonlocal(nbnxn_search_t nbs,
+void nbnxn_put_on_grid_nonlocal(nonbonded_verlet_t *nbv,
const struct gmx_domdec_zones_t *zones,
const int *atinfo,
- gmx::ArrayRef<const gmx::RVec> x,
- int nb_kernel_type,
- nbnxn_atomdata_t *nbat)
+ gmx::ArrayRef<const gmx::RVec> x)
{
for (int zone = 1; zone < zones->n; zone++)
{
c1[d] = zones->size[zone].bb_x1[d];
}
- nbnxn_put_on_grid(nbs, nbs->ePBC, nullptr,
+ nbnxn_put_on_grid(nbv, nullptr,
zone, c0, c1,
nullptr,
zones->cg_range[zone],
-1,
atinfo,
x,
- 0, nullptr,
- nb_kernel_type,
- nbat);
+ 0, nullptr);
}
}
{
/* \brief Constructor
*
+ * \param[in] ePBC The periodic boundary conditions
* \param[in] n_dd_cells The number of domain decomposition cells per dimension, without DD nullptr should be passed
* \param[in] zones The domain decomposition zone setup, without DD nullptr should be passed
* \param[in] bFEP Tells whether non-bonded interactions are perturbed
* \param[in] nthread_max The maximum number of threads used in the search
*/
- nbnxn_search(const ivec *n_dd_cells,
+ nbnxn_search(int ePBC,
+ const ivec *n_dd_cells,
const gmx_domdec_zones_t *zones,
gmx_bool bFEP,
int nthread_max);
* Energy reduction, but not force and shift force reduction, is performed
* within this function.
*
- * \param[in] nbvg The group (local/non-local) to compute interaction for
+ * \param[in] pairlistSet Pairlists with local or non-local interactions to compute
+ * \param[in] kernel_type The non-bonded kernel type
+ * \param[in] ewald_excl The Ewald exclusion treatment
* \param[in,out] nbat The atomdata for the interactions
* \param[in] ic Non-bonded interaction constants
* \param[in] shiftVectors The PBC shift vectors
* \param[out] vVdw Output buffer for Van der Waals energies
*/
static void
-nbnxn_kernel_cpu(const nonbonded_verlet_group_t *nbvg,
+nbnxn_kernel_cpu(const nbnxn_pairlist_set_t &pairlistSet,
+ const int kernel_type,
+ const int ewald_excl,
nbnxn_atomdata_t *nbat,
const interaction_const_t &ic,
rvec *shiftVectors,
}
else
{
- if (nbvg->ewald_excl == ewaldexclTable)
+ if (ewald_excl == ewaldexclTable)
{
if (ic.rcoulomb == ic.rvdw)
{
{
vdwkt = vdwktLJEWALDCOMBLB;
/* At setup we (should have) selected the C reference kernel */
- GMX_RELEASE_ASSERT(nbvg->kernel_type == nbnxnk4x4_PlainC, "Only the C reference nbnxn SIMD kernel supports LJ-PME with LB combination rules");
+ GMX_RELEASE_ASSERT(kernel_type == nbnxnk4x4_PlainC, "Only the C reference nbnxn SIMD kernel supports LJ-PME with LB combination rules");
}
}
else
GMX_RELEASE_ASSERT(false, "Unsupported VdW interaction type");
}
- int nnbl = nbvg->nbl_lists.nnbl;
- NbnxnPairlistCpu * const * nbl = nbvg->nbl_lists.nbl;
+ int nnbl = pairlistSet.nnbl;
+ NbnxnPairlistCpu * const * nbl = pairlistSet.nbl;
int gmx_unused nthreads = gmx_omp_nthreads_get(emntNonbonded);
#pragma omp parallel for schedule(static) num_threads(nthreads)
if (!(forceFlags & GMX_FORCE_ENERGY))
{
/* Don't calculate energies */
- switch (nbvg->kernel_type)
+ switch (kernel_type)
{
case nbnxnk4x4_PlainC:
nbnxn_kernel_noener_ref[coulkt][vdwkt](nbl[nb], nbat,
out->Vvdw[0] = 0;
out->Vc[0] = 0;
- switch (nbvg->kernel_type)
+ switch (kernel_type)
{
case nbnxnk4x4_PlainC:
nbnxn_kernel_ener_ref[coulkt][vdwkt](nbl[nb], nbat,
int unrollj = 0;
- switch (nbvg->kernel_type)
+ switch (kernel_type)
{
case nbnxnk4x4_PlainC:
unrollj = c_nbnxnCpuIClusterSize;
GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
}
- if (nbvg->kernel_type != nbnxnk4x4_PlainC)
+ if (kernel_type != nbnxnk4x4_PlainC)
{
switch (unrollj)
{
const interaction_const_t &ic,
const int forceFlags)
{
- const nonbonded_verlet_group_t &nbvg = nbv.grp[iLocality];
- const bool usingGpuKernels = (nbvg.kernel_type == nbnxnk8x8x8_GPU);
+ const nbnxn_pairlist_set_t &pairlistSet = nbv.pairlistSets[iLocality];
+ const bool usingGpuKernels = nbv.useGpu();
int enr_nbnxn_kernel_ljc;
if (EEL_RF(ic.eeltype) || ic.eeltype == eelCUT)
{
enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_RF;
}
- else if ((!usingGpuKernels && nbvg.ewald_excl == ewaldexclAnalytical) ||
+ else if ((!usingGpuKernels && nbv.ewaldExclusionType_ == ewaldexclAnalytical) ||
(usingGpuKernels && Nbnxm::gpu_is_kernel_ewald_analytical(nbv.gpu_nbv)))
{
enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_EWALD;
}
inc_nrnb(nrnb, enr_nbnxn_kernel_ljc,
- nbvg.nbl_lists.natpair_ljq);
+ pairlistSet.natpair_ljq);
inc_nrnb(nrnb, enr_nbnxn_kernel_lj,
- nbvg.nbl_lists.natpair_lj);
+ pairlistSet.natpair_lj);
/* The Coulomb-only kernels are offset -eNR_NBNXN_LJ_RF+eNR_NBNXN_RF */
inc_nrnb(nrnb, enr_nbnxn_kernel_ljc-eNR_NBNXN_LJ_RF+eNR_NBNXN_RF,
- nbvg.nbl_lists.natpair_q);
+ pairlistSet.natpair_q);
const bool calcEnergy = ((forceFlags & GMX_FORCE_ENERGY) != 0);
if (ic.vdw_modifier == eintmodFORCESWITCH)
{
/* We add up the switch cost separately */
inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_FSW + (calcEnergy ? 1 : 0),
- nbvg.nbl_lists.natpair_ljq + nbvg.nbl_lists.natpair_lj);
+ pairlistSet.natpair_ljq + pairlistSet.natpair_lj);
}
if (ic.vdw_modifier == eintmodPOTSWITCH)
{
/* We add up the switch cost separately */
inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_PSW + (calcEnergy ? 1 : 0),
- nbvg.nbl_lists.natpair_ljq + nbvg.nbl_lists.natpair_lj);
+ pairlistSet.natpair_ljq + pairlistSet.natpair_lj);
}
if (ic.vdwtype == evdwPME)
{
/* We add up the LJ Ewald cost separately */
inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_EWALD + (calcEnergy ? 1 : 0),
- nbvg.nbl_lists.natpair_ljq + nbvg.nbl_lists.natpair_lj);
+ pairlistSet.natpair_ljq + pairlistSet.natpair_lj);
}
}
gmx_enerdata_t *enerd,
t_nrnb *nrnb)
{
- const nonbonded_verlet_group_t &nbvg = nbv->grp[iLocality];
+ const nbnxn_pairlist_set_t &pairlistSet = nbv->pairlistSets[iLocality];
- switch (nbvg.kernel_type)
+ switch (nbv->kernelType_)
{
case nbnxnk4x4_PlainC:
case nbnxnk4xN_SIMD_4xN:
case nbnxnk4xN_SIMD_2xNN:
- nbnxn_kernel_cpu(&nbvg,
+ nbnxn_kernel_cpu(pairlistSet,
+ nbv->kernelType_,
+ nbv->ewaldExclusionType_,
nbv->nbat,
ic,
fr->shift_vec,
break;
case nbnxnk8x8x8_PlainC:
- nbnxn_kernel_gpu_ref(nbvg.nbl_lists.nblGpu[0],
+ nbnxn_kernel_gpu_ref(pairlistSet.nblGpu[0],
nbv->nbat, &ic,
fr->shift_vec,
forceFlags,
#include "gromacs/math/vectypes.h"
#include "gromacs/nbnxm/pairlist.h"
-#include "gromacs/nbnxm/pairlistset.h"
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/enumerationhelpers.h"
#include "gromacs/utility/real.h"
struct gmx_hw_info_t;
struct gmx_mtop_t;
struct interaction_const_t;
+struct nbnxn_pairlist_set_t;
+struct t_blocka;
struct t_commrec;
struct t_nrnb;
struct t_forcerec;
enbvClearFNo, enbvClearFYes
};
-/*! \libinternal
- * \brief Non-bonded interaction group data structure. */
-typedef struct nonbonded_verlet_group_t {
- nbnxn_pairlist_set_t nbl_lists; /**< pair list(s) */
- int kernel_type; /**< non-bonded kernel - see enum above */
- int ewald_excl; /**< Ewald exclusion - see enum above */
-} nonbonded_verlet_group_t;
-
/*! \libinternal
* \brief Top-level non-bonded data structure for the Verlet-type cut-off scheme. */
struct nonbonded_verlet_t
{
+ //! Returns whether a GPU is used for the non-bonded calculations
+ bool useGpu() const
+ {
+ return kernelType_ == nbnxnk8x8x8_GPU;
+ }
+
+ //! Returns whether a GPU is emulated for the non-bonded calculations
+ bool emulateGpu() const
+ {
+ return kernelType_ == nbnxnk8x8x8_PlainC;
+ }
+
+ //! Return whether the pairlist is of simple, CPU type
+ bool pairlistIsSimple() const
+ {
+ return !useGpu() && !emulateGpu();
+ }
+
std::unique_ptr<NbnxnListParameters> listParams; /**< Parameters for the search and list pruning setup */
std::unique_ptr<nbnxn_search> nbs; /**< n vs n atom pair searching data */
int ngrp; /**< number of interaction groups */
- //! Local and non-local interaction group
- gmx::EnumerationArray<Nbnxm::InteractionLocality, nonbonded_verlet_group_t> grp;
+ //! Local and non-local pairlist sets
+ gmx::EnumerationArray<Nbnxm::InteractionLocality, nbnxn_pairlist_set_t> pairlistSets;
//! Atom data
nbnxn_atomdata_t *nbat;
- gmx_bool bUseGPU; /**< TRUE when non-bonded interactions are computed on a physical GPU */
- EmulateGpuNonbonded emulateGpu; /**< true when non-bonded interactions are computed on the CPU using GPU-style pair lists */
- gmx_nbnxn_gpu_t *gpu_nbv; /**< pointer to GPU nb verlet data */
- int min_ci_balanced; /**< pair list balancing parameter
- used for the 8x8x8 GPU kernels */
+ //! Non-bonded kernel - see enum above
+ int kernelType_;
+ //! Ewald exclusion - see enum above
+ int ewaldExclusionType_;
+
+ gmx_nbnxn_gpu_t *gpu_nbv; /**< pointer to GPU nb verlet data */
+ int min_ci_balanced; /**< pair list balancing parameter used for the 8x8x8 GPU kernels */
};
namespace Nbnxm
* When move[i] < 0 particle i has migrated and will not be put on the grid.
* Without domain decomposition move will be NULL.
*/
-void nbnxn_put_on_grid(nbnxn_search_t nbs,
- int ePBC,
+void nbnxn_put_on_grid(nonbonded_verlet_t *nb_verlet,
const matrix box,
int ddZone,
const rvec lowerCorner,
const int *atinfo,
gmx::ArrayRef<const gmx::RVec> x,
int numAtomsMoved,
- const int *move,
- int nb_kernel_type,
- nbnxn_atomdata_t *nbat);
+ const int *move);
/*! \brief As nbnxn_put_on_grid, but for the non-local atoms
*
* with domain decomposition. Should be called after calling
* nbnxn_search_put_on_grid for the local atoms / home zone.
*/
-void nbnxn_put_on_grid_nonlocal(nbnxn_search_t nbs,
+void nbnxn_put_on_grid_nonlocal(nonbonded_verlet_t *nb_verlet,
const struct gmx_domdec_zones_t *zones,
const int *atinfo,
- gmx::ArrayRef<const gmx::RVec> x,
- int nb_kernel_type,
- nbnxn_atomdata_t *nbat);
+ gmx::ArrayRef<const gmx::RVec> x);
/*! \brief Returns the number of x and y cells in the local grid */
void nbnxn_get_ncells(nbnxn_search_t nbs, int *ncx, int *ncy);
/*! \brief Returns the index position of the atoms on the pairlist search grid */
gmx::ArrayRef<const int> nbnxn_get_gridindices(const nbnxn_search* nbs);
+/*! \brief Generates a pair-list for the given locality.
+ *
+ * With perturbed particles, also a group scheme style nbl_fep list is made.
+ */
+void nbnxn_make_pairlist(nonbonded_verlet_t *nbv,
+ Nbnxm::InteractionLocality iLocality,
+ const t_blocka *excl,
+ int64_t step,
+ t_nrnb *nrnb);
+
+/*! \brief Returns the number of steps performed with the current pair list */
+int nbnxnNumStepsWithPairlist(const nonbonded_verlet_t &nbv,
+ Nbnxm::InteractionLocality ilocality,
+ int64_t step);
+
+/*! \brief Returns whether step is a dynamic list pruning step */
+bool nbnxnIsDynamicPairlistPruningStep(const nonbonded_verlet_t &nbv,
+ Nbnxm::InteractionLocality ilocality,
+ int64_t step);
+
/*! \brief Prune all pair-lists with given locality (currently CPU only)
*
* For all pair-lists with given locality, takes the outer list and prunes out
#include "gromacs/nbnxm/pairlist.h"
#include "gromacs/simd/simd.h"
#include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/real.h"
+
+/* Returns whether the pair-list layout for \p nb_kernel_type is the
+ * "simple" CPU cluster layout (4x4 / 4xN i-j clusters) as opposed to the
+ * hierarchical 8x8x8 layout used by the GPU and GPU-emulation kernels.
+ *
+ * Moved here (nbnxm_geometry) from the kernel-dispatch code in this change,
+ * and converted from gmx_bool to bool; fatals out on nbnxnkNotSet so a
+ * missing kernel selection is caught early rather than mis-dispatched. */
+bool nbnxn_kernel_pairlist_simple(int nb_kernel_type)
+{
+ if (nb_kernel_type == nbnxnkNotSet)
+ {
+ gmx_fatal(FARGS, "Non-bonded kernel type not set for Verlet-style pair-list.");
+ }
+
+ switch (nb_kernel_type)
+ {
+ case nbnxnk8x8x8_GPU:
+ case nbnxnk8x8x8_PlainC:
+ return false;
+
+ case nbnxnk4x4_PlainC:
+ case nbnxnk4xN_SIMD_4xN:
+ case nbnxnk4xN_SIMD_2xNN:
+ return true;
+
+ default:
+ gmx_incons("Invalid nonbonded kernel type passed!");
+ return false;
+ }
+}
int nbnxn_kernel_to_cluster_i_size(int nb_kernel_type)
{
return cj_size;
}
+
+/* Clusters at the cut-off only increase rlist by 60% of their size */
+static constexpr real c_nbnxnRlistIncreaseOutsideFactor = 0.6;
+
+/* Returns the extra list radius needed because interactions are searched
+ * per i/j cluster rather than per atom pair: each cluster contributes the
+ * volume of its (size - 1) extra atoms at the given atom density.
+ * NOTE(review): iClusterSize is hard-coded to the CPU i-cluster size here;
+ * the comment below says this currently also holds for GPU setups — confirm
+ * if cluster geometry ever diverges. */
+real nbnxn_get_rlist_effective_inc(const int jClusterSize,
+ const real atomDensity)
+{
+ /* We should get this from the setup, but currently it's the same for
+ * all setups, including GPUs.
+ */
+ const real iClusterSize = c_nbnxnCpuIClusterSize;
+
+ const real iVolumeIncrease = (iClusterSize - 1)/atomDensity;
+ const real jVolumeIncrease = (jClusterSize - 1)/atomDensity;
+
+ return c_nbnxnRlistIncreaseOutsideFactor*std::cbrt(iVolumeIncrease +
+ jVolumeIncrease);
+}
+
+/* Overload used when the physical cluster bounding box is known: the extra
+ * list radius is estimated from half the average bounding-box diagonal,
+ * scaled by the squared (size-1)/size occupancy ratio. This replaces the
+ * inline diagonal computation previously done in the pairlist search code. */
+real nbnxn_get_rlist_effective_inc(const int clusterSize,
+ const gmx::RVec &averageClusterBoundingBox)
+{
+ /* The average length of the diagonal of a sub cell */
+ const real diagonal = std::sqrt(norm2(averageClusterBoundingBox));
+
+ const real volumeRatio = (clusterSize - 1.0_real)/clusterSize;
+
+ return c_nbnxnRlistIncreaseOutsideFactor*gmx::square(volumeRatio)*0.5_real*diagonal;
+}
#ifndef GMX_NBNXM_NBNXM_GEOMETRY_H
#define GMX_NBNXM_NBNXM_GEOMETRY_H
+#include "gromacs/math/vectypes.h"
#include "gromacs/utility/fatalerror.h"
/* Returns the base-2 log of n.
return log2;
}
+/* Returns whether the pair-list corresponding to nb_kernel_type is simple */
+bool nbnxn_kernel_pairlist_simple(int nb_kernel_type);
+
/* Returns the nbnxn i-cluster size in atoms for the nbnxn kernel type */
int nbnxn_kernel_to_cluster_i_size(int nb_kernel_type);
/* Returns the nbnxn i-cluster size in atoms for the nbnxn kernel type */
int nbnxn_kernel_to_cluster_j_size(int nb_kernel_type);
+/* Returns the effective list radius of the pair-list
+ *
+ * Due to the cluster size the effective pair-list is longer than
+ * that of a simple atom pair-list. This function gives the extra distance.
+ *
+ * NOTE: If the i- and j-cluster sizes are identical and you know
+ * the physical dimensions of the clusters, use the next function
+ * for more accurate results
+ */
+real nbnxn_get_rlist_effective_inc(int jClusterSize,
+ real atomDensity);
+
+/* Returns the effective list radius of the pair-list
+ *
+ * Due to the cluster size the effective pair-list is longer than
+ * that of a simple atom pair-list. This function gives the extra distance.
+ */
+real nbnxn_get_rlist_effective_inc(int clusterSize,
+ const gmx::RVec &averageClusterBoundingBox);
+
#endif
#include "gromacs/nbnxm/nbnxm_geometry.h"
#include "gromacs/nbnxm/nbnxm_simd.h"
#include "gromacs/nbnxm/pairlist_tuning.h"
+#include "gromacs/nbnxm/pairlistset.h"
#include "gromacs/simd/simd.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/logger.h"
static void pick_nbnxn_kernel(const gmx::MDLogger &mdlog,
gmx_bool use_simd_kernels,
const gmx_hw_info_t &hardwareInfo,
- gmx_bool bUseGPU,
+ bool useGpu,
EmulateGpuNonbonded emulateGpu,
const t_inputrec *ir,
int *kernel_type,
GMX_LOG(mdlog.warning).asParagraph().appendText("Emulating a GPU run on the CPU (slow)");
}
}
- else if (bUseGPU)
+ else if (useGpu)
{
*kernel_type = nbnxnk8x8x8_GPU;
}
const gmx_mtop_t *mtop,
matrix box)
{
- nonbonded_verlet_t *nbv;
- char *env;
+ nonbonded_verlet_t *nbv = new nonbonded_verlet_t();
- nbv = new nonbonded_verlet_t();
+ const EmulateGpuNonbonded emulateGpu =
+ ((getenv("GMX_EMULATE_GPU") != nullptr) ? EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No);
+ bool useGpu = deviceInfo != nullptr;
- nbv->emulateGpu = ((getenv("GMX_EMULATE_GPU") != nullptr) ? EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No);
- nbv->bUseGPU = deviceInfo != nullptr;
-
- GMX_RELEASE_ASSERT(!(nbv->emulateGpu == EmulateGpuNonbonded::Yes && nbv->bUseGPU), "When GPU emulation is active, there cannot be a GPU assignment");
+ GMX_RELEASE_ASSERT(!(emulateGpu == EmulateGpuNonbonded::Yes && useGpu), "When GPU emulation is active, there cannot be a GPU assignment");
nbv->nbs = nullptr;
- nbv->min_ci_balanced = 0;
- nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
- for (int i = 0; i < nbv->ngrp; i++)
- {
- nbv->grp[i].nbl_lists.nnbl = 0;
- nbv->grp[i].kernel_type = nbnxnkNotSet;
+ pick_nbnxn_kernel(mdlog, fr->use_simd_kernels, hardwareInfo,
+ useGpu, emulateGpu, ir,
+ &nbv->kernelType_,
+ &nbv->ewaldExclusionType_,
+ fr->bNonbonded);
- if (i == 0) /* local */
- {
- pick_nbnxn_kernel(mdlog, fr->use_simd_kernels, hardwareInfo,
- nbv->bUseGPU, nbv->emulateGpu, ir,
- &nbv->grp[i].kernel_type,
- &nbv->grp[i].ewald_excl,
- fr->bNonbonded);
- }
- else /* non-local */
- {
- /* Use the same kernel for local and non-local interactions */
- nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
- nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl;
- }
+ const bool haveMultipleDomains = (DOMAINDECOMP(cr) && cr->dd->nnodes > 1);
+
+ const bool pairlistIsSimple = nbv->pairlistIsSimple();
+ for (nbnxn_pairlist_set_t &pairlistSet : nbv->pairlistSets)
+ {
+ // TODO Change this to a constructor
+ /* The second parameter tells whether lists should be combined,
+ * this is currently only and always done for GPU lists.
+ */
+ nbnxn_init_pairlist_set(&pairlistSet, pairlistIsSimple, !pairlistIsSimple);
}
+ nbv->min_ci_balanced = 0;
+
nbv->listParams = std::make_unique<NbnxnListParameters>(ir->rlist);
- setupDynamicPairlistPruning(mdlog, ir, mtop, box, nbv->grp[0].kernel_type, fr->ic,
+ setupDynamicPairlistPruning(mdlog, ir, mtop, box, nbv->kernelType_, fr->ic,
nbv->listParams.get());
- nbv->nbs = std::make_unique<nbnxn_search>(DOMAINDECOMP(cr) ? &cr->dd->nc : nullptr,
+ nbv->nbs = std::make_unique<nbnxn_search>(ir->ePBC,
+ DOMAINDECOMP(cr) ? &cr->dd->nc : nullptr,
DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : nullptr,
bFEP_NonBonded,
gmx_omp_nthreads_get(emntPairsearch));
- for (int i = 0; i < nbv->ngrp; i++)
- {
- nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
- nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
- /* 8x8x8 "non-simple" lists are ATM always combined */
- !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type));
- }
-
int enbnxninitcombrule;
if (fr->ic->vdwtype == evdwCUT &&
(fr->ic->vdw_modifier == eintmodNONE ||
enbnxninitcombrule = enbnxninitcombruleNONE;
}
- nbv->nbat = new nbnxn_atomdata_t(nbv->bUseGPU ? gmx::PinningPolicy::PinnedIfSupported : gmx::PinningPolicy::CannotBePinned);
+ nbv->nbat = new nbnxn_atomdata_t(useGpu ? gmx::PinningPolicy::PinnedIfSupported : gmx::PinningPolicy::CannotBePinned);
int mimimumNumEnergyGroupNonbonded = ir->opts.ngener;
if (ir->opts.ngener - ir->nwall == 1)
{
*/
mimimumNumEnergyGroupNonbonded = 1;
}
- bool bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[0].kernel_type);
nbnxn_atomdata_init(mdlog,
nbv->nbat,
- nbv->grp[0].kernel_type,
+ nbv->kernelType_,
enbnxninitcombrule,
fr->ntype, fr->nbfp,
mimimumNumEnergyGroupNonbonded,
- bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1);
+ pairlistIsSimple ? gmx_omp_nthreads_get(emntNonbonded) : 1);
- if (nbv->bUseGPU)
+ if (useGpu)
{
/* init the NxN GPU data; the last argument tells whether we'll have
* both local and non-local NB calculation on GPU */
nbv->listParams.get(),
nbv->nbat,
cr->nodeid,
- (nbv->ngrp > 1));
+ haveMultipleDomains);
- if ((env = getenv("GMX_NB_MIN_CI")) != nullptr)
+ if (const char *env = getenv("GMX_NB_MIN_CI"))
{
char *end;
const interaction_const_t *ic,
const NbnxnListParameters *listParams)
{
- if (!nbv || nbv->grp[InteractionLocality::Local].kernel_type != nbnxnk8x8x8_GPU)
+ if (!nbv || !nbv->useGpu())
{
return;
}
#include "gromacs/mdtypes/group.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/nbnxm/atomdata.h"
+#include "gromacs/nbnxm/gpu_data_mgmt.h"
#include "gromacs/nbnxm/nbnxm.h"
#include "gromacs/nbnxm/nbnxm_geometry.h"
#include "gromacs/nbnxm/nbnxm_simd.h"
}
#endif //GMX_SIMD
-gmx_bool nbnxn_kernel_pairlist_simple(int nb_kernel_type)
-{
- if (nb_kernel_type == nbnxnkNotSet)
- {
- gmx_fatal(FARGS, "Non-bonded kernel type not set for Verlet-style pair-list.");
- }
-
- switch (nb_kernel_type)
- {
- case nbnxnk8x8x8_GPU:
- case nbnxnk8x8x8_PlainC:
- return FALSE;
-
- case nbnxnk4x4_PlainC:
- case nbnxnk4xN_SIMD_4xN:
- case nbnxnk4xN_SIMD_2xNN:
- return TRUE;
-
- default:
- gmx_incons("Invalid nonbonded kernel type passed!");
- return FALSE;
- }
-}
-
/* Initializes a single nbnxn_pairlist_t data structure */
static void nbnxn_init_pairlist_fep(t_nblist *nl)
{
free_nblist(nbl_fep.get());
}
-nbnxn_search::nbnxn_search(const ivec *n_dd_cells,
+nbnxn_search::nbnxn_search(int ePBC,
+ const ivec *n_dd_cells,
const gmx_domdec_zones_t *zones,
gmx_bool bFEP,
int nthread_max) :
bFEP(bFEP),
- ePBC(epbcNONE), // The correct value will be set during the gridding
+ ePBC(ePBC),
zones(zones),
natoms_local(0),
natoms_nonlocal(0),
nbs_cycle_clear(cc);
}
/* Factory for the pair-search data structure. This change threads ePBC
 * through from the inputrec at construction time, instead of defaulting the
 * member and patching it in later during gridding (see the removed
 * nbs->ePBC assignment in nbnxn_put_on_grid). Caller owns the returned
 * object. */
-nbnxn_search *nbnxn_init_search(const ivec *n_dd_cells,
+nbnxn_search *nbnxn_init_search(int ePBC,
+ const ivec *n_dd_cells,
const gmx_domdec_zones_t *zones,
gmx_bool bFEP,
int nthread_max)
{
- return new nbnxn_search(n_dd_cells, zones, bFEP, nthread_max);
+ return new nbnxn_search(ePBC, n_dd_cells, zones, bFEP, nthread_max);
}
static void init_buffer_flags(nbnxn_buffer_flags_t *flags,
minimum_subgrid_size_xy(jGrid));
}
-/* Clusters at the cut-off only increase rlist by 60% of their size */
-static real nbnxn_rlist_inc_outside_fac = 0.6;
-
-/* Due to the cluster size the effective pair-list is longer than
- * that of a simple atom pair-list. This function gives the extra distance.
- */
-real nbnxn_get_rlist_effective_inc(int cluster_size_j, real atom_density)
-{
- int cluster_size_i;
- real vol_inc_i, vol_inc_j;
-
- /* We should get this from the setup, but currently it's the same for
- * all setups, including GPUs.
- */
- cluster_size_i = c_nbnxnCpuIClusterSize;
-
- vol_inc_i = (cluster_size_i - 1)/atom_density;
- vol_inc_j = (cluster_size_j - 1)/atom_density;
-
- return nbnxn_rlist_inc_outside_fac*std::cbrt(vol_inc_i + vol_inc_j);
-}
-
/* Estimates the interaction volume^2 for non-local interactions */
static real nonlocal_vol2(const struct gmx_domdec_zones_t *zones, const rvec ls, real r)
{
* Maxwell is less sensitive to the exact value.
*/
const int nsubpair_target_min = 36;
- rvec ls;
real r_eff_sup, vol_est, nsp_est, nsp_est_nl;
const nbnxn_grid_t &grid = nbs->grid[0];
return;
}
+ gmx::RVec ls;
ls[XX] = (grid.c1[XX] - grid.c0[XX])/(grid.numCells[XX]*c_gpuNumClusterPerCellX);
ls[YY] = (grid.c1[YY] - grid.c0[YY])/(grid.numCells[YY]*c_gpuNumClusterPerCellY);
ls[ZZ] = grid.na_c/(grid.atom_density*ls[XX]*ls[YY]);
- /* The average length of the diagonal of a sub cell */
- real diagonal = std::sqrt(ls[XX]*ls[XX] + ls[YY]*ls[YY] + ls[ZZ]*ls[ZZ]);
-
/* The formulas below are a heuristic estimate of the average nsj per si*/
- r_eff_sup = rlist + nbnxn_rlist_inc_outside_fac*gmx::square((grid.na_c - 1.0)/grid.na_c)*0.5*diagonal;
+ r_eff_sup = rlist + nbnxn_get_rlist_effective_inc(grid.na_c, ls);
if (!nbs->DomDec || nbs->zones->n == 1)
{
std::swap(nbl->sci, work.sci_sort);
}
-/* Make a local or non-local pair-list, depending on iloc */
-void nbnxn_make_pairlist(nbnxn_search *nbs,
- nbnxn_atomdata_t *nbat,
+void nbnxn_make_pairlist(nonbonded_verlet_t *nbv,
+ const InteractionLocality iLocality,
const t_blocka *excl,
- const real rlist,
- const int min_ci_balanced,
- nbnxn_pairlist_set_t *nbl_list,
- const InteractionLocality iloc,
- const int nb_kernel_type,
+ const int64_t step,
t_nrnb *nrnb)
{
+ nbnxn_search *nbs = nbv->nbs.get();
+ nbnxn_atomdata_t *nbat = nbv->nbat;
+ const real rlist = nbv->listParams->rlistOuter;
+ nbnxn_pairlist_set_t *nbl_list = &nbv->pairlistSets[iLocality];
+
int nsubpair_target;
float nsubpair_tot_est;
int nnbl;
nbat->bUseBufferFlags = (nbat->out.size() > 1);
/* We should re-init the flags before making the first list */
- if (nbat->bUseBufferFlags && iloc == InteractionLocality::Local)
+ if (nbat->bUseBufferFlags && iLocality == InteractionLocality::Local)
{
init_buffer_flags(&nbat->buffer_flags, nbat->numAtoms());
}
int nzi;
- if (iloc == InteractionLocality::Local)
+ if (iLocality == InteractionLocality::Local)
{
/* Only zone (grid) 0 vs 0 */
nzi = 1;
nzi = nbs->zones->nizone;
}
- if (!nbl_list->bSimple && min_ci_balanced > 0)
+ if (!nbl_list->bSimple && nbv->min_ci_balanced > 0)
{
- get_nsubpair_target(nbs, iloc, rlist, min_ci_balanced,
+ get_nsubpair_target(nbs, iLocality, rlist, nbv->min_ci_balanced,
&nsubpair_target, &nsubpair_tot_est);
}
else
int zj0;
int zj1;
- if (iloc == InteractionLocality::Local)
+ if (iLocality == InteractionLocality::Local)
{
zj0 = 0;
zj1 = 1;
/* With GPU: generate progressively smaller lists for
* load balancing for local only or non-local with 2 zones.
*/
- progBal = (iloc == InteractionLocality::Local || nbs->zones->n <= 2);
+ progBal = (iLocality == InteractionLocality::Local || nbs->zones->n <= 2);
#pragma omp parallel for num_threads(nnbl) schedule(static)
for (int th = 0; th < nnbl; th++)
nbnxn_make_pairlist_part(nbs, iGrid, jGrid,
&nbs->work[th], nbat, *excl,
rlist,
- nb_kernel_type,
+ nbv->kernelType_,
ci_block,
nbat->bUseBufferFlags,
nsubpair_target,
nbnxn_make_pairlist_part(nbs, iGrid, jGrid,
&nbs->work[th], nbat, *excl,
rlist,
- nb_kernel_type,
+ nbv->kernelType_,
ci_block,
nbat->bUseBufferFlags,
nsubpair_target,
GMX_ASSERT(nbl_list->nbl[0]->ciOuter.empty(), "ciOuter is invalid so it should be empty");
}
+ nbl_list->outerListCreationStep = step;
+
/* Special performance logging stuff (env.var. GMX_NBNXN_CYCLE) */
- if (iloc == InteractionLocality::Local)
+ if (iLocality == InteractionLocality::Local)
{
nbs->search_count++;
}
if (nbs->print_cycles &&
- (!nbs->DomDec || iloc == InteractionLocality::NonLocal) &&
+ (!nbs->DomDec || iLocality == InteractionLocality::NonLocal) &&
nbs->search_count % 100 == 0)
{
nbs_cycle_print(stderr, nbs);
print_reduction_cost(&nbat->buffer_flags, nbl_list->nnbl);
}
}
+
+ if (nbv->listParams->useDynamicPruning && !nbv->useGpu())
+ {
+ nbnxnPrepareListForDynamicPruning(nbl_list);
+ }
+
+ if (nbv->useGpu())
+ {
+ /* Launch the transfer of the pairlist to the GPU.
+ *
+ * NOTE: The launch overhead is currently not timed separately
+ */
+ Nbnxm::gpu_init_pairlist(nbv->gpu_nbv,
+ nbl_list->nblGpu[0],
+ iLocality);
+ }
}
void nbnxnPrepareListForDynamicPruning(nbnxn_pairlist_set_t *listSet)
#include "gromacs/mdtypes/interaction_const.h"
#include "gromacs/mdtypes/state.h"
#include "gromacs/nbnxm/nbnxm.h"
+#include "gromacs/nbnxm/nbnxm_geometry.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/topology/topology.h"
#include "gromacs/utility/cstringutil.h"
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+#include "gmxpre.h"
+
+#include "gromacs/nbnxm/nbnxm.h"
+#include "gromacs/nbnxm/pairlist.h"
+
+/*! \brief Returns the number of steps performed with the current pair list,
+ * i.e. the age of the outer pair list for the requested locality.
+ *
+ * \param[in] nbv        The nonbonded verlet module holding the pair-list sets
+ * \param[in] iLocality  Which pair-list set (local or non-local) to query
+ * \param[in] step       The current MD step
+ */
+int nbnxnNumStepsWithPairlist(const nonbonded_verlet_t         &nbv,
+                              const Nbnxm::InteractionLocality  iLocality,
+                              const int64_t                     step)
+{
+    /* The pair-list lifetime is bounded by nstlist, so the step difference
+     * always fits in an int; make the narrowing from int64_t explicit
+     * instead of relying on an implicit (warning-prone) conversion.
+     */
+    return static_cast<int>(step - nbv.pairlistSets[iLocality].outerListCreationStep);
+}
+
+/*! \brief Tells whether \p step is a step on which the outer pair list
+ * should be dynamically pruned for the requested locality.
+ *
+ * True when the number of steps elapsed since the outer-list creation is a
+ * multiple of the pruning interval listParams->nstlistPrune.
+ *
+ * NOTE(review): assumes nstlistPrune > 0, i.e. that dynamic pruning is
+ * enabled when this is called — otherwise the modulo divides by zero;
+ * confirm all callers check listParams->useDynamicPruning first.
+ *
+ * \param[in] nbv        The nonbonded verlet module holding the pair-list sets
+ * \param[in] iLocality  Which pair-list set (local or non-local) to query
+ * \param[in] step       The current MD step
+ */
+bool nbnxnIsDynamicPairlistPruningStep(const nonbonded_verlet_t        &nbv,
+                                       const Nbnxm::InteractionLocality iLocality,
+                                       const int64_t                    step)
+{
+    return nbnxnNumStepsWithPairlist(nbv, iLocality, step) % nbv.listParams->nstlistPrune == 0;
+}
*/
typedef void nbnxn_free_t (void *ptr);
-/* Tells if the pair-list corresponding to nb_kernel_type is simple.
- * Returns FALSE for super-sub type pair-list.
- */
-gmx_bool nbnxn_kernel_pairlist_simple(int nb_kernel_type);
-
-/* Due to the cluster size the effective pair-list is longer than
- * that of a simple atom pair-list. This function gives the extra distance.
- */
-real nbnxn_get_rlist_effective_inc(int cluster_size, real atom_density);
-
/* Allocates and initializes a pair search data structure */
-nbnxn_search *nbnxn_init_search(const ivec *n_dd_cells,
+nbnxn_search *nbnxn_init_search(int ePBC,
+ const ivec *n_dd_cells,
const gmx_domdec_zones_t *zones,
gmx_bool bFEP,
int nthread_max);
void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list,
gmx_bool simple, gmx_bool combined);
-/* Make a pair-list with radius rlist, store it in nbl.
- * The parameter min_ci_balanced sets the minimum required
- * number or roughly equally sized ci blocks in nbl.
- * When set >0 ci lists will be chopped up when the estimate
- * for the number of equally sized lists is below min_ci_balanced.
- * With perturbed particles, also a group scheme style nbl_fep list is made.
- */
-void nbnxn_make_pairlist(nbnxn_search *nbs,
- nbnxn_atomdata_t *nbat,
- const t_blocka *excl,
- real rlist,
- int min_ci_balanced,
- nbnxn_pairlist_set_t *nbl_list,
- Nbnxm::InteractionLocality iloc,
- int nb_kernel_type,
- t_nrnb *nrnb);
-
/*! \brief Prepare the list-set produced by the search for dynamic pruning
*
* \param[in,out] listSet The list-set to prepare for dynamic pruning.
const Nbnxm::InteractionLocality ilocality,
const rvec *shift_vec)
{
- nonbonded_verlet_group_t &nbvg = nbv->grp[ilocality];
- nbnxn_pairlist_set_t *nbl_lists = &nbvg.nbl_lists;
+ nbnxn_pairlist_set_t *nbl_lists = &nbv->pairlistSets[ilocality];
const nbnxn_atomdata_t *nbat = nbv->nbat;
const real rlistInner = nbv->listParams->rlistInner;
{
NbnxnPairlistCpu *nbl = nbl_lists->nbl[i];
- switch (nbvg.kernel_type)
+ switch (nbv->kernelType_)
{
case nbnxnk4xN_SIMD_4xN:
nbnxn_kernel_prune_4xn(nbl, nbat, shift_vec, rlistInner);