/* Print statistics of a pair list, used for debug output */
static void print_nblist_statistics(FILE *fp,
const NbnxnPairlistCpu &nbl,
- const PairSearch &pairSearch,
+ const Nbnxm::GridSet &gridSet,
const real rl)
{
- const Grid &grid = pairSearch.gridSet().grids()[0];
+ const Grid &grid = gridSet.grids()[0];
const Grid::Dimensions &dims = grid.dimensions();
fprintf(fp, "nbl nci %zu ncj %d\n",
/* Print statistics of a pair list, used for debug output */
static void print_nblist_statistics(FILE *fp,
const NbnxnPairlistGpu &nbl,
- const PairSearch &pairSearch,
+ const Nbnxm::GridSet &gridSet,
const real rl)
{
- const Grid &grid = pairSearch.gridSet().grids()[0];
+ const Grid &grid = gridSet.grids()[0];
const Grid::Dimensions &dims = grid.dimensions();
fprintf(fp, "nbl nsci %zu ncj4 %zu nsi %d excl4 %zu\n",
}
/* Estimates the average size of a full j-list for super/sub setup */
-static void get_nsubpair_target(const PairSearch &pairSearch,
+static void get_nsubpair_target(const Nbnxm::GridSet &gridSet,
const InteractionLocality iloc,
const real rlist,
const int min_ci_balanced,
const int nsubpair_target_min = 36;
real r_eff_sup, vol_est, nsp_est, nsp_est_nl;
- const Grid &grid = pairSearch.gridSet().grids()[0];
+ const Grid &grid = gridSet.grids()[0];
/* We don't need to balance list sizes if:
* - We didn't request balancing.
/* The formulas below are a heuristic estimate of the average nsj per si */
r_eff_sup = rlist + nbnxn_get_rlist_effective_inc(numAtomsCluster, ls);
- if (!pairSearch.domainSetup().haveDomDec ||
- pairSearch.domainSetup().zones->n == 1)
+ if (!gridSet.domainSetup().haveMultipleDomains ||
+ gridSet.domainSetup().zones->n == 1)
{
nsp_est_nl = 0;
}
{
nsp_est_nl =
gmx::square(dims.atomDensity/numAtomsCluster)*
- nonlocal_vol2(pairSearch.domainSetup().zones, ls, r_eff_sup);
+ nonlocal_vol2(gridSet.domainSetup().zones, ls, r_eff_sup);
}
if (iloc == InteractionLocality::Local)
}
static int get_ci_block_size(const Grid &iGrid,
- const bool haveDomDec,
+ const bool haveMultipleDomains,
const int numLists)
{
const int ci_block_enum = 5;
/* Without domain decomposition
* or with less than 3 blocks per task, divide in nth blocks.
*/
- if (!haveDomDec || numLists*3*ci_block > iGrid.numCells())
+ if (!haveMultipleDomains || numLists*3*ci_block > iGrid.numCells())
{
ci_block = (iGrid.numCells() + numLists - 1)/numLists;
}
/* Generates the part of pair-list nbl assigned to our thread */
template <typename T>
-static void nbnxn_make_pairlist_part(const PairSearch &pairSearch,
+static void nbnxn_make_pairlist_part(const Nbnxm::GridSet &gridSet,
const Grid &iGrid,
const Grid &jGrid,
PairsearchWork *work,
gridj_flag = work->buffer_flags.flag;
}
- const Nbnxm::GridSet &gridSet = pairSearch.gridSet();
-
gridSet.getBox(box);
const bool haveFep = gridSet.haveFep();
/* Check if we need periodicity shifts.
* Without PBC or with domain decomposition we don't need them.
*/
- if (d >= ePBC2npbcdim(pairSearch.domainSetup().ePBC) ||
- pairSearch.domainSetup().haveDomDecPerDim[d])
+ if (d >= ePBC2npbcdim(gridSet.domainSetup().ePBC) ||
+ gridSet.domainSetup().haveMultipleDomainsPerDim[d])
{
shp[d] = 0;
}
{
fprintf(debug, "number of distance checks %d\n", numDistanceChecks);
- print_nblist_statistics(debug, *nbl, pairSearch, rlist);
+ print_nblist_statistics(debug, *nbl, gridSet, rlist);
if (haveFep)
{
}
}
-static void reduce_buffer_flags(const PairSearch &pairSearch,
- int nsrc,
- const nbnxn_buffer_flags_t *dest)
+static void reduce_buffer_flags(gmx::ArrayRef<PairsearchWork> searchWork,
+ int nsrc,
+ const nbnxn_buffer_flags_t *dest)
{
for (int s = 0; s < nsrc; s++)
{
- gmx_bitmask_t * flag = pairSearch.work()[s].buffer_flags.flag;
+ gmx_bitmask_t * flag = searchWork[s].buffer_flags.flag;
for (int b = 0; b < dest->nflag; b++)
{
static void prepareListsForDynamicPruning(gmx::ArrayRef<NbnxnPairlistCpu> lists);
void
-PairlistSet::constructPairlists(PairSearch *pairSearch,
- nbnxn_atomdata_t *nbat,
- const t_blocka *excl,
- const Nbnxm::KernelType kernelType,
- const int minimumIlistCountForGpuBalancing,
- t_nrnb *nrnb)
+PairlistSet::constructPairlists(const Nbnxm::GridSet &gridSet,
+ gmx::ArrayRef<PairsearchWork> searchWork,
+ nbnxn_atomdata_t *nbat,
+ const t_blocka *excl,
+ const Nbnxm::KernelType kernelType,
+ const int minimumIlistCountForGpuBalancing,
+ t_nrnb *nrnb,
+ SearchCycleCounting *searchCycleCounting)
{
const real rlist = params_.rlistOuter;
}
else
{
- nzi = pairSearch->domainSetup().zones->nizone;
+ nzi = gridSet.domainSetup().zones->nizone;
}
if (!isCpuType_ && minimumIlistCountForGpuBalancing > 0)
{
- get_nsubpair_target(*pairSearch, locality_, rlist, minimumIlistCountForGpuBalancing,
+ get_nsubpair_target(gridSet, locality_, rlist, minimumIlistCountForGpuBalancing,
&nsubpair_target, &nsubpair_tot_est);
}
else
}
}
- const gmx_domdec_zones_t *ddZones = pairSearch->domainSetup().zones;
+ const gmx_domdec_zones_t *ddZones = gridSet.domainSetup().zones;
for (int zi = 0; zi < nzi; zi++)
{
- const Grid &iGrid = pairSearch->gridSet().grids()[zi];
+ const Grid &iGrid = gridSet.grids()[zi];
int zj0;
int zj1;
}
for (int zj = zj0; zj < zj1; zj++)
{
- const Grid &jGrid = pairSearch->gridSet().grids()[zj];
+ const Grid &jGrid = gridSet.grids()[zj];
if (debug)
{
fprintf(debug, "ns search grid %d vs %d\n", zi, zj);
}
- pairSearch->cycleCounting_.start(PairSearch::enbsCCsearch);
+ searchCycleCounting->start(enbsCCsearch);
- ci_block = get_ci_block_size(iGrid, pairSearch->domainSetup().haveDomDec, numLists);
+ ci_block = get_ci_block_size(iGrid, gridSet.domainSetup().haveMultipleDomains, numLists);
/* With GPU: generate progressively smaller lists for
* load balancing for local only or non-local with 2 zones.
*/
if (nbat->bUseBufferFlags && ((zi == 0 && zj == 0)))
{
- init_buffer_flags(&pairSearch->work()[th].buffer_flags, nbat->numAtoms());
+ init_buffer_flags(&searchWork[th].buffer_flags, nbat->numAtoms());
}
if (combineLists_ && th > 0)
clear_pairlist(&gpuLists_[th]);
}
- PairsearchWork *searchWork = &pairSearch->work()[th];
+ PairsearchWork &work = searchWork[th];
- searchWork->cycleCounter.start();
+ work.cycleCounter.start();
t_nblist *fepListPtr = (fepLists_.empty() ? nullptr : fepLists_[th]);
/* Divide the i cells equally over the pairlists */
if (isCpuType_)
{
- nbnxn_make_pairlist_part(*pairSearch, iGrid, jGrid,
- searchWork, nbat, *excl,
+ nbnxn_make_pairlist_part(gridSet, iGrid, jGrid,
+ &work, nbat, *excl,
rlist,
kernelType,
ci_block,
}
else
{
- nbnxn_make_pairlist_part(*pairSearch, iGrid, jGrid,
- searchWork, nbat, *excl,
+ nbnxn_make_pairlist_part(gridSet, iGrid, jGrid,
+ &work, nbat, *excl,
rlist,
kernelType,
ci_block,
fepListPtr);
}
- searchWork->cycleCounter.stop();
+ work.cycleCounter.stop();
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
}
- pairSearch->cycleCounting_.stop(PairSearch::enbsCCsearch);
+ searchCycleCounting->stop(enbsCCsearch);
np_tot = 0;
np_noq = 0;
np_hlj = 0;
for (int th = 0; th < numLists; th++)
{
- inc_nrnb(nrnb, eNR_NBNXN_DIST2, pairSearch->work()[th].ndistc);
+ inc_nrnb(nrnb, eNR_NBNXN_DIST2, searchWork[th].ndistc);
if (isCpuType_)
{
{
GMX_ASSERT(!isCpuType_, "Can only combine GPU lists");
- pairSearch->cycleCounting_.start(PairSearch::enbsCCcombine);
+ searchCycleCounting->start(enbsCCcombine);
combine_nblists(gmx::constArrayRefFromArray(&gpuLists_[1], numLists - 1),
&gpuLists_[0]);
- pairSearch->cycleCounting_.stop(PairSearch::enbsCCcombine);
+ searchCycleCounting->stop(enbsCCcombine);
}
}
}
{
if (numLists > 1 && checkRebalanceSimpleLists(cpuLists_))
{
- rebalanceSimpleLists(cpuLists_, cpuListsWork_, pairSearch->work());
+ rebalanceSimpleLists(cpuLists_, cpuListsWork_, searchWork);
/* Swap the sets of pair lists */
cpuLists_.swap(cpuListsWork_);
if (nbat->bUseBufferFlags)
{
- reduce_buffer_flags(*pairSearch, numLists, &nbat->buffer_flags);
+ reduce_buffer_flags(searchWork, numLists, &nbat->buffer_flags);
}
- if (pairSearch->gridSet().haveFep())
+ if (gridSet.haveFep())
{
/* Balance the free-energy lists over all the threads */
- balance_fep_lists(fepLists_, pairSearch->work());
+ balance_fep_lists(fepLists_, searchWork);
}
if (isCpuType_)
{
for (auto &cpuList : cpuLists_)
{
- print_nblist_statistics(debug, cpuList, *pairSearch, rlist);
+ print_nblist_statistics(debug, cpuList, gridSet, rlist);
}
}
else if (!isCpuType_ && gpuLists_.size() > 1)
{
- print_nblist_statistics(debug, gpuLists_[0], *pairSearch, rlist);
+ print_nblist_statistics(debug, gpuLists_[0], gridSet, rlist);
}
}
const int64_t step,
t_nrnb *nrnb)
{
- pairlistSet(iLocality).constructPairlists(pairSearch, nbat, excl, kernelType, minimumIlistCountForGpuBalancing_, nrnb);
+ pairlistSet(iLocality).constructPairlists(pairSearch->gridSet(), pairSearch->work(),
+ nbat, excl, kernelType, minimumIlistCountForGpuBalancing_,
+ nrnb, &pairSearch->cycleCounting_);
if (iLocality == Nbnxm::InteractionLocality::Local)
{
pairSearch->cycleCounting_.searchCount_++;
}
if (pairSearch->cycleCounting_.recordCycles_ &&
- (!pairSearch->domainSetup().haveDomDec || iLocality == InteractionLocality::NonLocal) &&
+ (!pairSearch->gridSet().domainSetup().haveMultipleDomains || iLocality == InteractionLocality::NonLocal) &&
pairSearch->cycleCounting_.searchCount_ % 100 == 0)
{
pairSearch->cycleCounting_.printCycles(stderr, pairSearch->work());
gmx_cycles_t start_ = 0;
};
+//! Local cycle count enum for profiling different parts of search
+enum {
+ // Indices into SearchCycleCounting::cc_. enbsCCsearch times the pair
+ // search itself and enbsCCcombine the combining of GPU lists (both used
+ // in constructPairlists); enbsCCgrid presumably times the gridding
+ // phase — not exercised in this chunk, confirm against putOnGrid.
+ // enbsCCnr is the counter count, i.e. the size of the cc_ array.
+ enbsCCgrid, enbsCCsearch, enbsCCcombine, enbsCCnr
+};
+
+/*! \internal
+ * \brief Struct for collecting detailed cycle counts for the search
+ */
+struct SearchCycleCounting
+{
+ //! Start a pair search cycle counter
+ void start(const int enbsCC)
+ {
+ cc_[enbsCC].start();
+ }
+
+ //! Stop a pair search cycle counter
+ void stop(const int enbsCC)
+ {
+ cc_[enbsCC].stop();
+ }
+
+ //! Print the cycle counts to \p fp
+ void printCycles(FILE *fp,
+ gmx::ArrayRef<const PairsearchWork> work) const;
+
+ //! Tells whether we record cycles
+ bool recordCycles_ = false;
+ //! The number of times pairsearching has been performed, local+non-local count as 1
+ int searchCount_ = 0;
+ //! The set of cycle counters
+ nbnxn_cycle_t cc_[enbsCCnr];
+};
+
// TODO: Move nbnxn_search_work_t definition to its own file
/* Thread-local work struct, contains working data for Grid */
class PairSearch
{
public:
- /*! \internal
- * \brief Description of the domain setup: PBC and the connections between domains
- */
- struct DomainSetup
- {
- /*! \internal
- * \brief Description of the domain setup: PBC and the connections between domains
- */
- //! Constructor, without DD \p numDDCells and \p ddZones should be nullptr
- DomainSetup(int ePBC,
- const ivec *numDDCells,
- const gmx_domdec_zones_t *ddZones);
-
- //! The type of PBC
- int ePBC;
- //! Tells whether we are using domain decomposition
- bool haveDomDec;
- //! Tells whether we are using domain decomposition per dimension
- std::array<bool, DIM> haveDomDecPerDim;
- //! The domain decomposition zone setup
- const gmx_domdec_zones_t *zones;
- };
-
- //! Local cycle count enum for profiling different parts of search
- enum {
- enbsCCgrid, enbsCCsearch, enbsCCcombine, enbsCCnr
- };
-
- struct SearchCycleCounting
- {
- //! Start a pair search cycle counter
- void start(const int enbsCC)
- {
- cc_[enbsCC].start();
- }
-
- //! Stop a pair search cycle counter
- void stop(const int enbsCC)
- {
- cc_[enbsCC].stop();
- }
-
- //! Print the cycle counts to \p fp
- void printCycles(FILE *fp,
- gmx::ArrayRef<const PairsearchWork> work) const;
-
- bool recordCycles_ = false;
- int searchCount_ = 0;
- nbnxn_cycle_t cc_[enbsCCnr];
- };
-
//! Puts the atoms in \p ddZone on the grid and copies the coordinates to \p nbat
void putOnGrid(const matrix box,
int ddZone,
gridSet_.setLocalAtomOrder();
}
- const DomainSetup domainSetup() const
- {
- return domainSetup_;
- }
-
//! Returns the set of search grids
const Nbnxm::GridSet &gridSet() const
{
}
private:
- //! The domain setup
- DomainSetup domainSetup_;
//! The set of search grids
Nbnxm::GridSet gridSet_;
//! Work objects, one entry for each thread
public:
//! Cycle counting for measuring components of the search
- SearchCycleCounting cycleCounting_;
+ SearchCycleCounting cycleCounting_;
};
#endif