#include "internal.h"
#include "pairlistwork.h"
-using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
+using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
+
+using nbnxn_bb_t = Nbnxm::BoundingBox; // TODO: Remove when refactoring this file
+
+using Grid = Nbnxm::Grid; // TODO: Remove when refactoring this file
// Convenience alias for partial Nbnxm namespace usage
using InteractionLocality = Nbnxm::InteractionLocality;
nbnxn_search::nbnxn_search(int ePBC,
const ivec *n_dd_cells,
const gmx_domdec_zones_t *zones,
+ const PairlistType pairlistType,
gmx_bool bFEP,
int nthread_max) :
bFEP(bFEP),
}
}
- grid.resize(numGrids);
+ grid.resize(numGrids, pairlistType);
/* Initialize detailed nbsearch cycle counting */
print_cycles = (getenv("GMX_NBNXN_CYCLE") != nullptr);
* Since atoms can be geometrically outside of the cell they have been
* assigned to (when atom groups instead of individual atoms are assigned
* to cells), the distance returned can be larger than the input.
*/
-static real listRangeForBoundingBoxToGridCell(real rlist,
- const nbnxn_grid_t &grid)
+static real
+listRangeForBoundingBoxToGridCell(real rlist,
+ const Grid::Dimensions &gridDims)
{
- return rlist + grid.maxAtomGroupRadius;
+ return rlist + gridDims.maxAtomGroupRadius;
}
/* Returns the pair-list cutoff between two grid cells given an atom-to-atom pair-list cutoff.
* Since atoms can be geometrically outside of the cell they have been
* assigned to (when atom groups instead of individual atoms are assigned
* to cells), the distance returned can be larger than the input.
*/
-static real listRangeForGridCellToGridCell(real rlist,
- const nbnxn_grid_t &iGrid,
- const nbnxn_grid_t &jGrid)
+static real
+listRangeForGridCellToGridCell(real rlist,
+ const Grid::Dimensions &iGridDims,
+ const Grid::Dimensions &jGridDims)
{
- return rlist + iGrid.maxAtomGroupRadius + jGrid.maxAtomGroupRadius;
+ return rlist + iGridDims.maxAtomGroupRadius + jGridDims.maxAtomGroupRadius;
}
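+ /* Example: with rlist = 1.0 nm and maxAtomGroupRadius = 0.1 nm on both
+  * grids, cell pairs up to 1.2 nm apart must still be considered, since
+  * an atom can lie up to its group radius outside its assigned cell.
+  */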
/* Determines the cell range along one dimension that
* the bounding box b0 - b1 sees.
*/
template<int dim>
static void get_cell_range(real b0, real b1,
- const nbnxn_grid_t &jGrid,
+ const Grid::Dimensions &jGridDims,
real d2, real rlist, int *cf, int *cl)
{
- real listRangeBBToCell2 = gmx::square(listRangeForBoundingBoxToGridCell(rlist, jGrid));
- real distanceInCells = (b0 - jGrid.c0[dim])*jGrid.invCellSize[dim];
+ real listRangeBBToCell2 = gmx::square(listRangeForBoundingBoxToGridCell(rlist, jGridDims));
+ real distanceInCells = (b0 - jGridDims.lowerCorner[dim])*jGridDims.invCellSize[dim];
*cf = std::max(static_cast<int>(distanceInCells), 0);
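+ /* Extend the lower cell index while d2, the squared distance contribution
+  * from the other dimensions, plus the squared distance from b0 to the
+  * upper edge of cell *cf - 1 (at (*cf - 1 + 1)*cellSize) is in range.
+  */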
while (*cf > 0 &&
- d2 + gmx::square((b0 - jGrid.c0[dim]) - (*cf - 1 + 1)*jGrid.cellSize[dim]) < listRangeBBToCell2)
+ d2 + gmx::square((b0 - jGridDims.lowerCorner[dim]) - (*cf - 1 + 1)*jGridDims.cellSize[dim]) < listRangeBBToCell2)
{
(*cf)--;
}
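+ /* Set the upper cell index from b1 and extend it likewise while the
+  * lower edge of cell *cl + 1 is still within the list range of b1.
+  */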
- *cl = std::min(static_cast<int>((b1 - jGrid.c0[dim])*jGrid.invCellSize[dim]), jGrid.numCells[dim] - 1);
- while (*cl < jGrid.numCells[dim] - 1 &&
- d2 + gmx::square((*cl + 1)*jGrid.cellSize[dim] - (b1 - jGrid.c0[dim])) < listRangeBBToCell2)
+ *cl = std::min(static_cast<int>((b1 - jGridDims.lowerCorner[dim])*jGridDims.invCellSize[dim]), jGridDims.numCells[dim] - 1);
+ while (*cl < jGridDims.numCells[dim] - 1 &&
+ d2 + gmx::square((*cl + 1)*jGridDims.cellSize[dim] - (b1 - jGridDims.lowerCorner[dim])) < listRangeBBToCell2)
{
(*cl)++;
}
static void print_nblist_statistics(FILE *fp, const NbnxnPairlistCpu *nbl,
const nbnxn_search *nbs, real rl)
{
- const nbnxn_grid_t *grid;
- int cs[SHIFTS];
- int npexcl;
-
- grid = &nbs->grid[0];
+ const Grid &grid = nbs->grid[0];
+ const Grid::Dimensions &dims = grid.dimensions();
fprintf(fp, "nbl nci %zu ncj %d\n",
nbl->ci.size(), nbl->ncjInUse);
+ const int numAtomsJCluster = grid.geometry().numAtomsJCluster;
+ const double numAtomsPerCell = nbl->ncjInUse/static_cast<double>(grid.numCells())*numAtomsJCluster;
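+ /* The "ratio" below compares the number of listed j-atoms per cell with
+  * the number expected in half a sphere of radius rl at the average atom
+  * density of the grid (half, since each pair is stored only once).
+  */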
fprintf(fp, "nbl na_cj %d rl %g ncp %d per cell %.1f atoms %.1f ratio %.2f\n",
- nbl->na_cj, rl, nbl->ncjInUse, nbl->ncjInUse/static_cast<double>(grid->nc),
- nbl->ncjInUse/static_cast<double>(grid->nc)*grid->na_cj,
- nbl->ncjInUse/static_cast<double>(grid->nc)*grid->na_cj/(0.5*4.0/3.0*M_PI*rl*rl*rl*grid->nc*grid->na_cj/(grid->size[XX]*grid->size[YY]*grid->size[ZZ])));
+ nbl->na_cj, rl, nbl->ncjInUse, nbl->ncjInUse/static_cast<double>(grid.numCells()),
+ numAtomsPerCell,
+ numAtomsPerCell/(0.5*4.0/3.0*M_PI*rl*rl*rl*grid.numCells()*numAtomsJCluster/(dims.gridSize[XX]*dims.gridSize[YY]*dims.gridSize[ZZ])));
fprintf(fp, "nbl average j cell list length %.1f\n",
0.25*nbl->ncjInUse/std::max(static_cast<double>(nbl->ci.size()), 1.0));
- for (int s = 0; s < SHIFTS; s++)
- {
- cs[s] = 0;
- }
- npexcl = 0;
+ int cs[SHIFTS] = { 0 };
+ int npexcl = 0;
for (const nbnxn_ci_t &ciEntry : nbl->ci)
{
cs[ciEntry.shift & NBNXN_CI_SHIFT] +=
static void print_nblist_statistics(FILE *fp, const NbnxnPairlistGpu *nbl,
const nbnxn_search *nbs, real rl)
{
- const nbnxn_grid_t *grid;
- int b;
- int c[c_gpuNumClusterPerCell + 1];
- double sum_nsp, sum_nsp2;
- int nsp_max;
-
- /* This code only produces correct statistics with domain decomposition */
- grid = &nbs->grid[0];
+ const Grid &grid = nbs->grid[0];
+ const Grid::Dimensions &dims = grid.dimensions();
fprintf(fp, "nbl nsci %zu ncj4 %zu nsi %d excl4 %zu\n",
nbl->sci.size(), nbl->cj4.size(), nbl->nci_tot, nbl->excl.size());
+ const int numAtomsCluster = grid.geometry().numAtomsICluster;
+ const double numAtomsPerCell = nbl->nci_tot/static_cast<double>(grid.numClusters())*numAtomsCluster;
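+ /* Same density ratio as printed for the CPU list, but per GPU cluster */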
fprintf(fp, "nbl na_c %d rl %g ncp %d per cell %.1f atoms %.1f ratio %.2f\n",
- nbl->na_ci, rl, nbl->nci_tot, nbl->nci_tot/static_cast<double>(grid->nsubc_tot),
- nbl->nci_tot/static_cast<double>(grid->nsubc_tot)*grid->na_c,
- nbl->nci_tot/static_cast<double>(grid->nsubc_tot)*grid->na_c/(0.5*4.0/3.0*M_PI*rl*rl*rl*grid->nsubc_tot*grid->na_c/(grid->size[XX]*grid->size[YY]*grid->size[ZZ])));
-
- sum_nsp = 0;
- sum_nsp2 = 0;
- nsp_max = 0;
- for (int si = 0; si <= c_gpuNumClusterPerCell; si++)
- {
- c[si] = 0;
- }
+ nbl->na_ci, rl, nbl->nci_tot, nbl->nci_tot/static_cast<double>(grid.numClusters()),
+ numAtomsPerCell,
+ numAtomsPerCell/(0.5*4.0/3.0*M_PI*rl*rl*rl*grid.numClusters()*numAtomsCluster/(dims.gridSize[XX]*dims.gridSize[YY]*dims.gridSize[ZZ])));
+
+ double sum_nsp = 0;
+ double sum_nsp2 = 0;
+ int nsp_max = 0;
+ int c[c_gpuNumClusterPerCell + 1] = { 0 };
for (const nbnxn_sci_t &sci : nbl->sci)
{
int nsp = 0;
{
for (int j = 0; j < c_nbnxnGpuJgroupSize; j++)
{
- b = 0;
+ int b = 0;
for (int si = 0; si < c_gpuNumClusterPerCell; si++)
{
if (nbl->cj4[j4].imei[0].imask & (1U << (j*c_gpuNumClusterPerCell + si)))
if (!nbl->cj4.empty())
{
- for (b = 0; b <= c_gpuNumClusterPerCell; b++)
+ for (int b = 0; b <= c_gpuNumClusterPerCell; b++)
{
fprintf(fp, "nbl j-list #i-subcell %d %7d %4.1f\n",
b, c[b], 100.0*c[b]/size_t {nbl->cj4.size()*c_nbnxnGpuJgroupSize});
* \param[in,out] numDistanceChecks The number of distance checks performed
*/
static void
-makeClusterListSimple(const nbnxn_grid_t &jGrid,
+makeClusterListSimple(const Grid &jGrid,
NbnxnPairlistCpu * nbl,
int icluster,
int jclusterFirst,
InRange = FALSE;
while (!InRange && jclusterFirst <= jclusterLast)
{
- real d2 = subc_bb_dist2(0, bb_ci, jclusterFirst, jGrid.bb);
+ real d2 = subc_bb_dist2(0, bb_ci, jclusterFirst, jGrid.jBoundingBoxes());
*numDistanceChecks += 2;
/* Check if the distance is within the distance where
}
else if (d2 < rlist2)
{
- int cjf_gl = jGrid.cell0 + jclusterFirst;
+ int cjf_gl = jGrid.cellOffset() + jclusterFirst;
for (int i = 0; i < c_nbnxnCpuIClusterSize && !InRange; i++)
{
for (int j = 0; j < c_nbnxnCpuIClusterSize; j++)
InRange = FALSE;
while (!InRange && jclusterLast > jclusterFirst)
{
- real d2 = subc_bb_dist2(0, bb_ci, jclusterLast, jGrid.bb);
+ real d2 = subc_bb_dist2(0, bb_ci, jclusterLast, jGrid.jBoundingBoxes());
*numDistanceChecks += 2;
/* Check if the distance is within the distance where
}
else if (d2 < rlist2)
{
- int cjl_gl = jGrid.cell0 + jclusterLast;
+ int cjl_gl = jGrid.cellOffset() + jclusterLast;
for (int i = 0; i < c_nbnxnCpuIClusterSize && !InRange; i++)
{
for (int j = 0; j < c_nbnxnCpuIClusterSize; j++)
{
/* Store cj and the interaction mask */
nbnxn_cj_t cjEntry;
- cjEntry.cj = jGrid.cell0 + jcluster;
+ cjEntry.cj = jGrid.cellOffset() + jcluster;
cjEntry.excl = get_imask(excludeSubDiagonal, icluster, jcluster);
nbl->cj.push_back(cjEntry);
}
/* Plain C or SIMD4 code for making a pair list of super-cell sci vs scj.
* Checks bounding box distances and possibly atom pair distances.
*/
-static void make_cluster_list_supersub(const nbnxn_grid_t &iGrid,
- const nbnxn_grid_t &jGrid,
+static void make_cluster_list_supersub(const Grid &iGrid,
+ const Grid &jGrid,
NbnxnPairlistGpu *nbl,
const int sci,
const int scj,
const nbnxn_bb_t *bb_ci = work.iSuperClusterData.bb.data();
#endif
- assert(c_nbnxnGpuClusterSize == iGrid.na_c);
- assert(c_nbnxnGpuClusterSize == jGrid.na_c);
+ assert(c_nbnxnGpuClusterSize == iGrid.geometry().numAtomsICluster);
+ assert(c_nbnxnGpuClusterSize == jGrid.geometry().numAtomsICluster);
/* We generate the pairlist mainly based on bounding-box distances
* and do atom pair distance based pruning on the GPU.
*/
float *d2l = work.distanceBuffer.data();
- for (int subc = 0; subc < jGrid.nsubc[scj]; subc++)
+ for (int subc = 0; subc < jGrid.numClustersPerCell()[scj]; subc++)
{
const int cj4_ind = work.cj_ind/c_nbnxnGpuJgroupSize;
const int cj_offset = work.cj_ind - cj4_ind*c_nbnxnGpuJgroupSize;
const int cj = scj*c_gpuNumClusterPerCell + subc;
- const int cj_gl = jGrid.cell0*c_gpuNumClusterPerCell + cj;
+ const int cj_gl = jGrid.cellOffset()*c_gpuNumClusterPerCell + cj;
int ci1;
if (excludeSubDiagonal && sci == scj)
}
else
{
- ci1 = iGrid.nsubc[sci];
+ ci1 = iGrid.numClustersPerCell()[sci];
}
#if NBNXN_BBXXXX
/* Determine all ci1 bb distances in one call with SIMD4 */
- subc_bb_dist2_simd4_xxxx(jGrid.pbb.data() + (cj >> STRIDE_PBB_2LOG)*NNBSBB_XXXX + (cj & (STRIDE_PBB-1)),
+ subc_bb_dist2_simd4_xxxx(jGrid.packedBoundingBoxes().data() + (cj >> STRIDE_PBB_2LOG)*NNBSBB_XXXX + (cj & (STRIDE_PBB-1)),
ci1, pbb_ci, d2l);
*numDistanceChecks += c_nbnxnGpuClusterSize*2;
#endif
#if !NBNXN_BBXXXX
/* Determine the bb distance between ci and cj */
- d2l[ci] = subc_bb_dist2(ci, bb_ci, cj, jGrid.bb);
+ d2l[ci] = subc_bb_dist2(ci, bb_ci, cj, jGrid.jBoundingBoxes());
*numDistanceChecks += 2;
#endif
float d2 = d2l[ci];
real gmx_unused shy,
real gmx_unused shz,
real gmx_unused rlist_fep2,
- const nbnxn_grid_t &iGrid,
- const nbnxn_grid_t &jGrid,
+ const Grid &iGrid,
+ const Grid &jGrid,
t_nblist *nlist)
{
int ci, cj_ind_start, cj_ind_end, cja, cjr;
int nri_max;
- int ngid, gid_i = 0, gid_j, gid;
+ int gid_i = 0, gid_j, gid;
int egp_shift, egp_mask;
int gid_cj = 0;
int ind_i, ind_j, ai, aj;
reallocate_nblist(nlist);
}
+ const int numAtomsJCluster = jGrid.geometry().numAtomsJCluster;
+
const nbnxn_atomdata_t::Params &nbatParams = nbat->params();
- ngid = nbatParams.nenergrp;
+ const int ngid = nbatParams.nenergrp;
- if (ngid*jGrid.na_cj > gmx::index(sizeof(gid_cj)*8))
+ /* TODO: Consider adding a check in grompp and changing this to an assert */
+ const int numBitsInEnergyGroupIdsForAtomsInJCluster = sizeof(gid_cj)*8;
+ if (ngid*numAtomsJCluster > numBitsInEnergyGroupIdsForAtomsInJCluster)
{
gmx_fatal(FARGS, "The Verlet scheme with %dx%d kernels and free-energy only supports up to %zu energy groups",
- iGrid.na_c, jGrid.na_cj, (sizeof(gid_cj)*8)/jGrid.na_cj);
+ iGrid.geometry().numAtomsICluster, numAtomsJCluster,
+ (sizeof(gid_cj)*8)/numAtomsJCluster);
}
egp_shift = nbatParams.neg_2log;
nlist->gid[nri] = 0;
nlist->shift[nri] = nbl_ci->shift & NBNXN_CI_SHIFT;
- bFEP_i = ((iGrid.fep[ci - iGrid.cell0] & (1 << i)) != 0u);
+ bFEP_i = iGrid.atomIsPerturbed(ci - iGrid.cellOffset(), i);
bFEP_i_all = bFEP_i_all && bFEP_i;
cja = nbl->cj[cj_ind].cj;
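+ /* The fep bits and energy-group ids are stored per i-cluster; map the
+  * j-cluster index cja onto that storage, which depends on the ratio of
+  * j-cluster to i-cluster size: na_cj == na_c, na_c/2 or 2*na_c.
+  */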
- if (jGrid.na_cj == jGrid.na_c)
+ if (numAtomsJCluster == jGrid.geometry().numAtomsICluster)
{
- cjr = cja - jGrid.cell0;
- fep_cj = jGrid.fep[cjr];
+ cjr = cja - jGrid.cellOffset();
+ fep_cj = jGrid.fepBits(cjr);
if (ngid > 1)
{
gid_cj = nbatParams.energrp[cja];
}
}
- else if (2*jGrid.na_cj == jGrid.na_c)
+ else if (2*numAtomsJCluster == jGrid.geometry().numAtomsICluster)
{
- cjr = cja - jGrid.cell0*2;
+ cjr = cja - jGrid.cellOffset()*2;
/* Extract half of the ci fep/energrp mask */
- fep_cj = (jGrid.fep[cjr>>1] >> ((cjr&1)*jGrid.na_cj)) & ((1<<jGrid.na_cj) - 1);
+ fep_cj = (jGrid.fepBits(cjr >> 1) >> ((cjr & 1)*numAtomsJCluster)) & ((1 << numAtomsJCluster) - 1);
if (ngid > 1)
{
- gid_cj = nbatParams.energrp[cja>>1] >> ((cja&1)*jGrid.na_cj*egp_shift) & ((1<<(jGrid.na_cj*egp_shift)) - 1);
+ gid_cj = nbatParams.energrp[cja >> 1] >> ((cja & 1)*numAtomsJCluster*egp_shift) & ((1 << (numAtomsJCluster*egp_shift)) - 1);
}
}
else
{
- cjr = cja - (jGrid.cell0>>1);
+ cjr = cja - (jGrid.cellOffset() >> 1);
/* Combine two ci fep masks/energrp */
- fep_cj = jGrid.fep[cjr*2] + (jGrid.fep[cjr*2+1] << jGrid.na_c);
+ fep_cj = jGrid.fepBits(cjr*2) + (jGrid.fepBits(cjr*2 + 1) << jGrid.geometry().numAtomsICluster);
if (ngid > 1)
{
- gid_cj = nbatParams.energrp[cja*2] + (nbatParams.energrp[cja*2+1] << (jGrid.na_c*egp_shift));
+ gid_cj = nbatParams.energrp[cja*2] + (nbatParams.energrp[cja*2+1] << (jGrid.geometry().numAtomsICluster*egp_shift));
}
}
real shy,
real shz,
real rlist_fep2,
- const nbnxn_grid_t &iGrid,
- const nbnxn_grid_t &jGrid,
+ const Grid &iGrid,
+ const Grid &jGrid,
t_nblist *nlist)
{
int nri_max;
nlist->gid[nri] = 0;
nlist->shift[nri] = nbl_sci->shift & NBNXN_CI_SHIFT;
- bFEP_i = ((iGrid.fep[c_abs - iGrid.cell0*c_gpuNumClusterPerCell] & (1 << i)) != 0u);
+ bFEP_i = iGrid.atomIsPerturbed(c_abs - iGrid.cellOffset()*c_gpuNumClusterPerCell, i);
xi = nbat->x()[ind_i*nbat->xstride+XX] + shx;
yi = nbat->x()[ind_i*nbat->xstride+YY] + shy;
for (int gcj = 0; gcj < c_nbnxnGpuJgroupSize; gcj++)
{
- unsigned int fep_cj;
-
if ((cj4->imei[0].imask & (1U << (gcj*c_gpuNumClusterPerCell + c))) == 0)
{
/* Skip this ci for this cj */
}
const int cjr =
- cj4->cj[gcj] - jGrid.cell0*c_gpuNumClusterPerCell;
-
- fep_cj = jGrid.fep[cjr];
+ cj4->cj[gcj] - jGrid.cellOffset()*c_gpuNumClusterPerCell;
- if (bFEP_i || fep_cj != 0)
+ if (bFEP_i || jGrid.clusterIsPerturbed(cjr))
{
for (int j = 0; j < nbl->na_cj; j++)
{
/* Is this interaction perturbed and not excluded? */
- ind_j = (jGrid.cell0*c_gpuNumClusterPerCell + cjr)*nbl->na_cj + j;
+ ind_j = (jGrid.cellOffset()*c_gpuNumClusterPerCell + cjr)*nbl->na_cj + j;
aj = nbs->a[ind_j];
if (aj >= 0 &&
- (bFEP_i || (fep_cj & (1 << j))) &&
+ (bFEP_i || jGrid.atomIsPerturbed(cjr, j)) &&
(!bDiagRemoved || ind_j >= ind_i))
{
int excl_pair;
}
/* Sets a simple list i-cell bounding box, including PBC shift */
-static inline void set_icell_bb(const nbnxn_grid_t &iGrid,
+static inline void set_icell_bb(const Grid &iGrid,
int ci,
real shx, real shy, real shz,
NbnxnPairlistCpuWork *work)
{
- set_icell_bb_simple(iGrid.bb, ci, shx, shy, shz, &work->iClusterData.bb[0]);
+ set_icell_bb_simple(iGrid.iBoundingBoxes(), ci, shx, shy, shz,
+ &work->iClusterData.bb[0]);
}
#if NBNXN_BBXXXX
}
/* Sets the super-cell and sub-cell bounding boxes, including PBC shift */
-gmx_unused static void set_icell_bb(const nbnxn_grid_t &iGrid,
+gmx_unused static void set_icell_bb(const Grid &iGrid,
int ci,
real shx, real shy, real shz,
NbnxnPairlistGpuWork *work)
{
#if NBNXN_BBXXXX
- set_icell_bbxxxx_supersub(iGrid.pbb, ci, shx, shy, shz,
+ set_icell_bbxxxx_supersub(iGrid.packedBoundingBoxes(), ci, shx, shy, shz,
work->iSuperClusterData.bbPacked.data());
#else
- set_icell_bb_supersub(iGrid.bb, ci, shx, shy, shz,
+ set_icell_bb_supersub(iGrid.iBoundingBoxes(), ci, shx, shy, shz,
work->iSuperClusterData.bb.data());
#endif
}
#endif /* !GMX_SIMD4_HAVE_REAL */
}
-static real minimum_subgrid_size_xy(const nbnxn_grid_t &grid)
+static real minimum_subgrid_size_xy(const Grid &grid)
{
- if (grid.bSimple)
+ const Grid::Dimensions &dims = grid.dimensions();
+
+ if (grid.geometry().isSimple)
{
- return std::min(grid.cellSize[XX], grid.cellSize[YY]);
+ return std::min(dims.cellSize[XX], dims.cellSize[YY]);
}
else
{
- return std::min(grid.cellSize[XX]/c_gpuNumClusterPerCellX,
- grid.cellSize[YY]/c_gpuNumClusterPerCellY);
+ return std::min(dims.cellSize[XX]/c_gpuNumClusterPerCellX,
+ dims.cellSize[YY]/c_gpuNumClusterPerCellY);
}
}
-static real effective_buffer_1x1_vs_MxN(const nbnxn_grid_t &iGrid,
- const nbnxn_grid_t &jGrid)
+static real effective_buffer_1x1_vs_MxN(const Grid &iGrid,
+ const Grid &jGrid)
{
const real eff_1x1_buffer_fac_overest = 0.1;
const int nsubpair_target_min = 36;
real r_eff_sup, vol_est, nsp_est, nsp_est_nl;
- const nbnxn_grid_t &grid = nbs->grid[0];
+ const Grid &grid = nbs->grid[0];
/* We don't need to balance list sizes if:
* - We didn't request balancing.
* - The number of grid cells >= the number of lists requested,
*   since we will always generate at least #cells lists.
* - We don't have any cells, since then there won't be any lists.
*/
- if (min_ci_balanced <= 0 || grid.nc >= min_ci_balanced || grid.nc == 0)
+ if (min_ci_balanced <= 0 || grid.numCells() >= min_ci_balanced || grid.numCells() == 0)
{
/* nsubpair_target==0 signals no balancing */
*nsubpair_target = 0;
return;
}
- gmx::RVec ls;
- ls[XX] = (grid.c1[XX] - grid.c0[XX])/(grid.numCells[XX]*c_gpuNumClusterPerCellX);
- ls[YY] = (grid.c1[YY] - grid.c0[YY])/(grid.numCells[YY]*c_gpuNumClusterPerCellY);
- ls[ZZ] = grid.na_c/(grid.atom_density*ls[XX]*ls[YY]);
+ gmx::RVec ls;
+ const int numAtomsCluster = grid.geometry().numAtomsICluster;
+ const Grid::Dimensions &dims = grid.dimensions();
+
+ ls[XX] = dims.cellSize[XX]/c_gpuNumClusterPerCellX;
+ ls[YY] = dims.cellSize[YY]/c_gpuNumClusterPerCellY;
+ ls[ZZ] = numAtomsCluster/(dims.atomDensity*ls[XX]*ls[YY]);
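+ /* ls is the typical cluster extent: x,y follow from the grid cell size,
+  * z is chosen such that one cluster of atoms at the average density
+  * fills the volume ls[XX]*ls[YY]*ls[ZZ].
+  */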
/* The formulas below are a heuristic estimate of the average nsj per si */
- r_eff_sup = rlist + nbnxn_get_rlist_effective_inc(grid.na_c, ls);
+ r_eff_sup = rlist + nbnxn_get_rlist_effective_inc(numAtomsCluster, ls);
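+ /* The effective radius is larger than rlist, since a whole cluster pair
+  * enters the list as soon as any of its atom pairs is within rlist.
+  */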
if (!nbs->DomDec || nbs->zones->n == 1)
{
else
{
nsp_est_nl =
- gmx::square(grid.atom_density/grid.na_c)*
+ gmx::square(dims.atomDensity/numAtomsCluster)*
nonlocal_vol2(nbs->zones, ls, r_eff_sup);
}
/* Estimate the number of cluster pairs as the local number of
* clusters times the volume they interact with times the density.
*/
- nsp_est = grid.nsubc_tot*vol_est*grid.atom_density/grid.na_c;
+ nsp_est = grid.numClusters()*vol_est*dims.atomDensity/numAtomsCluster;
/* Subtract the non-local pair count */
nsp_est -= nsp_est_nl;
* groups of atoms we'll anyhow be limited by nsubpair_target_min,
* so this overestimation will not matter.
*/
- nsp_est = std::max(nsp_est, grid.nsubc_tot*14._real);
+ nsp_est = std::max(nsp_est, grid.numClusters()*14._real);
if (debug)
{
}
/* Returns the next ci to be processed by our thread */
-static gmx_bool next_ci(const nbnxn_grid_t &grid,
+static gmx_bool next_ci(const Grid &grid,
int nth, int ci_block,
int *ci_x, int *ci_y,
int *ci_b, int *ci)
*ci_b = 0;
}
- if (*ci >= grid.nc)
+ if (*ci >= grid.numCells())
{
return FALSE;
}
- while (*ci >= grid.cxy_ind[*ci_x*grid.numCells[YY] + *ci_y + 1])
+ while (*ci >= grid.firstCellInColumn(*ci_x*grid.dimensions().numCells[YY] + *ci_y + 1))
{
*ci_y += 1;
- if (*ci_y == grid.numCells[YY])
+ if (*ci_y == grid.dimensions().numCells[YY])
{
*ci_x += 1;
*ci_y = 0;
/* Returns the distance^2 for which we put cell pairs in the list
* without checking atom pair distances. This is usually < rlist^2.
*/
-static float boundingbox_only_distance2(const nbnxn_grid_t &iGrid,
- const nbnxn_grid_t &jGrid,
- real rlist,
- gmx_bool simple)
+static float boundingbox_only_distance2(const Grid::Dimensions &iGridDims,
+ const Grid::Dimensions &jGridDims,
+ real rlist,
+ gmx_bool simple)
{
/* If the distance between two sub-cell bounding boxes is less
* than this distance, do not check the distance between
* all particle pairs in the sub-cell, since then it is likely
* that the box pair has atom pairs within the cut-off.
*/
real bbx, bby;
real rbb2;
- bbx = 0.5*(iGrid.cellSize[XX] + jGrid.cellSize[XX]);
- bby = 0.5*(iGrid.cellSize[YY] + jGrid.cellSize[YY]);
+ bbx = 0.5*(iGridDims.cellSize[XX] + jGridDims.cellSize[XX]);
+ bby = 0.5*(iGridDims.cellSize[YY] + jGridDims.cellSize[YY]);
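+ /* Average the cell sizes of the two grids along x and y; for GPU grids
+  * this is reduced below to the sub-cell (cluster) size.
+  */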
if (!simple)
{
bbx /= c_gpuNumClusterPerCellX;
#endif
}
-static int get_ci_block_size(const nbnxn_grid_t &iGrid,
+static int get_ci_block_size(const Grid &iGrid,
gmx_bool bDomDec, int nth)
{
const int ci_block_enum = 5;
* zone boundaries with 3D domain decomposition. At the same time
* the blocks will not become too small.
*/
- ci_block = (iGrid.nc*ci_block_enum)/(ci_block_denom*iGrid.numCells[XX]*nth);
+ ci_block = (iGrid.numCells()*ci_block_enum)/(ci_block_denom*iGrid.dimensions().numCells[XX]*nth);
+
+ const int numAtomsPerCell = iGrid.geometry().numAtomsPerCell;
/* Ensure the blocks are not too small: avoids cache invalidation */
- if (ci_block*iGrid.na_sc < ci_block_min_atoms)
+ if (ci_block*numAtomsPerCell < ci_block_min_atoms)
{
- ci_block = (ci_block_min_atoms + iGrid.na_sc - 1)/iGrid.na_sc;
+ ci_block = (ci_block_min_atoms + numAtomsPerCell - 1)/numAtomsPerCell;
}
/* Without domain decomposition
* or with fewer than 3 blocks per task, divide into nth blocks.
*/
- if (!bDomDec || nth*3*ci_block > iGrid.nc)
+ if (!bDomDec || nth*3*ci_block > iGrid.numCells())
{
- ci_block = (iGrid.nc + nth - 1)/nth;
+ ci_block = (iGrid.numCells() + nth - 1)/nth;
}
- if (ci_block > 1 && (nth - 1)*ci_block >= iGrid.nc)
+ if (ci_block > 1 && (nth - 1)*ci_block >= iGrid.numCells())
{
/* Some threads have no work. Although reducing the block size
* does not decrease the block count on the first few threads,
return false;
}
-static void makeClusterListWrapper(NbnxnPairlistCpu *nbl,
- const nbnxn_grid_t gmx_unused &iGrid,
- const int ci,
- const nbnxn_grid_t &jGrid,
- const int firstCell,
- const int lastCell,
- const bool excludeSubDiagonal,
- const nbnxn_atomdata_t *nbat,
- const real rlist2,
- const real rbb2,
- const Nbnxm::KernelType kernelType,
- int *numDistanceChecks)
+static void makeClusterListWrapper(NbnxnPairlistCpu *nbl,
+ const Grid gmx_unused &iGrid,
+ const int ci,
+ const Grid &jGrid,
+ const int firstCell,
+ const int lastCell,
+ const bool excludeSubDiagonal,
+ const nbnxn_atomdata_t *nbat,
+ const real rlist2,
+ const real rbb2,
+ const Nbnxm::KernelType kernelType,
+ int *numDistanceChecks)
{
switch (kernelType)
{
}
}
-static void makeClusterListWrapper(NbnxnPairlistGpu *nbl,
- const nbnxn_grid_t &gmx_unused iGrid,
- const int ci,
- const nbnxn_grid_t &jGrid,
- const int firstCell,
- const int lastCell,
- const bool excludeSubDiagonal,
- const nbnxn_atomdata_t *nbat,
- const real rlist2,
- const real rbb2,
- Nbnxm::KernelType gmx_unused kernelType,
- int *numDistanceChecks)
+static void makeClusterListWrapper(NbnxnPairlistGpu *nbl,
+ const Grid &gmx_unused iGrid,
+ const int ci,
+ const Grid &jGrid,
+ const int firstCell,
+ const int lastCell,
+ const bool excludeSubDiagonal,
+ const nbnxn_atomdata_t *nbat,
+ const real rlist2,
+ const real rbb2,
+ Nbnxm::KernelType gmx_unused kernelType,
+ int *numDistanceChecks)
{
for (int cj = firstCell; cj <= lastCell; cj++)
{
/* Generates the part of pair-list nbl assigned to our thread */
template <typename T>
static void nbnxn_make_pairlist_part(const nbnxn_search *nbs,
- const nbnxn_grid_t &iGrid,
- const nbnxn_grid_t &jGrid,
+ const Grid &iGrid,
+ const Grid &jGrid,
nbnxn_search_work_t *work,
const nbnxn_atomdata_t *nbat,
const t_blocka &exclusions,
nbs_cycle_start(&work->cc[enbsCCsearch]);
- if (jGrid.bSimple != pairlistIsSimple(*nbl) ||
- iGrid.bSimple != pairlistIsSimple(*nbl))
+ if (jGrid.geometry().isSimple != pairlistIsSimple(*nbl) ||
+ iGrid.geometry().isSimple != pairlistIsSimple(*nbl))
{
gmx_incons("Grid incompatible with pair-list");
}
sync_work(nbl);
- GMX_ASSERT(nbl->na_ci == jGrid.na_c, "The cluster sizes in the list and grid should match");
+ GMX_ASSERT(nbl->na_ci == jGrid.geometry().numAtomsICluster,
+ "The cluster sizes in the list and grid should match");
nbl->na_cj = Nbnxm::JClusterSizePerKernelType[kernelType];
na_cj_2log = get_2log(nbl->na_cj);
rl_fep2 = rl_fep2*rl_fep2;
}
- rbb2 = boundingbox_only_distance2(iGrid, jGrid, nbl->rlist, pairlistIsSimple(*nbl));
+ const Grid::Dimensions &iGridDims = iGrid.dimensions();
+ const Grid::Dimensions &jGridDims = jGrid.dimensions();
+
+ rbb2 = boundingbox_only_distance2(iGridDims, jGridDims, nbl->rlist, pairlistIsSimple(*nbl));
if (debug)
{
}
else
{
- const real listRangeCellToCell = listRangeForGridCellToGridCell(rlist, iGrid, jGrid);
+ const real listRangeCellToCell =
+ listRangeForGridCellToGridCell(rlist, iGrid.dimensions(), jGrid.dimensions());
if (d == XX &&
box[XX][XX] - fabs(box[YY][XX]) - fabs(box[ZZ][XX]) < listRangeCellToCell)
{
gmx::ArrayRef<const float> pbb_i;
if (bSimple)
{
- bb_i = iGrid.bb;
+ bb_i = iGrid.iBoundingBoxes();
}
else
{
- pbb_i = iGrid.pbb;
+ pbb_i = iGrid.packedBoundingBoxes();
}
#else
/* We use the normal bounding box format for both grid types */
- bb_i = iGrid.bb;
+ bb_i = iGrid.iBoundingBoxes();
#endif
- gmx::ArrayRef<const float> bbcz_i = iGrid.bbcz;
- gmx::ArrayRef<const int> flags_i = iGrid.flags;
- gmx::ArrayRef<const float> bbcz_j = jGrid.bbcz;
- int cell0_i = iGrid.cell0;
+ gmx::ArrayRef<const float> bbcz_i = iGrid.zBoundingBoxes();
+ gmx::ArrayRef<const int> flags_i = iGrid.clusterFlags();
+ gmx::ArrayRef<const float> bbcz_j = jGrid.zBoundingBoxes();
+ int cell0_i = iGrid.cellOffset();
if (debug)
{
fprintf(debug, "nbl nc_i %d col.av. %.1f ci_block %d\n",
- iGrid.nc, iGrid.nc/static_cast<double>(iGrid.numCells[XX]*iGrid.numCells[YY]), ci_block);
+ iGrid.numCells(), iGrid.numCells()/static_cast<double>(iGrid.numColumns()), ci_block);
}
numDistanceChecks = 0;
- const real listRangeBBToJCell2 = gmx::square(listRangeForBoundingBoxToGridCell(rlist, jGrid));
+ const real listRangeBBToJCell2 = gmx::square(listRangeForBoundingBoxToGridCell(rlist, jGrid.dimensions()));
/* Initially set ci_b and ci to 1 before where we want them to start,
* as they will both be incremented in next_ci.
}
else
{
- bx1 = iGrid.c0[XX] + (ci_x+1)*iGrid.cellSize[XX];
+ bx1 = iGridDims.lowerCorner[XX] + (ci_x+1)*iGridDims.cellSize[XX];
}
- if (bx1 < jGrid.c0[XX])
+ if (bx1 < jGridDims.lowerCorner[XX])
{
- d2cx = gmx::square(jGrid.c0[XX] - bx1);
+ d2cx = gmx::square(jGridDims.lowerCorner[XX] - bx1);
if (d2cx >= listRangeBBToJCell2)
{
}
}
- ci_xy = ci_x*iGrid.numCells[YY] + ci_y;
+ ci_xy = ci_x*iGridDims.numCells[YY] + ci_y;
/* Loop over shift vectors in three dimensions */
for (int tz = -shp[ZZ]; tz <= shp[ZZ]; tz++)
continue;
}
- bz1_frac = bz1/(iGrid.cxy_ind[ci_xy+1] - iGrid.cxy_ind[ci_xy]);
+ bz1_frac = bz1/iGrid.numCellsInColumn(ci_xy);
if (bz1_frac < 0)
{
bz1_frac = 0;
}
else
{
- by0 = iGrid.c0[YY] + (ci_y )*iGrid.cellSize[YY] + shy;
- by1 = iGrid.c0[YY] + (ci_y+1)*iGrid.cellSize[YY] + shy;
+ by0 = iGridDims.lowerCorner[YY] + (ci_y )*iGridDims.cellSize[YY] + shy;
+ by1 = iGridDims.lowerCorner[YY] + (ci_y + 1)*iGridDims.cellSize[YY] + shy;
}
get_cell_range<YY>(by0, by1,
- jGrid,
+ jGridDims,
d2z_cx, rlist,
&cyf, &cyl);
}
d2z_cy = d2z;
- if (by1 < jGrid.c0[YY])
+ if (by1 < jGridDims.lowerCorner[YY])
{
- d2z_cy += gmx::square(jGrid.c0[YY] - by1);
+ d2z_cy += gmx::square(jGridDims.lowerCorner[YY] - by1);
}
- else if (by0 > jGrid.c1[YY])
+ else if (by0 > jGridDims.upperCorner[YY])
{
- d2z_cy += gmx::square(by0 - jGrid.c1[YY]);
+ d2z_cy += gmx::square(by0 - jGridDims.upperCorner[YY]);
}
for (int tx = -shp[XX]; tx <= shp[XX]; tx++)
}
else
{
- bx0 = iGrid.c0[XX] + (ci_x )*iGrid.cellSize[XX] + shx;
- bx1 = iGrid.c0[XX] + (ci_x+1)*iGrid.cellSize[XX] + shx;
+ bx0 = iGridDims.lowerCorner[XX] + (ci_x )*iGridDims.cellSize[XX] + shx;
+ bx1 = iGridDims.lowerCorner[XX] + (ci_x+1)*iGridDims.cellSize[XX] + shx;
}
get_cell_range<XX>(bx0, bx1,
- jGrid,
+ jGridDims,
d2z_cy, rlist,
&cxf, &cxl);
for (int cx = cxf; cx <= cxl; cx++)
{
d2zx = d2z;
- if (jGrid.c0[XX] + cx*jGrid.cellSize[XX] > bx1)
+ if (jGridDims.lowerCorner[XX] + cx*jGridDims.cellSize[XX] > bx1)
{
- d2zx += gmx::square(jGrid.c0[XX] + cx*jGrid.cellSize[XX] - bx1);
+ d2zx += gmx::square(jGridDims.lowerCorner[XX] + cx*jGridDims.cellSize[XX] - bx1);
}
- else if (jGrid.c0[XX] + (cx+1)*jGrid.cellSize[XX] < bx0)
+ else if (jGridDims.lowerCorner[XX] + (cx+1)*jGridDims.cellSize[XX] < bx0)
{
- d2zx += gmx::square(jGrid.c0[XX] + (cx+1)*jGrid.cellSize[XX] - bx0);
+ d2zx += gmx::square(jGridDims.lowerCorner[XX] + (cx+1)*jGridDims.cellSize[XX] - bx0);
}
if (isIntraGridList &&
for (int cy = cyf_x; cy <= cyl; cy++)
{
- const int columnStart = jGrid.cxy_ind[cx*jGrid.numCells[YY] + cy];
- const int columnEnd = jGrid.cxy_ind[cx*jGrid.numCells[YY] + cy + 1];
+ const int columnStart = jGrid.firstCellInColumn(cx*jGridDims.numCells[YY] + cy);
+ const int columnEnd = jGrid.firstCellInColumn(cx*jGridDims.numCells[YY] + cy + 1);
d2zxy = d2zx;
- if (jGrid.c0[YY] + cy*jGrid.cellSize[YY] > by1)
+ if (jGridDims.lowerCorner[YY] + cy*jGridDims.cellSize[YY] > by1)
{
- d2zxy += gmx::square(jGrid.c0[YY] + cy*jGrid.cellSize[YY] - by1);
+ d2zxy += gmx::square(jGridDims.lowerCorner[YY] + cy*jGridDims.cellSize[YY] - by1);
}
- else if (jGrid.c0[YY] + (cy+1)*jGrid.cellSize[YY] < by0)
+ else if (jGridDims.lowerCorner[YY] + (cy + 1)*jGridDims.cellSize[YY] < by0)
{
- d2zxy += gmx::square(jGrid.c0[YY] + (cy+1)*jGrid.cellSize[YY] - by0);
+ d2zxy += gmx::square(jGridDims.lowerCorner[YY] + (cy + 1)*jGridDims.cellSize[YY] - by0);
}
if (columnStart < columnEnd && d2zxy < listRangeBBToJCell2)
{
if (bFBufferFlag && getNumSimpleJClustersInList(*nbl) > ncj_old_i)
{
- bitmask_init_bit(&(work->buffer_flags.flag[(iGrid.cell0+ci) >> gridi_flag_shift]), th);
+ bitmask_init_bit(&(work->buffer_flags.flag[(iGrid.cellOffset() + ci) >> gridi_flag_shift]), th);
}
}
for (int zi = 0; zi < nzi; zi++)
{
- const nbnxn_grid_t &iGrid = nbs->grid[zi];
+ const Grid &iGrid = nbs->grid[zi];
int zj0;
int zj1;
}
for (int zj = zj0; zj < zj1; zj++)
{
- const nbnxn_grid_t &jGrid = nbs->grid[zj];
+ const Grid &jGrid = nbs->grid[zj];
if (debug)
{