using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
-
void nbnxn_atomdata_t::resizeCoordinateBuffer(int numAtoms)
{
numAtoms_ = numAtoms;
static int numAtomsFromGrids(const nbnxn_search &nbs)
{
- const nbnxn_grid_t &lastGrid = nbs.grid.back();
-
- return (lastGrid.cell0 + lastGrid.nc)*lastGrid.na_sc;
+ return nbs.grid.back().atomIndexEnd();
}
/* Sets the atom type in nbnxn_atomdata_t */
{
params->type.resize(numAtomsFromGrids(*nbs));
- for (const nbnxn_grid_t &grid : nbs->grid)
+ for (const Nbnxm::Grid &grid : nbs->grid)
{
/* Loop over all columns and copy and fill */
- for (int i = 0; i < grid.numCells[XX]*grid.numCells[YY]; i++)
+ for (int i = 0; i < grid.numColumns(); i++)
{
- int ncz = grid.cxy_ind[i+1] - grid.cxy_ind[i];
- int ash = (grid.cell0 + grid.cxy_ind[i])*grid.na_sc;
+ const int numAtoms = grid.paddedNumAtomsInColumn(i);
+ const int atomOffset = grid.firstAtomInColumn(i);
- copy_int_to_nbat_int(nbs->a.data() + ash, grid.cxy_na[i], ncz*grid.na_sc,
- type, params->numTypes - 1, params->type.data() + ash);
+ copy_int_to_nbat_int(nbs->a.data() + atomOffset, grid.numAtomsInColumn(i), numAtoms,
+ type, params->numTypes - 1, params->type.data() + atomOffset);
}
}
}
if (params->comb_rule != ljcrNONE)
{
- for (const nbnxn_grid_t &grid : nbs->grid)
+ for (const Nbnxm::Grid &grid : nbs->grid)
{
/* Loop over all columns and copy and fill */
- for (int i = 0; i < grid.numCells[XX]*grid.numCells[YY]; i++)
+ for (int i = 0; i < grid.numColumns(); i++)
{
- int ncz = grid.cxy_ind[i+1] - grid.cxy_ind[i];
- int ash = (grid.cell0 + grid.cxy_ind[i])*grid.na_sc;
+ const int numAtoms = grid.paddedNumAtomsInColumn(i);
+ const int atomOffset = grid.firstAtomInColumn(i);
if (XFormat == nbatX4)
{
copy_lj_to_nbat_lj_comb<c_packX4>(params->nbfp_comb,
- params->type.data() + ash,
- ncz*grid.na_sc,
- params->lj_comb.data() + ash*2);
+ params->type.data() + atomOffset,
+ numAtoms,
+ params->lj_comb.data() + atomOffset*2);
}
else if (XFormat == nbatX8)
{
copy_lj_to_nbat_lj_comb<c_packX8>(params->nbfp_comb,
- params->type.data() + ash,
- ncz*grid.na_sc,
- params->lj_comb.data() + ash*2);
+ params->type.data() + atomOffset,
+ numAtoms,
+ params->lj_comb.data() + atomOffset*2);
}
else if (XFormat == nbatXYZQ)
{
copy_lj_to_nbat_lj_comb<1>(params->nbfp_comb,
- params->type.data() + ash,
- ncz*grid.na_sc,
- params->lj_comb.data() + ash*2);
+ params->type.data() + atomOffset,
+ numAtoms,
+ params->lj_comb.data() + atomOffset*2);
}
}
}
nbat->paramsDeprecated().q.resize(nbat->numAtoms());
}
- for (const nbnxn_grid_t &grid : nbs->grid)
+ for (const Nbnxm::Grid &grid : nbs->grid)
{
/* Loop over all columns and copy and fill */
- for (int cxy = 0; cxy < grid.numCells[XX]*grid.numCells[YY]; cxy++)
+ for (int cxy = 0; cxy < grid.numColumns(); cxy++)
{
- int ash = (grid.cell0 + grid.cxy_ind[cxy])*grid.na_sc;
- int na = grid.cxy_na[cxy];
- int na_round = (grid.cxy_ind[cxy+1] - grid.cxy_ind[cxy])*grid.na_sc;
+ const int atomOffset = grid.firstAtomInColumn(cxy);
+ const int numAtoms = grid.numAtomsInColumn(cxy);
+ const int paddedNumAtoms = grid.paddedNumAtomsInColumn(cxy);
if (nbat->XFormat == nbatXYZQ)
{
- real *q = nbat->x().data() + ash*STRIDE_XYZQ + ZZ + 1;
+ real *q = nbat->x().data() + atomOffset*STRIDE_XYZQ + ZZ + 1;
int i;
- for (i = 0; i < na; i++)
+ for (i = 0; i < numAtoms; i++)
{
- *q = charge[nbs->a[ash+i]];
+ *q = charge[nbs->a[atomOffset + i]];
q += STRIDE_XYZQ;
}
/* Complete the partially filled last cell with zeros */
- for (; i < na_round; i++)
+ for (; i < paddedNumAtoms; i++)
{
*q = 0;
q += STRIDE_XYZQ;
}
else
{
- real *q = nbat->paramsDeprecated().q.data() + ash;
+ real *q = nbat->paramsDeprecated().q.data() + atomOffset;
int i;
- for (i = 0; i < na; i++)
+ for (i = 0; i < numAtoms; i++)
{
- *q = charge[nbs->a[ash+i]];
+ *q = charge[nbs->a[atomOffset + i]];
q++;
}
/* Complete the partially filled last cell with zeros */
- for (; i < na_round; i++)
+ for (; i < paddedNumAtoms; i++)
{
*q = 0;
q++;
stride_q = 1;
}
- for (const nbnxn_grid_t &grid : nbs->grid)
+ for (const Nbnxm::Grid &grid : nbs->grid)
{
int nsubc;
- if (grid.bSimple)
+ if (grid.geometry().isSimple)
{
nsubc = 1;
}
nsubc = c_gpuNumClusterPerCell;
}
- int c_offset = grid.cell0*grid.na_sc;
+ int c_offset = grid.firstAtomInColumn(0);
/* Loop over all columns and copy and fill */
- for (int c = 0; c < grid.nc*nsubc; c++)
+ for (int c = 0; c < grid.numCells()*nsubc; c++)
{
/* Does this cluster contain perturbed particles? */
- if (grid.fep[c] != 0)
+ if (grid.clusterIsPerturbed(c))
{
- for (int i = 0; i < grid.na_c; i++)
+ const int numAtomsPerCluster = grid.geometry().numAtomsICluster;
+ for (int i = 0; i < numAtomsPerCluster; i++)
{
/* Is this a perturbed particle? */
- if (grid.fep[c] & (1 << i))
+ if (grid.atomIsPerturbed(c, i))
{
- int ind = c_offset + c*grid.na_c + i;
+ int ind = c_offset + c*numAtomsPerCluster + i;
/* Set atom type and charge to non-interacting */
params.type[ind] = params.numTypes - 1;
q[ind*stride_q] = 0;
params->energrp.resize(numAtomsFromGrids(*nbs));
- for (const nbnxn_grid_t &grid : nbs->grid)
+ for (const Nbnxm::Grid &grid : nbs->grid)
{
/* Loop over all columns and copy and fill */
- for (int i = 0; i < grid.numCells[XX]*grid.numCells[YY]; i++)
+ for (int i = 0; i < grid.numColumns(); i++)
{
- int ncz = grid.cxy_ind[i+1] - grid.cxy_ind[i];
- int ash = (grid.cell0 + grid.cxy_ind[i])*grid.na_sc;
+ const int numAtoms = grid.paddedNumAtomsInColumn(i);
+ const int atomOffset = grid.firstAtomInColumn(i);
- copy_egp_to_nbat_egps(nbs->a.data() + ash, grid.cxy_na[i], ncz*grid.na_sc,
+ copy_egp_to_nbat_egps(nbs->a.data() + atomOffset, grid.numAtomsInColumn(i), numAtoms,
c_nbnxnCpuIClusterSize, params->neg_2log,
atinfo,
- params->energrp.data() + (ash >> grid.na_c_2log));
+ params->energrp.data() + grid.atomToCluster(atomOffset));
}
}
}
if (FillLocal)
{
- nbat->natoms_local = nbs->grid[0].nc*nbs->grid[0].na_sc;
+ nbat->natoms_local = nbs->grid[0].atomIndexEnd();
}
nth = gmx_omp_nthreads_get(emntPairsearch);
{
for (int g = g0; g < g1; g++)
{
- const nbnxn_grid_t &grid = nbs->grid[g];
- const int numCellsXY = grid.numCells[XX]*grid.numCells[YY];
+ const Nbnxm::Grid &grid = nbs->grid[g];
+ const int numCellsXY = grid.numColumns();
const int cxy0 = (numCellsXY* th + nth - 1)/nth;
const int cxy1 = (numCellsXY*(th + 1) + nth - 1)/nth;
for (int cxy = cxy0; cxy < cxy1; cxy++)
{
- int na, ash, na_fill;
-
- na = grid.cxy_na[cxy];
- ash = (grid.cell0 + grid.cxy_ind[cxy])*grid.na_sc;
+ const int na = grid.numAtomsInColumn(cxy);
+ const int ash = grid.firstAtomInColumn(cxy);
+ int na_fill;
if (g == 0 && FillLocal)
{
- na_fill =
- (grid.cxy_ind[cxy+1] - grid.cxy_ind[cxy])*grid.na_sc;
+ na_fill = grid.paddedNumAtomsInColumn(cxy);
}
else
{
* the research papers on the package. Check out http://www.gromacs.org.
*/
+/*! \internal \file
+ *
+ * \brief
+ * Implements the Grid class.
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \ingroup module_nbnxm
+ */
+
#include "gmxpre.h"
#include "grid.h"
#include "gromacs/nbnxm/atomdata.h"
#include "gromacs/nbnxm/nbnxm.h"
#include "gromacs/nbnxm/nbnxm_geometry.h"
+#include "gromacs/nbnxm/pairlist.h"
+#include "gromacs/nbnxm/pairlistset.h"
#include "gromacs/simd/simd.h"
#include "gromacs/simd/vector_operations.h"
#include "gromacs/utility/exceptions.h"
struct gmx_domdec_zones_t;
-static real grid_atom_density(int numAtoms,
- const rvec lowerCorner,
- const rvec upperCorner)
+namespace Nbnxm
+{
+
+Grid::Geometry::Geometry(const PairlistType pairlistType) :
+ isSimple(pairlistType != PairlistType::Hierarchical8x8),
+ numAtomsICluster(IClusterSizePerListType[pairlistType]),
+ numAtomsJCluster(JClusterSizePerListType[pairlistType]),
+ numAtomsPerCell((isSimple ? 1 : c_gpuNumClusterPerCell)*numAtomsICluster),
+ numAtomsICluster2Log(get_2log(numAtomsICluster))
+{
+}
+
+Grid::Grid(const PairlistType pairlistType) :
+ geometry_(pairlistType)
+{
+}
+
+/*! \brief Returns the atom density (> 0) of a rectangular grid */
+static real gridAtomDensity(int numAtoms,
+ const rvec lowerCorner,
+ const rvec upperCorner)
{
rvec size;
return numAtoms/(size[XX]*size[YY]*size[ZZ]);
}
-static void set_grid_size_xy(const nbnxn_search *nbs,
- nbnxn_grid_t *grid,
- int ddZone,
- int numAtoms,
- const rvec lowerCorner,
- const rvec upperCorner,
- real atomDensity)
+void Grid::setDimensions(const nbnxn_search *nbs,
+ const int ddZone,
+ const int numAtoms,
+ const rvec lowerCorner,
+ const rvec upperCorner,
+ real atomDensity,
+ const real maxAtomGroupRadius)
{
- rvec size;
- real tlen, tlen_x, tlen_y;
+ /* For the home zone we compute the density when not set (=-1) or when =0 */
+ if (ddZone == 0 && atomDensity <= 0)
+ {
+ atomDensity = gridAtomDensity(numAtoms, lowerCorner, upperCorner);
+ }
+ dimensions_.atomDensity = atomDensity;
+ dimensions_.maxAtomGroupRadius = maxAtomGroupRadius;
+
+ rvec size;
rvec_sub(upperCorner, lowerCorner, size);
- if (numAtoms > grid->na_sc)
+ if (numAtoms > geometry_.numAtomsPerCell)
{
GMX_ASSERT(atomDensity > 0, "With one or more atoms, the density should be positive");
/* target cell length */
- if (grid->bSimple)
+ real tlen_x;
+ real tlen_y;
+ if (geometry_.isSimple)
{
/* To minimize the zero interactions, we should make
* the largest of the i/j cell cubic.
*/
- int numAtomsInCell = std::max(grid->na_c, grid->na_cj);
+ int numAtomsInCell = std::max(geometry_.numAtomsICluster,
+ geometry_.numAtomsJCluster);
/* Approximately cubic cells */
- tlen = std::cbrt(numAtomsInCell/atomDensity);
- tlen_x = tlen;
- tlen_y = tlen;
+ real tlen = std::cbrt(numAtomsInCell/atomDensity);
+ tlen_x = tlen;
+ tlen_y = tlen;
}
else
{
/* Approximately cubic sub cells */
- tlen = std::cbrt(grid->na_c/atomDensity);
- tlen_x = tlen*c_gpuNumClusterPerCellX;
- tlen_y = tlen*c_gpuNumClusterPerCellY;
+ real tlen = std::cbrt(geometry_.numAtomsICluster/atomDensity);
+ tlen_x = tlen*c_gpuNumClusterPerCellX;
+ tlen_y = tlen*c_gpuNumClusterPerCellY;
}
/* We round ncx and ncy down, because we get less cell pairs
* in the nbsist when the fixed cell dimensions (x,y) are
* larger than the variable one (z) than the other way around.
*/
- grid->numCells[XX] = std::max(1, static_cast<int>(size[XX]/tlen_x));
- grid->numCells[YY] = std::max(1, static_cast<int>(size[YY]/tlen_y));
+ dimensions_.numCells[XX] = std::max(1, static_cast<int>(size[XX]/tlen_x));
+ dimensions_.numCells[YY] = std::max(1, static_cast<int>(size[YY]/tlen_y));
}
else
{
- grid->numCells[XX] = 1;
- grid->numCells[YY] = 1;
+ dimensions_.numCells[XX] = 1;
+ dimensions_.numCells[YY] = 1;
}
for (int d = 0; d < DIM - 1; d++)
{
- grid->cellSize[d] = size[d]/grid->numCells[d];
- grid->invCellSize[d] = 1/grid->cellSize[d];
+ dimensions_.cellSize[d] = size[d]/dimensions_.numCells[d];
+ dimensions_.invCellSize[d] = 1/dimensions_.cellSize[d];
}
if (ddZone > 0)
* they end up on the grid, but for performance it's better
* if they don't end up in cells that can be within cut-off range.
*/
- grid->numCells[XX]++;
- grid->numCells[YY]++;
+ dimensions_.numCells[XX]++;
+ dimensions_.numCells[YY]++;
}
/* We need one additional cell entry for particles moved by DD */
- int numCellsXY = grid->numCells[XX]*grid->numCells[YY];
- grid->cxy_na.resize(numCellsXY + 1);
- grid->cxy_ind.resize(numCellsXY + 2);
+ cxy_na_.resize(numColumns() + 1);
+ cxy_ind_.resize(numColumns() + 2);
for (nbnxn_search_work_t &work : nbs->work)
{
- work.cxy_na.resize(numCellsXY + 1);
+ work.cxy_na.resize(numColumns() + 1);
}
/* Worst case scenario of 1 atom in each last cell */
int maxNumCells;
- if (grid->na_cj <= grid->na_c)
+ if (geometry_.numAtomsJCluster <= geometry_.numAtomsICluster)
{
- maxNumCells = numAtoms/grid->na_sc + numCellsXY;
+ maxNumCells = numAtoms/geometry_.numAtomsPerCell + numColumns();
}
else
{
- maxNumCells = numAtoms/grid->na_sc + numCellsXY*grid->na_cj/grid->na_c;
+ maxNumCells = numAtoms/geometry_.numAtomsPerCell + numColumns()*geometry_.numAtomsJCluster/geometry_.numAtomsICluster;
}
- grid->nsubc.resize(maxNumCells);
- grid->bbcz.resize(maxNumCells*NNBSBB_D);
+ if (!geometry_.isSimple)
+ {
+ numClusters_.resize(maxNumCells);
+ }
+ bbcz_.resize(maxNumCells*NNBSBB_D);
/* This resize also zeros the contents, this avoid possible
* floating exceptions in SIMD with the unused bb elements.
*/
- if (grid->bSimple)
+ if (geometry_.isSimple)
{
- grid->bb.resize(maxNumCells);
+ bb_.resize(maxNumCells);
}
else
{
#if NBNXN_BBXXXX
- grid->pbb.resize(maxNumCells*c_gpuNumClusterPerCell/STRIDE_PBB*NNBSBB_XXXX);
+ pbb_.resize(maxNumCells*c_gpuNumClusterPerCell/STRIDE_PBB*NNBSBB_XXXX);
#else
- grid->bb.resize(maxNumCells*c_gpuNumClusterPerCell);
+ bb_.resize(maxNumCells*c_gpuNumClusterPerCell);
#endif
}
- if (grid->bSimple)
+ if (geometry_.numAtomsJCluster == geometry_.numAtomsICluster)
{
- if (grid->na_cj == grid->na_c)
- {
- grid->bbj = grid->bb;
- }
- else
- {
- grid->bbjStorage.resize(maxNumCells*grid->na_c/grid->na_cj);
- grid->bbj = grid->bbjStorage;
- }
+ bbj_ = bb_;
+ }
+ else
+ {
+ GMX_ASSERT(geometry_.isSimple, "Only CPU lists should have different i/j cluster sizes");
+
+ bbjStorage_.resize(maxNumCells*geometry_.numAtomsICluster/geometry_.numAtomsJCluster);
+ bbj_ = bbjStorage_;
}
- grid->flags.resize(maxNumCells);
+ flags_.resize(maxNumCells);
if (nbs->bFEP)
{
- grid->fep.resize(maxNumCells*grid->na_sc/grid->na_c);
+ fep_.resize(maxNumCells*geometry_.numAtomsPerCell/geometry_.numAtomsICluster);
}
- copy_rvec(lowerCorner, grid->c0);
- copy_rvec(upperCorner, grid->c1);
- copy_rvec(size, grid->size);
+ copy_rvec(lowerCorner, dimensions_.lowerCorner);
+ copy_rvec(upperCorner, dimensions_.upperCorner);
+ copy_rvec(size, dimensions_.gridSize);
}
/* We need to sort paricles in grid columns on z-coordinate.
- * As particle are very often distributed homogeneously, we a sorting
+ * As particle are very often distributed homogeneously, we use a sorting
* algorithm similar to pigeonhole sort. We multiply the z-coordinate
* by a factor, cast to an int and try to store in that hole. If the hole
* is full, we move this or another particle. A second pass is needed to make
* for an O(#particles) sort up till distributions were all particles are
* concentrated in 1/4 of the space. No NlogN fallback is implemented,
* as it can be expensive to detect imhomogeneous particle distributions.
- * SGSF is the maximum ratio of holes used, in the worst case all particles
- * end up in the last hole and we need #particles extra holes at the end.
*/
-#define SORT_GRID_OVERSIZE 4
-#define SGSF (SORT_GRID_OVERSIZE + 1)
+/*! \brief Ratio of grid cells to atoms */
+static constexpr int c_sortGridRatio = 4;
+/*! \brief Maximum ratio of holes used, in the worst case all particles
+ * end up in the last hole and we need num. atoms extra holes at the end.
+ */
+static constexpr int c_sortGridMaxSizeFactor = c_sortGridRatio + 1;
-/* Sort particle index a on coordinates x along dim.
+/*! \brief Sorts particle index a on coordinates x along dim.
+ *
* Backwards tells if we want decreasing iso increasing coordinates.
* h0 is the minimum of the coordinate range.
* invh is the 1/length of the sorting range.
* n_per_h (>=n) is the expected average number of particles per 1/invh
* sort is the sorting work array.
- * sort should have a size of at least n_per_h*SORT_GRID_OVERSIZE + n,
- * or easier, allocate at least n*SGSF elements.
+ * sort should have a size of at least n_per_h*c_sortGridRatio + n,
+ * or easier, allocate at least n*c_sortGridMaxSizeFactor elements.
*/
static void sort_atoms(int dim, gmx_bool Backwards,
int gmx_unused dd_zone,
GMX_ASSERT(n <= n_per_h, "We require n <= n_per_h");
/* Transform the inverse range height into the inverse hole height */
- invh *= n_per_h*SORT_GRID_OVERSIZE;
+ invh *= n_per_h*c_sortGridRatio;
/* Set nsort to the maximum possible number of holes used.
* In worst case all n elements end up in the last bin.
*/
- int nsort = n_per_h*SORT_GRID_OVERSIZE + n;
+ int nsort = n_per_h*c_sortGridRatio + n;
/* Determine the index range used, so we can limit it for the second pass */
int zi_min = INT_MAX;
#ifndef NDEBUG
/* As we can have rounding effect, we use > iso >= here */
if (relevantAtomsAreWithinGridBounds &&
- (zi < 0 || (dd_zone == 0 && zi > n_per_h*SORT_GRID_OVERSIZE)))
+ (zi < 0 || (dd_zone == 0 && zi > n_per_h*c_sortGridRatio)))
{
gmx_fatal(FARGS, "(int)((x[%d][%c]=%f - %f)*%f) = %d, not in 0 - %d*%d\n",
a[i], 'x'+dim, x[a[i]][dim], h0, invh, zi,
- n_per_h, SORT_GRID_OVERSIZE);
+ n_per_h, c_sortGridRatio);
}
#endif
if (zi < 0)
* can be far beyond the grid size, which is set by the non-bonded
* cut-off distance. We sort such particles into the last cell.
*/
- if (zi > n_per_h*SORT_GRID_OVERSIZE)
+ if (zi > n_per_h*c_sortGridRatio)
{
- zi = n_per_h*SORT_GRID_OVERSIZE;
+ zi = n_per_h*c_sortGridRatio;
}
/* Ideally this particle should go in sort cell zi,
}
#if GMX_DOUBLE
-#define R2F_D(x) ((float)((x) >= 0 ? ((1-GMX_FLOAT_EPS)*(x)) : ((1+GMX_FLOAT_EPS)*(x))))
-#define R2F_U(x) ((float)((x) >= 0 ? ((1+GMX_FLOAT_EPS)*(x)) : ((1-GMX_FLOAT_EPS)*(x))))
+//! Returns double up to one least significant float bit smaller than x
+static double R2F_D(const float x)
+{
+ return static_cast<float>(x >= 0 ? (1 - GMX_FLOAT_EPS)*x : (1 + GMX_FLOAT_EPS)*x);
+}
+//! Returns double up to one least significant float bit larger than x
+static double R2F_U(const float x)
+{
+ return static_cast<float>(x >= 0 ? (1 + GMX_FLOAT_EPS)*x : (1 - GMX_FLOAT_EPS)*x);
+}
#else
-#define R2F_D(x) (x)
-#define R2F_U(x) (x)
+//! Returns x
+static float R2F_D(const float x)
+{
+ return x;
+}
+//! Returns x
+static float R2F_U(const float x)
+{
+ return x;
+}
#endif
-/* Coordinate order x,y,z, bb order xyz0 */
-static void calc_bounding_box(int na, int stride, const real *x, nbnxn_bb_t *bb)
+//! Computes the bounding box for na coordinates in order x,y,z, bb order xyz0
+static void calc_bounding_box(int na, int stride, const real *x,
+ BoundingBox *bb)
{
int i;
real xl, xh, yl, yh, zl, zh;
bb->upper[BB_Z] = R2F_U(zh);
}
-/* Packed coordinates, bb order xyz0 */
-static void calc_bounding_box_x_x4(int na, const real *x, nbnxn_bb_t *bb)
+/*! \brief Computes the bounding box for na packed coordinates, bb order xyz0 */
+static void calc_bounding_box_x_x4(int na, const real *x,
+ BoundingBox *bb)
{
real xl, xh, yl, yh, zl, zh;
bb->upper[BB_Z] = R2F_U(zh);
}
-/* Packed coordinates, bb order xyz0 */
-static void calc_bounding_box_x_x8(int na, const real *x, nbnxn_bb_t *bb)
+/*! \brief Computes the bounding box for na coordinates, bb order xyz0 */
+static void calc_bounding_box_x_x8(int na, const real *x,
+ BoundingBox *bb)
{
real xl, xh, yl, yh, zl, zh;
bb->upper[BB_Z] = R2F_U(zh);
}
-/* Packed coordinates, bb order xyz0 */
+/*! \brief Computes the bounding box for na packed coordinates, bb order xyz0 */
gmx_unused static void calc_bounding_box_x_x4_halves(int na, const real *x,
- nbnxn_bb_t *bb, nbnxn_bb_t *bbj)
+ BoundingBox *bb,
+ BoundingBox *bbj)
{
// TODO: During SIMDv2 transition only some archs use namespace (remove when done)
using namespace gmx;
#if NBNXN_SEARCH_BB_SIMD4
-/* Coordinate order xyz, bb order xxxxyyyyzzzz */
+/*! \brief Computes the bounding box for na coordinates in order xyz, bb order xxxxyyyyzzzz */
static void calc_bounding_box_xxxx(int na, int stride, const real *x, float *bb)
{
int i;
#if NBNXN_SEARCH_SIMD4_FLOAT_X_BB
-/* Coordinate order xyz?, bb order xyz0 */
-static void calc_bounding_box_simd4(int na, const float *x, nbnxn_bb_t *bb)
+/*! \brief Computes the bounding box for na coordinates in order xyz?, bb order xyz0 */
+static void calc_bounding_box_simd4(int na, const float *x,
+ BoundingBox *bb)
{
// TODO: During SIMDv2 transition only some archs use namespace (remove when done)
using namespace gmx;
store4(&bb->upper[0], bb_1_S);
}
-/* Coordinate order xyz?, bb order xxxxyyyyzzzz */
+/*! \brief Computes the bounding box for na coordinates in order xyz?, bb order xxxxyyyyzzzz */
static void calc_bounding_box_xxxx_simd4(int na, const float *x,
- nbnxn_bb_t *bb_work_aligned,
+ BoundingBox *bb_work_aligned,
real *bb)
{
calc_bounding_box_simd4(na, x, bb_work_aligned);
#endif /* NBNXN_SEARCH_SIMD4_FLOAT_X_BB */
-/* Combines pairs of consecutive bounding boxes */
-static void combine_bounding_box_pairs(nbnxn_grid_t *grid,
- gmx::ArrayRef<const nbnxn_bb_t> bb)
+/*! \brief Combines pairs of consecutive bounding boxes */
+static void combine_bounding_box_pairs(const Grid &grid,
+ gmx::ArrayRef<const BoundingBox> bb,
+ gmx::ArrayRef<BoundingBox> bbj)
{
// TODO: During SIMDv2 transition only some archs use namespace (remove when done)
using namespace gmx;
- for (int i = 0; i < grid->numCells[XX]*grid->numCells[YY]; i++)
+ for (int i = 0; i < grid.numColumns(); i++)
{
/* Starting bb in a column is expected to be 2-aligned */
- int sc2 = grid->cxy_ind[i]>>1;
+ const int sc2 = grid.firstCellInColumn(i) >> 1;
/* For odd numbers skip the last bb here */
- int nc2 = (grid->cxy_na[i]+3)>>(2+1);
- int c2;
- for (c2 = sc2; c2 < sc2+nc2; c2++)
+ const int nc2 = (grid.numAtomsInColumn(i) + 3) >> (2 + 1);
+ int c2;
+ for (c2 = sc2; c2 < sc2 + nc2; c2++)
{
#if NBNXN_SEARCH_BB_SIMD4
Simd4Float min_S, max_S;
load4(&bb[c2*2+1].lower[0]));
max_S = max(load4(&bb[c2*2+0].upper[0]),
load4(&bb[c2*2+1].upper[0]));
- store4(&grid->bbj[c2].lower[0], min_S);
- store4(&grid->bbj[c2].upper[0], max_S);
+ store4(&bbj[c2].lower[0], min_S);
+ store4(&bbj[c2].upper[0], max_S);
#else
for (int j = 0; j < NNBSBB_C; j++)
{
- grid->bbj[c2].lower[j] = std::min(bb[c2*2+0].lower[j],
- bb[c2*2+1].lower[j]);
- grid->bbj[c2].upper[j] = std::max(bb[c2*2+0].upper[j],
- bb[c2*2+1].upper[j]);
+ bbj[c2].lower[j] = std::min(bb[c2*2 + 0].lower[j],
+ bb[c2*2 + 1].lower[j]);
+ bbj[c2].upper[j] = std::max(bb[c2*2 + 0].upper[j],
+ bb[c2*2 + 1].upper[j]);
}
#endif
}
- if (((grid->cxy_na[i]+3)>>2) & 1)
+ if (((grid.numAtomsInColumn(i) + 3) >> 2) & 1)
{
/* The bb count in this column is odd: duplicate the last bb */
for (int j = 0; j < NNBSBB_C; j++)
{
- grid->bbj[c2].lower[j] = bb[c2*2].lower[j];
- grid->bbj[c2].upper[j] = bb[c2*2].upper[j];
+ bbj[c2].lower[j] = bb[c2*2].lower[j];
+ bbj[c2].upper[j] = bb[c2*2].upper[j];
}
}
}
}
-/* Prints the average bb size, used for debug output */
-static void print_bbsizes_simple(FILE *fp,
- const nbnxn_grid_t *grid)
+/*! \brief Prints the average bb size, used for debug output */
+static void print_bbsizes_simple(FILE *fp,
+ const Grid &grid)
{
dvec ba;
clear_dvec(ba);
- for (int c = 0; c < grid->nc; c++)
+ for (int c = 0; c < grid.numCells(); c++)
{
for (int d = 0; d < DIM; d++)
{
- ba[d] += grid->bb[c].upper[d] - grid->bb[c].lower[d];
+ ba[d] += grid.iBoundingBoxes()[c].upper[d] - grid.iBoundingBoxes()[c].lower[d];
}
}
- dsvmul(1.0/grid->nc, ba, ba);
+ dsvmul(1.0/grid.numCells(), ba, ba);
- real avgCellSizeZ = (grid->atom_density > 0 ? grid->na_c/(grid->atom_density*grid->cellSize[XX]*grid->cellSize[YY]) : 0.0);
+ const Grid::Dimensions &dims = grid.dimensions();
+ real avgCellSizeZ = (dims.atomDensity > 0 ? grid.geometry().numAtomsICluster/(dims.atomDensity*dims.cellSize[XX]*dims.cellSize[YY]) : 0.0);
fprintf(fp, "ns bb: grid %4.2f %4.2f %4.2f abs %4.2f %4.2f %4.2f rel %4.2f %4.2f %4.2f\n",
- grid->cellSize[XX],
- grid->cellSize[YY],
+ dims.cellSize[XX],
+ dims.cellSize[YY],
avgCellSizeZ,
ba[XX], ba[YY], ba[ZZ],
- ba[XX]*grid->invCellSize[XX],
- ba[YY]*grid->invCellSize[YY],
- grid->atom_density > 0 ? ba[ZZ]/avgCellSizeZ : 0.0);
+ ba[XX]*dims.invCellSize[XX],
+ ba[YY]*dims.invCellSize[YY],
+ dims.atomDensity > 0 ? ba[ZZ]/avgCellSizeZ : 0.0);
}
-/* Prints the average bb size, used for debug output */
-static void print_bbsizes_supersub(FILE *fp,
- const nbnxn_grid_t *grid)
+/*! \brief Prints the average bb size, used for debug output */
+static void print_bbsizes_supersub(FILE *fp,
+ const Grid &grid)
{
int ns;
dvec ba;
clear_dvec(ba);
ns = 0;
- for (int c = 0; c < grid->nc; c++)
+ for (int c = 0; c < grid.numCells(); c++)
{
#if NBNXN_BBXXXX
- for (int s = 0; s < grid->nsubc[c]; s += STRIDE_PBB)
+ for (int s = 0; s < grid.numClustersPerCell()[c]; s += STRIDE_PBB)
{
int cs_w = (c*c_gpuNumClusterPerCell + s)/STRIDE_PBB;
for (int i = 0; i < STRIDE_PBB; i++)
for (int d = 0; d < DIM; d++)
{
ba[d] +=
- grid->pbb[cs_w*NNBSBB_XXXX+(DIM+d)*STRIDE_PBB+i] -
- grid->pbb[cs_w*NNBSBB_XXXX+ d *STRIDE_PBB+i];
+ grid.packedBoundingBoxes()[cs_w*NNBSBB_XXXX + (DIM + d)*STRIDE_PBB + i] -
+ grid.packedBoundingBoxes()[cs_w*NNBSBB_XXXX + d *STRIDE_PBB + i];
}
}
}
#else
- for (int s = 0; s < grid->nsubc[c]; s++)
+ for (int s = 0; s < grid.numClustersPerCell()[c]; s++)
{
int cs = c*c_gpuNumClusterPerCell + s;
for (int d = 0; d < DIM; d++)
{
- ba[d] += grid->bb[cs].upper[d] - grid->bb[cs].lower[d];
+ ba[d] += grid.iBoundingBoxes()[cs].upper[d] - grid.iBoundingBoxes()[cs].lower[d];
}
}
#endif
- ns += grid->nsubc[c];
+ ns += grid.numClustersPerCell()[c];
}
dsvmul(1.0/ns, ba, ba);
- real avgClusterSizeZ = (grid->atom_density > 0 ? grid->na_sc/(grid->atom_density*grid->cellSize[XX]*grid->cellSize[YY]*c_gpuNumClusterPerCellZ) : 0.0);
+ const Grid::Dimensions &dims = grid.dimensions();
+ const real avgClusterSizeZ =
+ (dims.atomDensity > 0 ? grid.geometry().numAtomsPerCell/(dims.atomDensity*dims.cellSize[XX]*dims.cellSize[YY]*c_gpuNumClusterPerCellZ) : 0.0);
fprintf(fp, "ns bb: grid %4.2f %4.2f %4.2f abs %4.2f %4.2f %4.2f rel %4.2f %4.2f %4.2f\n",
- grid->cellSize[XX]/c_gpuNumClusterPerCellX,
- grid->cellSize[YY]/c_gpuNumClusterPerCellY,
+ dims.cellSize[XX]/c_gpuNumClusterPerCellX,
+ dims.cellSize[YY]/c_gpuNumClusterPerCellY,
avgClusterSizeZ,
ba[XX], ba[YY], ba[ZZ],
- ba[XX]*c_gpuNumClusterPerCellX*grid->invCellSize[XX],
- ba[YY]*c_gpuNumClusterPerCellY*grid->invCellSize[YY],
- grid->atom_density > 0 ? ba[ZZ]/avgClusterSizeZ : 0.0);
+ ba[XX]*c_gpuNumClusterPerCellX*dims.invCellSize[XX],
+ ba[YY]*c_gpuNumClusterPerCellY*dims.invCellSize[YY],
+ dims.atomDensity > 0 ? ba[ZZ]/avgClusterSizeZ : 0.0);
}
-/* Set non-bonded interaction flags for the current cluster.
+/*!\brief Set non-bonded interaction flags for the current cluster.
+ *
* Sorts atoms on LJ coefficients: !=0 first, ==0 at the end.
*/
static void sort_cluster_on_flag(int numAtomsInCluster,
}
}
-/* Fill a pair search cell with atoms.
+/*! \brief Fill a pair search cell with atoms.
+ *
* Potentially sorts atoms and sets the interaction flags.
*/
-static void fill_cell(nbnxn_search *nbs,
- nbnxn_grid_t *grid,
- nbnxn_atomdata_t *nbat,
- int atomStart,
- int atomEnd,
- const int *atinfo,
- gmx::ArrayRef<const gmx::RVec> x,
- nbnxn_bb_t gmx_unused *bb_work_aligned)
+void Grid::fillCell(nbnxn_search *nbs,
+ nbnxn_atomdata_t *nbat,
+ int atomStart,
+ int atomEnd,
+ const int *atinfo,
+ gmx::ArrayRef<const gmx::RVec> x,
+ BoundingBox gmx_unused *bb_work_aligned)
{
const int numAtoms = atomEnd - atomStart;
- if (grid->bSimple)
+ if (geometry_.isSimple)
{
/* Note that non-local grids are already sorted.
* Then sort_cluster_on_flag will only set the flags and the sorting
* will not affect the atom order.
*/
- sort_cluster_on_flag(grid->na_c, atomStart, atomEnd, atinfo, nbs->a,
- grid->flags.data() + (atomStart >> grid->na_c_2log) - grid->cell0);
+ sort_cluster_on_flag(geometry_.numAtomsICluster, atomStart, atomEnd, atinfo, nbs->a,
+ flags_.data() + atomToCluster(atomStart) - cellOffset_);
}
if (nbs->bFEP)
/* Set the fep flag for perturbed atoms in this (sub-)cell */
/* The grid-local cluster/(sub-)cell index */
- int cell = (atomStart >> grid->na_c_2log) - grid->cell0*(grid->bSimple ? 1 : c_gpuNumClusterPerCell);
- grid->fep[cell] = 0;
+ int cell = atomToCluster(atomStart) - cellOffset_*(geometry_.isSimple ? 1 : c_gpuNumClusterPerCell);
+ fep_[cell] = 0;
for (int at = atomStart; at < atomEnd; at++)
{
if (nbs->a[at] >= 0 && GET_CGINFO_FEP(atinfo[nbs->a[at]]))
{
- grid->fep[cell] |= (1 << (at - atomStart));
+ fep_[cell] |= (1 << (at - atomStart));
}
}
}
nbs->cell[nbs->a[at]] = at;
}
- copy_rvec_to_nbat_real(nbs->a.data() + atomStart, numAtoms, grid->na_c,
+ copy_rvec_to_nbat_real(nbs->a.data() + atomStart, numAtoms, geometry_.numAtomsICluster,
as_rvec_array(x.data()),
nbat->XFormat, nbat->x().data(), atomStart);
if (nbat->XFormat == nbatX4)
{
/* Store the bounding boxes as xyz.xyz. */
- size_t offset = (atomStart - grid->cell0*grid->na_sc) >> grid->na_c_2log;
- nbnxn_bb_t *bb_ptr = grid->bb.data() + offset;
+ size_t offset = atomToCluster(atomStart - cellOffset_*geometry_.numAtomsICluster);
+ BoundingBox *bb_ptr = bb_.data() + offset;
#if GMX_SIMD && GMX_SIMD_REAL_WIDTH == 2
- if (2*grid->na_cj == grid->na_c)
+ if (2*geometry_.numAtomsJCluster == geometry_.numAtomsICluster)
{
calc_bounding_box_x_x4_halves(numAtoms, nbat->x().data() + atom_to_x_index<c_packX4>(atomStart), bb_ptr,
- grid->bbj.data() + offset*2);
+ bbj_.data() + offset*2);
}
else
#endif
else if (nbat->XFormat == nbatX8)
{
/* Store the bounding boxes as xyz.xyz. */
- size_t offset = (atomStart - grid->cell0*grid->na_sc) >> grid->na_c_2log;
- nbnxn_bb_t *bb_ptr = grid->bb.data() + offset;
+ size_t offset = atomToCluster(atomStart - cellOffset_*geometry_.numAtomsICluster);
+ BoundingBox *bb_ptr = bb_.data() + offset;
calc_bounding_box_x_x8(numAtoms, nbat->x().data() + atom_to_x_index<c_packX8>(atomStart), bb_ptr);
}
#if NBNXN_BBXXXX
- else if (!grid->bSimple)
+ else if (!geometry_.isSimple)
{
/* Store the bounding boxes in a format convenient
* for SIMD4 calculations: xxxxyyyyzzzz...
*/
float *pbb_ptr =
- grid->pbb.data() +
- ((atomStart - grid->cell0*grid->na_sc) >> (grid->na_c_2log + STRIDE_PBB_2LOG))*NNBSBB_XXXX +
- (((atomStart - grid->cell0*grid->na_sc) >> grid->na_c_2log) & (STRIDE_PBB - 1));
+ pbb_.data() +
+ ((atomStart - cellOffset_*geometry_.numAtomsPerCell) >> (geometry_.numAtomsICluster2Log + STRIDE_PBB_2LOG))*NNBSBB_XXXX +
+ (((atomStart - cellOffset_*geometry_.numAtomsPerCell) >> geometry_.numAtomsICluster2Log) & (STRIDE_PBB - 1));
#if NBNXN_SEARCH_SIMD4_FLOAT_X_BB
if (nbat->XFormat == nbatXYZQ)
if (gmx_debug_at)
{
fprintf(debug, "cell %4d bb %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f\n",
- atomStart >> grid->na_c_2log,
+ atomToCluster(atomStart),
pbb_ptr[0*STRIDE_PBB], pbb_ptr[3*STRIDE_PBB],
pbb_ptr[1*STRIDE_PBB], pbb_ptr[4*STRIDE_PBB],
pbb_ptr[2*STRIDE_PBB], pbb_ptr[5*STRIDE_PBB]);
else
{
/* Store the bounding boxes as xyz.xyz. */
- nbnxn_bb_t *bb_ptr = grid->bb.data() + ((atomStart - grid->cell0*grid->na_sc) >> grid->na_c_2log);
+ BoundingBox *bb_ptr = bb_.data() + atomToCluster(atomStart - cellOffset_*geometry_.numAtomsPerCell);
calc_bounding_box(numAtoms, nbat->xstride, nbat->x().data() + atomStart*nbat->xstride,
bb_ptr);
if (gmx_debug_at)
{
- int bbo = (atomStart - grid->cell0*grid->na_sc)/grid->na_c;
+ int bbo = atomToCluster(atomStart - cellOffset_*geometry_.numAtomsPerCell);
fprintf(debug, "cell %4d bb %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f\n",
- atomStart >> grid->na_c_2log,
- grid->bb[bbo].lower[BB_X],
- grid->bb[bbo].lower[BB_Y],
- grid->bb[bbo].lower[BB_Z],
- grid->bb[bbo].upper[BB_X],
- grid->bb[bbo].upper[BB_Y],
- grid->bb[bbo].upper[BB_Z]);
+ atomToCluster(atomStart),
+ bb_[bbo].lower[BB_X],
+ bb_[bbo].lower[BB_Y],
+ bb_[bbo].lower[BB_Z],
+ bb_[bbo].upper[BB_X],
+ bb_[bbo].upper[BB_Y],
+ bb_[bbo].upper[BB_Z]);
}
}
}
-/* Spatially sort the atoms within one grid column */
-static void sort_columns_simple(nbnxn_search *nbs,
- int dd_zone,
- nbnxn_grid_t *grid,
- int atomStart, int atomEnd,
- const int *atinfo,
- gmx::ArrayRef<const gmx::RVec> x,
- nbnxn_atomdata_t *nbat,
- int cxy_start, int cxy_end,
- gmx::ArrayRef<int> sort_work)
+void Grid::sortColumnsCpuGeometry(nbnxn_search *nbs,
+ int dd_zone,
+ int atomStart, int atomEnd,
+ const int *atinfo,
+ gmx::ArrayRef<const gmx::RVec> x,
+ nbnxn_atomdata_t *nbat,
+ int cxy_start, int cxy_end,
+ gmx::ArrayRef<int> sort_work)
{
if (debug)
{
- fprintf(debug, "cell0 %d sorting columns %d - %d, atoms %d - %d\n",
- grid->cell0, cxy_start, cxy_end, atomStart, atomEnd);
+ fprintf(debug, "cell_offset %d sorting columns %d - %d, atoms %d - %d\n",
+ cellOffset_, cxy_start, cxy_end, atomStart, atomEnd);
}
- const bool relevantAtomsAreWithinGridBounds = (grid->maxAtomGroupRadius == 0);
+ const bool relevantAtomsAreWithinGridBounds = (dimensions_.maxAtomGroupRadius == 0);
+
+ const int numAtomsPerCell = geometry_.numAtomsPerCell;
/* Sort the atoms within each x,y column in 3 dimensions */
for (int cxy = cxy_start; cxy < cxy_end; cxy++)
{
- int na = grid->cxy_na[cxy];
- int ncz = grid->cxy_ind[cxy+1] - grid->cxy_ind[cxy];
- int ash = (grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc;
+ const int numAtoms = numAtomsInColumn(cxy);
+ const int numCellsZ = cxy_ind_[cxy + 1] - cxy_ind_[cxy];
+ const int atomOffset = firstAtomInColumn(cxy);
/* Sort the atoms within each x,y column on z coordinate */
sort_atoms(ZZ, FALSE, dd_zone,
relevantAtomsAreWithinGridBounds,
- nbs->a.data() + ash, na, x,
- grid->c0[ZZ],
- 1.0/grid->size[ZZ], ncz*grid->na_sc,
+ nbs->a.data() + atomOffset, numAtoms, x,
+ dimensions_.lowerCorner[ZZ],
+ 1.0/dimensions_.gridSize[ZZ], numCellsZ*numAtomsPerCell,
sort_work);
/* Fill the ncz cells in this column */
- int cfilled = grid->cxy_ind[cxy];
- for (int cz = 0; cz < ncz; cz++)
+ const int firstCell = firstCellInColumn(cxy);
+ int cellFilled = firstCell;
+ for (int cellZ = 0; cellZ < numCellsZ; cellZ++)
{
- int c = grid->cxy_ind[cxy] + cz;
+ const int cell = firstCell + cellZ;
- int ash_c = ash + cz*grid->na_sc;
- int na_c = std::min(grid->na_sc, na-(ash_c-ash));
+ const int atomOffsetCell = atomOffset + cellZ*numAtomsPerCell;
+ const int numAtomsCell = std::min(numAtomsPerCell, numAtoms - (atomOffsetCell - atomOffset));
- fill_cell(nbs, grid, nbat,
- ash_c, ash_c+na_c, atinfo, x,
- nullptr);
+ fillCell(nbs, nbat,
+ atomOffsetCell, atomOffsetCell + numAtomsCell, atinfo, x,
+ nullptr);
/* This copy to bbcz is not really necessary.
* But it allows to use the same grid search code
* for the simple and supersub cell setups.
*/
- if (na_c > 0)
+ if (numAtomsCell > 0)
{
- cfilled = c;
+ cellFilled = cell;
}
- grid->bbcz[c*NNBSBB_D ] = grid->bb[cfilled].lower[BB_Z];
- grid->bbcz[c*NNBSBB_D+1] = grid->bb[cfilled].upper[BB_Z];
+ bbcz_[cell*NNBSBB_D ] = bb_[cellFilled].lower[BB_Z];
+ bbcz_[cell*NNBSBB_D + 1] = bb_[cellFilled].upper[BB_Z];
}
/* Set the unused atom indices to -1 */
- for (int ind = na; ind < ncz*grid->na_sc; ind++)
+ for (int ind = numAtoms; ind < numCellsZ*numAtomsPerCell; ind++)
{
- nbs->a[ash+ind] = -1;
+ nbs->a[atomOffset + ind] = -1;
}
}
}
/* Spatially sort the atoms within one grid column */
-static void sort_columns_supersub(nbnxn_search *nbs,
+void Grid::sortColumnsGpuGeometry(nbnxn_search *nbs,
int dd_zone,
- nbnxn_grid_t *grid,
int atomStart, int atomEnd,
const int *atinfo,
gmx::ArrayRef<const gmx::RVec> x,
int cxy_start, int cxy_end,
gmx::ArrayRef<int> sort_work)
{
- nbnxn_bb_t bb_work_array[2];
- nbnxn_bb_t *bb_work_aligned = reinterpret_cast<nbnxn_bb_t *>((reinterpret_cast<std::size_t>(bb_work_array + 1)) & (~(static_cast<std::size_t>(15))));
+ BoundingBox bb_work_array[2];
+ BoundingBox *bb_work_aligned = reinterpret_cast<BoundingBox *>((reinterpret_cast<std::size_t>(bb_work_array + 1)) & (~(static_cast<std::size_t>(15))));
if (debug)
{
- fprintf(debug, "cell0 %d sorting columns %d - %d, atoms %d - %d\n",
- grid->cell0, cxy_start, cxy_end, atomStart, atomEnd);
+ fprintf(debug, "cell_offset %d sorting columns %d - %d, atoms %d - %d\n",
+ cellOffset_, cxy_start, cxy_end, atomStart, atomEnd);
}
- const bool relevantAtomsAreWithinGridBounds = (grid->maxAtomGroupRadius == 0);
+ const bool relevantAtomsAreWithinGridBounds = (dimensions_.maxAtomGroupRadius == 0);
- int subdiv_x = grid->na_c;
- int subdiv_y = c_gpuNumClusterPerCellX*subdiv_x;
- int subdiv_z = c_gpuNumClusterPerCellY*subdiv_y;
+ const int numAtomsPerCell = geometry_.numAtomsPerCell;
+
+ const int subdiv_x = geometry_.numAtomsICluster;
+ const int subdiv_y = c_gpuNumClusterPerCellX*subdiv_x;
+ const int subdiv_z = c_gpuNumClusterPerCellY*subdiv_y;
/* Sort the atoms within each x,y column in 3 dimensions.
* Loop over all columns on the x/y grid.
*/
for (int cxy = cxy_start; cxy < cxy_end; cxy++)
{
- int gridX = cxy/grid->numCells[YY];
- int gridY = cxy - gridX*grid->numCells[YY];
+ const int gridX = cxy/dimensions_.numCells[YY];
+ const int gridY = cxy - gridX*dimensions_.numCells[YY];
- int numAtomsInColumn = grid->cxy_na[cxy];
- int numCellsInColumn = grid->cxy_ind[cxy + 1] - grid->cxy_ind[cxy];
- int ash = (grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc;
+ const int numAtomsInColumn = cxy_na_[cxy];
+ const int numCellsInColumn = cxy_ind_[cxy + 1] - cxy_ind_[cxy];
+ const int atomOffset = firstAtomInColumn(cxy);
/* Sort the atoms within each x,y column on z coordinate */
sort_atoms(ZZ, FALSE, dd_zone,
relevantAtomsAreWithinGridBounds,
- nbs->a.data() + ash, numAtomsInColumn, x,
- grid->c0[ZZ],
- 1.0/grid->size[ZZ], numCellsInColumn*grid->na_sc,
+ nbs->a.data() + atomOffset, numAtomsInColumn, x,
+ dimensions_.lowerCorner[ZZ],
+ 1.0/dimensions_.gridSize[ZZ], numCellsInColumn*numAtomsPerCell,
sort_work);
/* This loop goes over the cells and clusters along z at once */
for (int sub_z = 0; sub_z < numCellsInColumn*c_gpuNumClusterPerCellZ; sub_z++)
{
- int ash_z = ash + sub_z*subdiv_z;
- int na_z = std::min(subdiv_z, numAtomsInColumn - (ash_z - ash));
- int cz = -1;
+ const int atomOffsetZ = atomOffset + sub_z*subdiv_z;
+ const int numAtomsZ = std::min(subdiv_z, numAtomsInColumn - (atomOffsetZ - atomOffset));
+ int cz = -1;
/* We have already sorted on z */
if (sub_z % c_gpuNumClusterPerCellZ == 0)
{
cz = sub_z/c_gpuNumClusterPerCellZ;
- int cell = grid->cxy_ind[cxy] + cz;
+ const int cell = cxy_ind_[cxy] + cz;
/* The number of atoms in this cell/super-cluster */
- int numAtoms = std::min(grid->na_sc, numAtomsInColumn - (ash_z - ash));
+ const int numAtoms = std::min(numAtomsPerCell, numAtomsInColumn - (atomOffsetZ - atomOffset));
- grid->nsubc[cell] = std::min(c_gpuNumClusterPerCell,
- (numAtoms + grid->na_c - 1)/grid->na_c);
+ numClusters_[cell] = std::min(c_gpuNumClusterPerCell,
+ (numAtoms + geometry_.numAtomsICluster - 1)/ geometry_.numAtomsICluster);
/* Store the z-boundaries of the bounding box of the cell */
- grid->bbcz[cell*NNBSBB_D ] = x[nbs->a[ash_z]][ZZ];
- grid->bbcz[cell*NNBSBB_D+1] = x[nbs->a[ash_z + numAtoms - 1]][ZZ];
+ bbcz_[cell*NNBSBB_D ] = x[nbs->a[atomOffsetZ]][ZZ];
+ bbcz_[cell*NNBSBB_D+1] = x[nbs->a[atomOffsetZ + numAtoms - 1]][ZZ];
}
if (c_gpuNumClusterPerCellY > 1)
/* Sort the atoms along y */
sort_atoms(YY, (sub_z & 1) != 0, dd_zone,
relevantAtomsAreWithinGridBounds,
- nbs->a.data() + ash_z, na_z, x,
- grid->c0[YY] + gridY*grid->cellSize[YY],
- grid->invCellSize[YY], subdiv_z,
+ nbs->a.data() + atomOffsetZ, numAtomsZ, x,
+ dimensions_.lowerCorner[YY] + gridY*dimensions_.cellSize[YY],
+ dimensions_.invCellSize[YY], subdiv_z,
sort_work);
}
for (int sub_y = 0; sub_y < c_gpuNumClusterPerCellY; sub_y++)
{
- int ash_y = ash_z + sub_y*subdiv_y;
- int na_y = std::min(subdiv_y, numAtomsInColumn - (ash_y - ash));
+ const int atomOffsetY = atomOffsetZ + sub_y*subdiv_y;
+ const int numAtomsY = std::min(subdiv_y, numAtomsInColumn - (atomOffsetY - atomOffset));
if (c_gpuNumClusterPerCellX > 1)
{
/* Sort the atoms along x */
sort_atoms(XX, ((cz*c_gpuNumClusterPerCellY + sub_y) & 1) != 0, dd_zone,
relevantAtomsAreWithinGridBounds,
- nbs->a.data() + ash_y, na_y, x,
- grid->c0[XX] + gridX*grid->cellSize[XX],
- grid->invCellSize[XX], subdiv_y,
+ nbs->a.data() + atomOffsetY, numAtomsY, x,
+ dimensions_.lowerCorner[XX] + gridX*dimensions_.cellSize[XX],
+ dimensions_.invCellSize[XX], subdiv_y,
sort_work);
}
for (int sub_x = 0; sub_x < c_gpuNumClusterPerCellX; sub_x++)
{
- int ash_x = ash_y + sub_x*subdiv_x;
- int na_x = std::min(subdiv_x, numAtomsInColumn - (ash_x - ash));
+ const int atomOffsetX = atomOffsetY + sub_x*subdiv_x;
+ const int numAtomsX = std::min(subdiv_x, numAtomsInColumn - (atomOffsetX - atomOffset));
- fill_cell(nbs, grid, nbat,
- ash_x, ash_x + na_x, atinfo, x,
- bb_work_aligned);
+ fillCell(nbs, nbat,
+ atomOffsetX, atomOffsetX + numAtomsX, atinfo, x,
+ bb_work_aligned);
}
}
}
/* Set the unused atom indices to -1 */
- for (int ind = numAtomsInColumn; ind < numCellsInColumn*grid->na_sc; ind++)
+ for (int ind = numAtomsInColumn; ind < numCellsInColumn*numAtomsPerCell; ind++)
{
- nbs->a[ash + ind] = -1;
+ nbs->a[atomOffset + ind] = -1;
}
}
}
-/* Sets the cell index in the cell array for atom \p atomIndex and increments the atom count for the grid column */
+/*! \brief Sets the cell index in the cell array for atom \p atomIndex and increments the atom count for the grid column */
static void setCellAndAtomCount(gmx::ArrayRef<int> cell,
int cellIndex,
gmx::ArrayRef<int> cxy_na,
cxy_na[cellIndex] += 1;
}
-/* Determine in which grid column atoms should go */
-static void calc_column_indices(nbnxn_grid_t *grid,
+/*! \brief Determine in which grid column atoms should go */
+static void calc_column_indices(const Grid::Dimensions &gridDims,
const gmx::UpdateGroupsCog *updateGroupsCog,
int atomStart, int atomEnd,
gmx::ArrayRef<const gmx::RVec> x,
gmx::ArrayRef<int> cell,
gmx::ArrayRef<int> cxy_na)
{
+ const int numColumns = gridDims.numCells[XX]*gridDims.numCells[YY];
+
/* We add one extra cell for particles which moved during DD */
- for (int i = 0; i < grid->numCells[XX]*grid->numCells[YY] + 1; i++)
+ for (int i = 0; i < numColumns; i++)
{
cxy_na[i] = 0;
}
int taskAtomStart = atomStart + static_cast<int>((thread + 0)*(atomEnd - atomStart))/nthread;
int taskAtomEnd = atomStart + static_cast<int>((thread + 1)*(atomEnd - atomStart))/nthread;
+
if (dd_zone == 0)
{
/* Home zone */
* The int cast takes care of the lower bound,
* we will explicitly take care of the upper bound.
*/
- int cx = static_cast<int>((coord[XX] - grid->c0[XX])*grid->invCellSize[XX]);
- int cy = static_cast<int>((coord[YY] - grid->c0[YY])*grid->invCellSize[YY]);
+ int cx = static_cast<int>((coord[XX] - gridDims.lowerCorner[XX])*gridDims.invCellSize[XX]);
+ int cy = static_cast<int>((coord[YY] - gridDims.lowerCorner[YY])*gridDims.invCellSize[YY]);
#ifndef NDEBUG
- if (cx < 0 || cx > grid->numCells[XX] ||
- cy < 0 || cy > grid->numCells[YY])
+ if (cx < 0 || cx > gridDims.numCells[XX] ||
+ cy < 0 || cy > gridDims.numCells[YY])
{
gmx_fatal(FARGS,
"grid cell cx %d cy %d out of range (max %d %d)\n"
"atom %f %f %f, grid->c0 %f %f",
- cx, cy, grid->numCells[XX], grid->numCells[YY],
- x[i][XX], x[i][YY], x[i][ZZ], grid->c0[XX], grid->c0[YY]);
+ cx, cy,
+ gridDims.numCells[XX],
+ gridDims.numCells[YY],
+ x[i][XX], x[i][YY], x[i][ZZ],
+ gridDims.lowerCorner[XX],
+ gridDims.lowerCorner[YY]);
}
#endif
/* Take care of potential rouding issues */
- cx = std::min(cx, grid->numCells[XX] - 1);
- cy = std::min(cy, grid->numCells[YY] - 1);
+ cx = std::min(cx, gridDims.numCells[XX] - 1);
+ cy = std::min(cy, gridDims.numCells[YY] - 1);
/* For the moment cell will contain only the, grid local,
* x and y indices, not z.
*/
- setCellAndAtomCount(cell, cx*grid->numCells[YY] + cy, cxy_na, i);
+ setCellAndAtomCount(cell, cx*gridDims.numCells[YY] + cy, cxy_na, i);
}
else
{
/* Put this moved particle after the end of the grid,
* so we can process it later without using conditionals.
*/
- setCellAndAtomCount(cell, grid->numCells[XX]*grid->numCells[YY], cxy_na, i);
+ setCellAndAtomCount(cell, numColumns, cxy_na, i);
}
}
}
/* Non-home zone */
for (int i = taskAtomStart; i < taskAtomEnd; i++)
{
- int cx = static_cast<int>((x[i][XX] - grid->c0[XX])*grid->invCellSize[XX]);
- int cy = static_cast<int>((x[i][YY] - grid->c0[YY])*grid->invCellSize[YY]);
+ int cx = static_cast<int>((x[i][XX] - gridDims.lowerCorner[XX])*gridDims.invCellSize[XX]);
+ int cy = static_cast<int>((x[i][YY] - gridDims.lowerCorner[YY])*gridDims.invCellSize[YY]);
/* For non-home zones there could be particles outside
* the non-bonded cut-off range, which have been communicated
* we put them in an extra row at the border.
*/
cx = std::max(cx, 0);
- cx = std::min(cx, grid->numCells[XX] - 1);
+ cx = std::min(cx, gridDims.numCells[XX] - 1);
cy = std::max(cy, 0);
- cy = std::min(cy, grid->numCells[YY] - 1);
+ cy = std::min(cy, gridDims.numCells[YY] - 1);
/* For the moment cell will contain only the, grid local,
* x and y indices, not z.
*/
- setCellAndAtomCount(cell, cx*grid->numCells[YY] + cy, cxy_na, i);
+ setCellAndAtomCount(cell, cx*gridDims.numCells[YY] + cy, cxy_na, i);
}
}
}
-/* Resizes grid and atom data which depend on the number of cells */
-static void resizeForNumberOfCells(const nbnxn_grid_t &grid,
- int numAtomsMoved,
+/*! \brief Resizes grid and atom data which depend on the number of cells */
+static void resizeForNumberOfCells(const int numNbnxnAtoms,
+ const int numAtomsMoved,
nbnxn_search *nbs,
nbnxn_atomdata_t *nbat)
{
- int numNbnxnAtoms = (grid.cell0 + grid.nc)*grid.na_sc;
-
/* Note: nbs->cell was already resized before */
/* To avoid conditionals we store the moved particles at the end of a,
nbat->resizeCoordinateBuffer(numNbnxnAtoms);
}
-/* Determine in which grid cells the atoms should go */
-static void
-calc_cell_indices(nbnxn_search *nbs,
- int ddZone,
- nbnxn_grid_t *grid,
- const gmx::UpdateGroupsCog *updateGroupsCog,
- int atomStart,
- int atomEnd,
- const int *atinfo,
- gmx::ArrayRef<const gmx::RVec> x,
- int numAtomsMoved,
- const int *move,
- nbnxn_atomdata_t *nbat)
+void Grid::calcCellIndices(nbnxn_search *nbs,
+ int ddZone,
+ int cellOffset,
+ const gmx::UpdateGroupsCog *updateGroupsCog,
+ int atomStart,
+ int atomEnd,
+ const int *atinfo,
+ gmx::ArrayRef<const gmx::RVec> x,
+ int numAtomsMoved,
+ const int *move,
+ nbnxn_atomdata_t *nbat)
{
+ cellOffset_ = cellOffset;
+
/* First compute all grid/column indices and store them in nbs->cell */
nbs->cell.resize(atomEnd);
{
try
{
- calc_column_indices(grid, updateGroupsCog,
+ calc_column_indices(dimensions_,
+ updateGroupsCog,
atomStart, atomEnd, x,
ddZone, move, thread, nthread,
nbs->cell, nbs->work[thread].cxy_na);
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
}
+ const int numAtomsPerCell = geometry_.numAtomsPerCell;
+
/* Make the cell index as a function of x and y */
int ncz_max = 0;
int ncz = 0;
- grid->cxy_ind[0] = 0;
- for (int i = 0; i < grid->numCells[XX]*grid->numCells[YY] + 1; i++)
+ cxy_ind_[0] = 0;
+ for (int i = 0; i < numColumns() + 1; i++)
{
/* We set ncz_max at the beginning of the loop iso at the end
* to skip i=grid->ncx*grid->numCells[YY] which are moved particles
{
cxy_na_i += nbs->work[thread].cxy_na[i];
}
- ncz = (cxy_na_i + grid->na_sc - 1)/grid->na_sc;
+ ncz = (cxy_na_i + numAtomsPerCell - 1)/numAtomsPerCell;
if (nbat->XFormat == nbatX8)
{
/* Make the number of cell a multiple of 2 */
ncz = (ncz + 1) & ~1;
}
- grid->cxy_ind[i+1] = grid->cxy_ind[i] + ncz;
- /* Clear cxy_na, so we can reuse the array below */
- grid->cxy_na[i] = 0;
+ cxy_ind_[i+1] = cxy_ind_[i] + ncz;
+ /* Clear cxy_na_, so we can reuse the array below */
+ cxy_na_[i] = 0;
}
- grid->nc = grid->cxy_ind[grid->numCells[XX]*grid->numCells[YY]] - grid->cxy_ind[0];
+ numCellsTotal_ = cxy_ind_[numColumns()] - cxy_ind_[0];
- resizeForNumberOfCells(*grid, numAtomsMoved, nbs, nbat);
+ resizeForNumberOfCells(atomIndexEnd(), numAtomsMoved, nbs, nbat);
if (debug)
{
fprintf(debug, "ns na_sc %d na_c %d super-cells: %d x %d y %d z %.1f maxz %d\n",
- grid->na_sc, grid->na_c, grid->nc,
- grid->numCells[XX], grid->numCells[YY], grid->nc/(static_cast<double>(grid->numCells[XX]*grid->numCells[YY])),
+ numAtomsPerCell, geometry_.numAtomsICluster, numCellsTotal_,
+ dimensions_.numCells[XX], dimensions_.numCells[YY],
+ numCellsTotal_/(static_cast<double>(numColumns())),
ncz_max);
if (gmx_debug_at)
{
int i = 0;
- for (int cy = 0; cy < grid->numCells[YY]; cy++)
+ for (int cy = 0; cy < dimensions_.numCells[YY]; cy++)
{
- for (int cx = 0; cx < grid->numCells[XX]; cx++)
+ for (int cx = 0; cx < dimensions_.numCells[XX]; cx++)
{
- fprintf(debug, " %2d", grid->cxy_ind[i+1]-grid->cxy_ind[i]);
+ fprintf(debug, " %2d", cxy_ind_[i + 1] - cxy_ind_[i]);
i++;
}
fprintf(debug, "\n");
}
/* Make sure the work array for sorting is large enough */
- if (ncz_max*grid->na_sc*SGSF > gmx::index(nbs->work[0].sortBuffer.size()))
+ const int worstCaseSortBufferSize = ncz_max*numAtomsPerCell*c_sortGridMaxSizeFactor;
+ if (worstCaseSortBufferSize > gmx::index(nbs->work[0].sortBuffer.size()))
{
for (nbnxn_search_work_t &work : nbs->work)
{
/* Elements not in use should be -1 */
- work.sortBuffer.resize(ncz_max*grid->na_sc*SGSF, -1);
+ work.sortBuffer.resize(worstCaseSortBufferSize, -1);
}
}
{
/* At this point nbs->cell contains the local grid x,y indices */
int cxy = nbs->cell[i];
- nbs->a[(grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc + grid->cxy_na[cxy]++] = i;
+ nbs->a[firstAtomInColumn(cxy) + cxy_na_[cxy]++] = i;
}
if (ddZone == 0)
{
/* Set the cell indices for the moved particles */
- int n0 = grid->nc*grid->na_sc;
- int n1 = grid->nc*grid->na_sc+grid->cxy_na[grid->numCells[XX]*grid->numCells[YY]];
+ int n0 = numCellsTotal_*numAtomsPerCell;
+ int n1 = numCellsTotal_*numAtomsPerCell + cxy_na_[numColumns()];
if (ddZone == 0)
{
for (int i = n0; i < n1; i++)
{
try
{
- int columnStart = ((thread + 0)*grid->numCells[XX]*grid->numCells[YY])/nthread;
- int columnEnd = ((thread + 1)*grid->numCells[XX]*grid->numCells[YY])/nthread;
- if (grid->bSimple)
+ int columnStart = ((thread + 0)*numColumns())/nthread;
+ int columnEnd = ((thread + 1)*numColumns())/nthread;
+ if (geometry_.isSimple)
{
- sort_columns_simple(nbs, ddZone, grid, atomStart, atomEnd, atinfo, x, nbat,
- columnStart, columnEnd,
- nbs->work[thread].sortBuffer);
+ sortColumnsCpuGeometry(nbs, ddZone, atomStart, atomEnd, atinfo, x, nbat,
+ columnStart, columnEnd,
+ nbs->work[thread].sortBuffer);
}
else
{
- sort_columns_supersub(nbs, ddZone, grid, atomStart, atomEnd, atinfo, x, nbat,
- columnStart, columnEnd,
- nbs->work[thread].sortBuffer);
+ sortColumnsGpuGeometry(nbs, ddZone, atomStart, atomEnd, atinfo, x, nbat,
+ columnStart, columnEnd,
+ nbs->work[thread].sortBuffer);
}
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
}
- if (grid->bSimple && nbat->XFormat == nbatX8)
+ if (geometry_.isSimple && nbat->XFormat == nbatX8)
{
- combine_bounding_box_pairs(grid, grid->bb);
+ combine_bounding_box_pairs(*this, bb_, bbj_);
}
- if (!grid->bSimple)
+ if (!geometry_.isSimple)
{
- grid->nsubc_tot = 0;
- for (int i = 0; i < grid->nc; i++)
+ numClustersTotal_ = 0;
+ for (int i = 0; i < numCellsTotal_; i++)
{
- grid->nsubc_tot += grid->nsubc[i];
+ numClustersTotal_ += numClusters_[i];
}
}
if (debug)
{
- if (grid->bSimple)
+ if (geometry_.isSimple)
{
- print_bbsizes_simple(debug, grid);
+ print_bbsizes_simple(debug, *this);
}
else
{
fprintf(debug, "ns non-zero sub-cells: %d average atoms %.2f\n",
- grid->nsubc_tot, (atomEnd - atomStart)/static_cast<double>(grid->nsubc_tot));
+ numClustersTotal_,
+ (atomEnd - atomStart)/static_cast<double>(numClustersTotal_));
- print_bbsizes_supersub(debug, grid);
+ print_bbsizes_supersub(debug, *this);
}
}
}
+} // namespace Nbnxm
+
+// TODO: Move the functions below to nbnxn.cpp
+
/* Sets up a grid and puts the atoms on the grid.
* This function only operates on one domain of the domain decompostion.
* Note that without domain decomposition there is only one domain.
const int *move)
{
nbnxn_search *nbs = nbv->nbs.get();
- nbnxn_grid_t *grid = &nbs->grid[ddZone];
+ Nbnxm::Grid &grid = nbs->grid[ddZone];
nbs_cycle_start(&nbs->cc[enbsCCgrid]);
- grid->bSimple = nbv->pairlistIsSimple();
-
- grid->na_c = IClusterSizePerListType[nbv->pairlistSets().params().pairlistType];
- grid->na_cj = JClusterSizePerListType[nbv->pairlistSets().params().pairlistType];
- grid->na_sc = (grid->bSimple ? 1 : c_gpuNumClusterPerCell)*grid->na_c;
- grid->na_c_2log = get_2log(grid->na_c);
-
+ int cellOffset;
if (ddZone == 0)
{
- grid->cell0 = 0;
+ cellOffset = 0;
}
else
{
- grid->cell0 =
- (nbs->grid[ddZone - 1].cell0 + nbs->grid[ddZone - 1].nc)*
- nbs->grid[ddZone- 1].na_sc/grid->na_sc;
+ const Nbnxm::Grid &previousGrid = nbs->grid[ddZone - 1];
+ cellOffset = previousGrid.atomIndexEnd()/previousGrid.geometry().numAtomsPerCell;
}
const int n = atomEnd - atomStart;
+ real maxAtomGroupRadius;
if (ddZone == 0)
{
copy_mat(box, nbs->box);
- /* Avoid zero density */
- if (atomDensity > 0)
- {
- grid->atom_density = atomDensity;
- }
- else
- {
- grid->atom_density = grid_atom_density(n - numAtomsMoved, lowerCorner, upperCorner);
- }
-
- grid->cell0 = 0;
-
nbs->natoms_local = atomEnd - numAtomsMoved;
/* We assume that nbnxn_put_on_grid is called first
* for the local atoms (ddZone=0).
*/
nbs->natoms_nonlocal = atomEnd - numAtomsMoved;
- /* When not using atom groups, all atoms should be within the grid bounds */
- grid->maxAtomGroupRadius = (updateGroupsCog ? updateGroupsCog->maxUpdateGroupRadius() : 0);
- /* For the non-local grids the situation is the same as for the local */
- for (size_t g = 1; g < nbs->grid.size(); g++)
- {
- nbs->grid[g].maxAtomGroupRadius = grid->maxAtomGroupRadius;
- }
+ maxAtomGroupRadius = (updateGroupsCog ? updateGroupsCog->maxUpdateGroupRadius() : 0);
if (debug)
{
fprintf(debug, "natoms_local = %5d atom_density = %5.1f\n",
- nbs->natoms_local, grid->atom_density);
+ nbs->natoms_local, atomDensity);
}
}
else
{
+ const Nbnxm::Grid::Dimensions &dimsGrid0 = nbs->grid[0].dimensions();
+ atomDensity = dimsGrid0.atomDensity;
+ maxAtomGroupRadius = dimsGrid0.maxAtomGroupRadius;
+
nbs->natoms_nonlocal = std::max(nbs->natoms_nonlocal, atomEnd);
}
/* We always use the home zone (grid[0]) for setting the cell size,
* since determining densities for non-local zones is difficult.
*/
- set_grid_size_xy(nbs, grid,
- ddZone, n - numAtomsMoved,
- lowerCorner, upperCorner,
- nbs->grid[0].atom_density);
+ grid.setDimensions(nbs,
+ ddZone, n - numAtomsMoved,
+ lowerCorner, upperCorner,
+ atomDensity,
+ maxAtomGroupRadius);
nbnxn_atomdata_t *nbat = nbv->nbat.get();
- calc_cell_indices(nbs, ddZone, grid, updateGroupsCog, atomStart, atomEnd, atinfo, x, numAtomsMoved, move, nbat);
+ grid.calcCellIndices(nbs, ddZone, cellOffset, updateGroupsCog, atomStart, atomEnd, atinfo, x, numAtomsMoved, move, nbat);
if (ddZone == 0)
{
void nbnxn_get_ncells(const nbnxn_search *nbs, int *ncx, int *ncy)
{
- *ncx = nbs->grid[0].numCells[XX];
- *ncy = nbs->grid[0].numCells[YY];
+ *ncx = nbs->grid[0].dimensions().numCells[XX];
+ *ncy = nbs->grid[0].dimensions().numCells[YY];
}
gmx::ArrayRef<const int> nbnxn_get_atomorder(const nbnxn_search *nbs)
{
/* Return the atom order for the home cell (index 0) */
- const nbnxn_grid_t &grid = nbs->grid[0];
+ const Nbnxm::Grid &grid = nbs->grid[0];
- int numIndices = grid.cxy_ind[grid.numCells[XX]*grid.numCells[YY]]*grid.na_sc;
+ const int numIndices = grid.atomIndexEnd() - grid.firstAtomInColumn(0);
return gmx::constArrayRefFromArray(nbs->a.data(), numIndices);
}
void nbnxn_set_atomorder(nbnxn_search *nbs)
{
/* Set the atom order for the home cell (index 0) */
- nbnxn_grid_t *grid = &nbs->grid[0];
+ const Nbnxm::Grid &grid = nbs->grid[0];
- int ao = 0;
- for (int cx = 0; cx < grid->numCells[XX]; cx++)
+ int atomIndex = 0;
+ for (int cxy = 0; cxy < grid.numColumns(); cxy++)
{
- for (int cy = 0; cy < grid->numCells[YY]; cy++)
+ const int numAtoms = grid.numAtomsInColumn(cxy);
+ int cellIndex = grid.firstCellInColumn(cxy)*grid.geometry().numAtomsPerCell;
+ for (int i = 0; i < numAtoms; i++)
{
- int cxy = cx*grid->numCells[YY] + cy;
- int j = grid->cxy_ind[cxy]*grid->na_sc;
- for (int cz = 0; cz < grid->cxy_na[cxy]; cz++)
- {
- nbs->a[j] = ao;
- nbs->cell[ao] = j;
- ao++;
- j++;
- }
+ nbs->a[cellIndex] = atomIndex;
+ nbs->cell[atomIndex] = cellIndex;
+ atomIndex++;
+ cellIndex++;
}
}
}
/*! \internal \file
*
- * \brief Declares the grid and bounding box objects
+ * \brief
+ * Declares the Grid class.
*
- * \author Berk Hess <hess@kth.se>
+ * This class provides functionality for setting up and accessing atoms
+ * on a grid for one domain decomposition zone. This grid is used for
+ * generating cluster pair lists for computing non-bonded pair interactions.
+ * The grid consists of a regular array of columns along dimensions x and y.
+ * Along z the number of cells and their boundaries vary between the columns.
+ * Each cell can hold one or more clusters of atoms, depending on the grid
+ * geometry, which is set by the pair-list type.
*
+ * \author Berk Hess <hess@kth.se>
* \ingroup module_nbnxm
*/
#include "gromacs/math/vectypes.h"
#include "gromacs/simd/simd.h"
#include "gromacs/utility/alignedallocator.h"
+#include "gromacs/utility/arrayref.h"
struct gmx_domdec_zones_t;
+struct nbnxn_atomdata_t;
+struct nbnxn_search;
+enum class PairlistType;
+
+namespace gmx
+{
+class UpdateGroupsCog;
+}
+namespace Nbnxm
+{
#ifndef DOXYGEN
+// TODO: Convert macros to constexpr int and replace BB_? indexing by struct with x,y,z
+
/* Pair search box lower and upper corner in x,y,z.
* Store this in 4 iso 3 reals, which is useful with 4-wide SIMD.
* To avoid complicating the code we also use 4 without 4-wide SIMD.
/* Bounding box for a nbnxn atom cluster */
-typedef struct {
+struct BoundingBox
+{
float lower[NNBSBB_C];
float upper[NNBSBB_C];
-} nbnxn_bb_t;
+};
#ifndef DOXYGEN
+// TODO: Convert macros to constexpr int
+
/* Bounding box calculations are (currently) always in single precision, so
* we only need to check for single precision support here.
* This uses less (cache-)memory and SIMD is faster, at least on x86.
#endif // !DOXYGEN
-/* A pair-search grid struct for one domain decomposition zone
+/*! \internal
+ * \brief A pair-search grid object for one domain decomposition zone
+ *
+ * This is a rectangular 3D grid covering a potentially non-rectangular
+ * volume which is either the whole unit cell or the local zone or part
+ * of a non-local zone when using domain decomposition. The grid cells
+ * are even spaced along x/y and irregular along z. Each cell is sub-divided
+ * into atom clusters. With a CPU geometry, each cell contains 1 or 2 clusters.
+ * With a GPU geometry, each cell contains up to 8 clusters. The geometry is
+ * set by the pairlist type which is the only argument of the constructor.
+ *
+ * When multiple grids are used, i.e. with domain decomposition, we want
+ * to avoid the overhead of multiple coordinate arrays or extra indexing.
+ * Therefore each grid stores a cell offset, so a contiguous cell index
+ * can be used to index atom arrays. All methods returning atom indices
+ * return indices which index into a full atom array.
*
* Note that when atom groups, instead of individual atoms, are assigned
* to grid cells, individual atoms can be geometrically outside the cell
* and grid that they have been assigned to (as determined by the center
* or geometry of the atom group they belong to).
*/
-struct nbnxn_grid_t
+class Grid
{
- rvec c0; /* The lower corner of the (local) grid */
- rvec c1; /* The upper corner of the (local) grid */
- rvec size; /* c1 - c0 */
- real atom_density; /* The atom number density for the local grid */
- real maxAtomGroupRadius; /* The maximum distance an atom can be outside
- * of a cell and outside of the grid
- */
-
- gmx_bool bSimple; /* Is this grid simple or super/sub */
- int na_c; /* Number of atoms per cluster */
- int na_cj; /* Number of atoms for list j-clusters */
- int na_sc; /* Number of atoms per super-cluster */
- int na_c_2log; /* 2log of na_c */
-
- int numCells[DIM - 1]; /* Number of cells along x/y */
- int nc; /* Total number of cells */
-
- real cellSize[DIM - 1]; /* size of a cell */
- real invCellSize[DIM - 1]; /* 1/cellSize */
-
- int cell0; /* Index in nbs->cell corresponding to cell 0 */
-
- /* Grid data */
- std::vector<int> cxy_na; /* The number of atoms for each column in x,y */
- std::vector<int> cxy_ind; /* Grid (super)cell index, offset from cell0 */
-
- std::vector<int> nsubc; /* The number of sub cells for each super cell */
-
- /* Bounding boxes */
- std::vector<float> bbcz; /* Bounding boxes in z for the cells */
- std::vector < nbnxn_bb_t, gmx::AlignedAllocator < nbnxn_bb_t>> bb; /* 3D bounding boxes for the sub cells */
- std::vector < nbnxn_bb_t, gmx::AlignedAllocator < nbnxn_bb_t>> bbjStorage; /* 3D j-bounding boxes for the case where
- * the i- and j-cluster sizes are different */
- gmx::ArrayRef<nbnxn_bb_t> bbj; /* 3D j-bounding boxes */
- std::vector < float, gmx::AlignedAllocator < float>> pbb; /* 3D b. boxes in xxxx format per super cell */
-
- /* Bit-flag information */
- std::vector<int> flags; /* Flags for properties of clusters in each cell */
- std::vector<unsigned int> fep; /* FEP signal bits for atoms in each cluster */
-
- /* Statistics */
- int nsubc_tot; /* Total number of subcell, used for printing */
+ public:
+ /*! \internal
+ * \brief The cluster and cell geometry of a grid
+ */
+ struct Geometry
+ {
+ //! Constructs the cluster/cell geometry given the type of pairlist
+ Geometry(PairlistType pairlistType);
+
+ bool isSimple; //!< Is this grid simple (CPU) or hierarchical (GPU)
+ int numAtomsICluster; //!< Number of atoms per cluster
+ int numAtomsJCluster; //!< Number of atoms for list j-clusters
+ int numAtomsPerCell; //!< Number of atoms per cell
+ int numAtomsICluster2Log; //!< 2log of na_c
+ };
+
+ // The physical dimensions of a grid
+ struct Dimensions
+ {
+ //! The lower corner of the (local) grid
+ rvec lowerCorner;
+ //! The upper corner of the (local) grid
+ rvec upperCorner;
+ //! The physical grid size: upperCorner - lowerCorner
+ rvec gridSize;
+ //! An estimate for the atom number density of the region targeted by the grid
+ real atomDensity;
+ //! The maximum distance an atom can be outside of a cell and outside of the grid
+ real maxAtomGroupRadius;
+ //! Size of cell along dimension x and y
+ real cellSize[DIM - 1];
+ //! 1/size of a cell along dimensions x and y
+ real invCellSize[DIM - 1];
+ //! The number of grid cells along dimensions x and y
+ int numCells[DIM - 1];
+ };
+
+ //! Constructs a grid given the type of pairlist
+ Grid(PairlistType pairlistType);
+
+ //! Returns the geometry of the grid cells
+ const Geometry &geometry() const
+ {
+ return geometry_;
+ }
+
+ //! Returns the dimensions of the grid
+ const Dimensions &dimensions() const
+ {
+ return dimensions_;
+ }
+
+ //! Returns the total number of grid columns
+ int numColumns() const
+ {
+ return dimensions_.numCells[XX]*dimensions_.numCells[YY];
+ }
+
+ //! Returns the total number of grid cells
+ int numCells() const
+ {
+ return numCellsTotal_;
+ }
+
+ //! Returns the cell offset of (the first cell of) this grid in the list of cells combined over all grids
+ int cellOffset() const
+ {
+ return cellOffset_;
+ }
+
+ //! Returns the first cell index in the grid, starting at 0 in this grid
+ int firstCellInColumn(int columnIndex) const
+ {
+ return cxy_ind_[columnIndex];
+ };
+
+ //! Returns the number of cells in the column
+ int numCellsInColumn(int columnIndex) const
+ {
+ return cxy_ind_[columnIndex + 1] - cxy_ind_[columnIndex];
+ };
+
+ //! Returns the index of the first atom in the column
+ int firstAtomInColumn(int columnIndex) const
+ {
+ return (cellOffset_ + cxy_ind_[columnIndex])*geometry_.numAtomsPerCell;
+ };
+
+ //! Returns the number of real atoms in the column
+ int numAtomsInColumn(int columnIndex) const
+ {
+ return cxy_na_[columnIndex];
+ };
+
+ //! Returns the number of atoms in the column including padding
+ int paddedNumAtomsInColumn(int columnIndex) const
+ {
+ return numCellsInColumn(columnIndex)*geometry_.numAtomsPerCell;
+ };
+
+ //! Returns the end of the atom index range on the grid, including padding
+ int atomIndexEnd() const
+ {
+ return (cellOffset_ + numCellsTotal_)*geometry_.numAtomsPerCell;
+ }
+
+ //! Returns whether any atom in the cluster is perturbed
+ bool clusterIsPerturbed(int clusterIndex) const
+ {
+ return fep_[clusterIndex] != 0u;
+ }
+
+ //! Returns whether the given atom in the cluster is perturbed
+ bool atomIsPerturbed(int clusterIndex,
+ int atomIndexInCluster) const
+ {
+ return (fep_[clusterIndex] & (1 << atomIndexInCluster)) != 0u;
+ }
+
+ //! Returns the free-energy perturbation bits for the cluster
+ unsigned int fepBits(int clusterIndex) const
+ {
+ return fep_[clusterIndex];
+ }
+
+ //! Returns the i-bounding boxes for all clusters on the grid
+ gmx::ArrayRef<const BoundingBox> iBoundingBoxes() const
+ {
+ return bb_;
+ }
+
+ //! Returns the j-bounding boxes for all clusters on the grid
+ gmx::ArrayRef<const BoundingBox> jBoundingBoxes() const
+ {
+ return bbj_;
+ }
+
+ //! Returns the packed bounding boxes for all clusters on the grid, empty with a CPU list
+ gmx::ArrayRef<const float> packedBoundingBoxes() const
+ {
+ return pbb_;
+ }
+
+ //! Returns the bounding boxes along z for all cells on the grid
+ gmx::ArrayRef<const float> zBoundingBoxes() const
+ {
+ return bbcz_;
+ }
+
+ //! Returns the flags for all clusters on the grid
+ gmx::ArrayRef<const int> clusterFlags() const
+ {
+ return flags_;
+ }
+
+ //! Returns the number of clusters for all cells on the grid, empty with a CPU geometry
+ gmx::ArrayRef<const int> numClustersPerCell() const
+ {
+ return numClusters_;
+ }
+
+ //! Returns the cluster index for an atom
+ int atomToCluster(int atomIndex) const
+ {
+ return (atomIndex >> geometry_.numAtomsICluster2Log);
+ }
+
+ //! Returns the total number of clusters on the grid
+ int numClusters() const
+ {
+ if (geometry_.isSimple)
+ {
+ return numCellsTotal_;
+ }
+ else
+ {
+ return numClustersTotal_;
+ }
+ }
+
+ //! Sets the grid dimensions
+ void setDimensions(const nbnxn_search *nbs,
+ int ddZone,
+ int numAtoms,
+ const rvec lowerCorner,
+ const rvec upperCorner,
+ real atomDensity,
+ real maxAtomGroupRadius);
+
+ //! Determine in which grid cells the atoms should go
+ void calcCellIndices(nbnxn_search *nbs,
+ int ddZone,
+ int cellOffset,
+ const gmx::UpdateGroupsCog *updateGroupsCog,
+ int atomStart,
+ int atomEnd,
+ const int *atinfo,
+ gmx::ArrayRef<const gmx::RVec> x,
+ int numAtomsMoved,
+ const int *move,
+ nbnxn_atomdata_t *nbat);
+
+ private:
+ /*! \brief Fill a pair search cell with atoms
+ *
+ * Potentially sorts atoms and sets the interaction flags.
+ */
+ void fillCell(nbnxn_search *nbs,
+ nbnxn_atomdata_t *nbat,
+ int atomStart,
+ int atomEnd,
+ const int *atinfo,
+ gmx::ArrayRef<const gmx::RVec> x,
+ BoundingBox gmx_unused *bb_work_aligned);
+
+ //! Spatially sort the atoms within one grid column
+ void sortColumnsCpuGeometry(nbnxn_search *nbs,
+ int dd_zone,
+ int atomStart, int atomEnd,
+ const int *atinfo,
+ gmx::ArrayRef<const gmx::RVec> x,
+ nbnxn_atomdata_t *nbat,
+ int cxy_start, int cxy_end,
+ gmx::ArrayRef<int> sort_work);
+
+ //! Spatially sort the atoms within one grid column
+ void sortColumnsGpuGeometry(nbnxn_search *nbs,
+ int dd_zone,
+ int atomStart, int atomEnd,
+ const int *atinfo,
+ gmx::ArrayRef<const gmx::RVec> x,
+ nbnxn_atomdata_t *nbat,
+ int cxy_start, int cxy_end,
+ gmx::ArrayRef<int> sort_work);
+
+ /* Data members */
+ //! The geometry of the grid clusters and cells
+ Geometry geometry_;
+ //! The physical dimensions of the grid
+ Dimensions dimensions_;
+
+ //! The total number of cells in this grid
+ int numCellsTotal_;
+ //! Index in nbs->cell corresponding to cell 0 */
+ int cellOffset_;
+
+ /* Grid data */
+ //! The number of, non-filler, atoms for each grid column
+ std::vector<int> cxy_na_;
+ //! The grid-local cell index for each grid column
+ std::vector<int> cxy_ind_;
+
+ //! The number of cluster for each cell
+ std::vector<int> numClusters_;
+
+ /* Bounding boxes */
+ //! Bounding boxes in z for the cells
+ std::vector<float> bbcz_;
+ //! 3D bounding boxes for the sub cells
+ std::vector < BoundingBox, gmx::AlignedAllocator < BoundingBox>> bb_;
+ //! 3D j-bounding boxes for the case where the i- and j-cluster sizes are different
+ std::vector < BoundingBox, gmx::AlignedAllocator < BoundingBox>> bbjStorage_;
+ //! 3D j-bounding boxes
+ gmx::ArrayRef<BoundingBox> bbj_;
+ //! 3D bounding boxes in packed xxxx format per cell
+ std::vector < float, gmx::AlignedAllocator < float>> pbb_;
+
+ /* Bit-flag information */
+ //! Flags for properties of clusters in each cell
+ std::vector<int> flags_;
+ //! Signal bits for atoms in each cell that tell whether an atom is perturbed
+ std::vector<unsigned int> fep_;
+
+ /* Statistics */
+ //! Total number of clusters, used for printing
+ int numClustersTotal_;
};
+} // namespace Nbnxm
+
#endif
#include "gromacs/utility/real.h"
struct gmx_domdec_zones_t;
-struct nbnxn_grid_t;
+
+namespace Nbnxm
+{
+class Grid;
+}
// TODO Document after refactoring
enbsCCgrid, enbsCCsearch, enbsCCcombine, enbsCCreducef, enbsCCnr
};
-/* Thread-local work struct, contains part of nbnxn_grid_t */
+/* Thread-local work struct, contains working data for Grid */
struct nbnxn_search_work_t
{
nbnxn_search_work_t();
{
/* \brief Constructor
*
- * \param[in] ePBC The periodic boundary conditions
- * \param[in] n_dd_cells The number of domain decomposition cells per dimension, without DD nullptr should be passed
- * \param[in] zones The domain decomposition zone setup, without DD nullptr should be passed
- * \param[in] bFEP Tells whether non-bonded interactions are perturbed
- * \param[in] nthread_max The maximum number of threads used in the search
+ * \param[in] ePBC The periodic boundary conditions
+ * \param[in] n_dd_cells The number of domain decomposition cells per dimension, without DD nullptr should be passed
+ * \param[in] zones The domain decomposition zone setup, without DD nullptr should be passed
+ * \param[in] nb_kernel_type The nbnxn non-bonded kernel type
+ * \param[in] bFEP Tells whether non-bonded interactions are perturbed
+ * \param[in] nthread_max The maximum number of threads used in the search
*/
nbnxn_search(int ePBC,
const ivec *n_dd_cells,
const gmx_domdec_zones_t *zones,
+ PairlistType pairlistType,
gmx_bool bFEP,
int nthread_max);
ivec dd_dim; /* Are we doing DD in x,y,z? */
const gmx_domdec_zones_t *zones; /* The domain decomposition zones */
- std::vector<nbnxn_grid_t> grid; /* Array of grids, size ngrid */
+ std::vector<Nbnxm::Grid> grid; /* Array of grids, size ngrid */
std::vector<int> cell; /* Actual allocated cell array for all grids */
std::vector<int> a; /* Atom index for grid, the inverse of cell */
std::make_unique<nbnxn_search>(ir->ePBC,
DOMAINDECOMP(cr) ? &cr->dd->nc : nullptr,
DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : nullptr,
+ listParams.pairlistType,
bFEP_NonBonded,
gmx_omp_nthreads_get(emntPairsearch));
#include "internal.h"
#include "pairlistwork.h"
-using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
+using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
+
+using nbnxn_bb_t = Nbnxm::BoundingBox; // TODO: Remove when refactoring this file
+
+using Grid = Nbnxm::Grid; // TODO: Remove when refactoring this file
// Convience alias for partial Nbnxn namespace usage
using InteractionLocality = Nbnxm::InteractionLocality;
nbnxn_search::nbnxn_search(int ePBC,
const ivec *n_dd_cells,
const gmx_domdec_zones_t *zones,
+ const PairlistType pairlistType,
gmx_bool bFEP,
int nthread_max) :
bFEP(bFEP),
}
}
- grid.resize(numGrids);
+ grid.resize(numGrids, pairlistType);
/* Initialize detailed nbsearch cycle counting */
print_cycles = (getenv("GMX_NBNXN_CYCLE") != nullptr);
* assigned to (when atom groups instead of individual atoms are assigned
* to cells), this distance returned can be larger than the input.
*/
-static real listRangeForBoundingBoxToGridCell(real rlist,
- const nbnxn_grid_t &grid)
+static real
+listRangeForBoundingBoxToGridCell(real rlist,
+ const Grid::Dimensions &gridDims)
{
- return rlist + grid.maxAtomGroupRadius;
+ return rlist + gridDims.maxAtomGroupRadius;
}
/* Returns the pair-list cutoff between a grid cells given an atom-to-atom pair-list cutoff
* assigned to (when atom groups instead of individual atoms are assigned
* to cells), this distance returned can be larger than the input.
*/
-static real listRangeForGridCellToGridCell(real rlist,
- const nbnxn_grid_t &iGrid,
- const nbnxn_grid_t &jGrid)
+static real
+listRangeForGridCellToGridCell(real rlist,
+ const Grid::Dimensions &iGridDims,
+ const Grid::Dimensions &jGridDims)
{
- return rlist + iGrid.maxAtomGroupRadius + jGrid.maxAtomGroupRadius;
+ return rlist + iGridDims.maxAtomGroupRadius + jGridDims.maxAtomGroupRadius;
}
/* Determines the cell range along one dimension that
*/
template<int dim>
static void get_cell_range(real b0, real b1,
- const nbnxn_grid_t &jGrid,
+ const Grid::Dimensions &jGridDims,
real d2, real rlist, int *cf, int *cl)
{
- real listRangeBBToCell2 = gmx::square(listRangeForBoundingBoxToGridCell(rlist, jGrid));
- real distanceInCells = (b0 - jGrid.c0[dim])*jGrid.invCellSize[dim];
+ real listRangeBBToCell2 = gmx::square(listRangeForBoundingBoxToGridCell(rlist, jGridDims));
+ real distanceInCells = (b0 - jGridDims.lowerCorner[dim])*jGridDims.invCellSize[dim];
*cf = std::max(static_cast<int>(distanceInCells), 0);
while (*cf > 0 &&
- d2 + gmx::square((b0 - jGrid.c0[dim]) - (*cf - 1 + 1)*jGrid.cellSize[dim]) < listRangeBBToCell2)
+ d2 + gmx::square((b0 - jGridDims.lowerCorner[dim]) - (*cf - 1 + 1)*jGridDims.cellSize[dim]) < listRangeBBToCell2)
{
(*cf)--;
}
- *cl = std::min(static_cast<int>((b1 - jGrid.c0[dim])*jGrid.invCellSize[dim]), jGrid.numCells[dim] - 1);
- while (*cl < jGrid.numCells[dim] - 1 &&
- d2 + gmx::square((*cl + 1)*jGrid.cellSize[dim] - (b1 - jGrid.c0[dim])) < listRangeBBToCell2)
+ *cl = std::min(static_cast<int>((b1 - jGridDims.lowerCorner[dim])*jGridDims.invCellSize[dim]), jGridDims.numCells[dim] - 1);
+ while (*cl < jGridDims.numCells[dim] - 1 &&
+ d2 + gmx::square((*cl + 1)*jGridDims.cellSize[dim] - (b1 - jGridDims.lowerCorner[dim])) < listRangeBBToCell2)
{
(*cl)++;
}
static void print_nblist_statistics(FILE *fp, const NbnxnPairlistCpu *nbl,
const nbnxn_search *nbs, real rl)
{
- const nbnxn_grid_t *grid;
- int cs[SHIFTS];
- int npexcl;
-
- grid = &nbs->grid[0];
+ const Grid &grid = nbs->grid[0];
+ const Grid::Dimensions &dims = grid.dimensions();
fprintf(fp, "nbl nci %zu ncj %d\n",
nbl->ci.size(), nbl->ncjInUse);
+ const int numAtomsJCluster = grid.geometry().numAtomsJCluster;
+ const double numAtomsPerCell = nbl->ncjInUse/static_cast<double>(grid.numCells())*numAtomsJCluster;
fprintf(fp, "nbl na_cj %d rl %g ncp %d per cell %.1f atoms %.1f ratio %.2f\n",
- nbl->na_cj, rl, nbl->ncjInUse, nbl->ncjInUse/static_cast<double>(grid->nc),
- nbl->ncjInUse/static_cast<double>(grid->nc)*grid->na_cj,
- nbl->ncjInUse/static_cast<double>(grid->nc)*grid->na_cj/(0.5*4.0/3.0*M_PI*rl*rl*rl*grid->nc*grid->na_cj/(grid->size[XX]*grid->size[YY]*grid->size[ZZ])));
+ nbl->na_cj, rl, nbl->ncjInUse, nbl->ncjInUse/static_cast<double>(grid.numCells()),
+ numAtomsPerCell,
+ numAtomsPerCell/(0.5*4.0/3.0*M_PI*rl*rl*rl*grid.numCells()*numAtomsJCluster/(dims.gridSize[XX]*dims.gridSize[YY]*dims.gridSize[ZZ])));
fprintf(fp, "nbl average j cell list length %.1f\n",
0.25*nbl->ncjInUse/std::max(static_cast<double>(nbl->ci.size()), 1.0));
- for (int s = 0; s < SHIFTS; s++)
- {
- cs[s] = 0;
- }
- npexcl = 0;
+ int cs[SHIFTS] = { 0 };
+ int npexcl = 0;
for (const nbnxn_ci_t &ciEntry : nbl->ci)
{
cs[ciEntry.shift & NBNXN_CI_SHIFT] +=
static void print_nblist_statistics(FILE *fp, const NbnxnPairlistGpu *nbl,
const nbnxn_search *nbs, real rl)
{
- const nbnxn_grid_t *grid;
- int b;
- int c[c_gpuNumClusterPerCell + 1];
- double sum_nsp, sum_nsp2;
- int nsp_max;
-
- /* This code only produces correct statistics with domain decomposition */
- grid = &nbs->grid[0];
+ const Grid &grid = nbs->grid[0];
+ const Grid::Dimensions &dims = grid.dimensions();
fprintf(fp, "nbl nsci %zu ncj4 %zu nsi %d excl4 %zu\n",
nbl->sci.size(), nbl->cj4.size(), nbl->nci_tot, nbl->excl.size());
+ const int numAtomsCluster = grid.geometry().numAtomsICluster;
+ const double numAtomsPerCell = nbl->nci_tot/static_cast<double>(grid.numClusters())*numAtomsCluster;
fprintf(fp, "nbl na_c %d rl %g ncp %d per cell %.1f atoms %.1f ratio %.2f\n",
- nbl->na_ci, rl, nbl->nci_tot, nbl->nci_tot/static_cast<double>(grid->nsubc_tot),
- nbl->nci_tot/static_cast<double>(grid->nsubc_tot)*grid->na_c,
- nbl->nci_tot/static_cast<double>(grid->nsubc_tot)*grid->na_c/(0.5*4.0/3.0*M_PI*rl*rl*rl*grid->nsubc_tot*grid->na_c/(grid->size[XX]*grid->size[YY]*grid->size[ZZ])));
-
- sum_nsp = 0;
- sum_nsp2 = 0;
- nsp_max = 0;
- for (int si = 0; si <= c_gpuNumClusterPerCell; si++)
- {
- c[si] = 0;
- }
+ nbl->na_ci, rl, nbl->nci_tot, nbl->nci_tot/static_cast<double>(grid.numClusters()),
+ numAtomsPerCell,
+ numAtomsPerCell/(0.5*4.0/3.0*M_PI*rl*rl*rl*grid.numClusters()*numAtomsCluster/(dims.gridSize[XX]*dims.gridSize[YY]*dims.gridSize[ZZ])));
+
+ double sum_nsp = 0;
+ double sum_nsp2 = 0;
+ int nsp_max = 0;
+ int c[c_gpuNumClusterPerCell + 1] = { 0 };
for (const nbnxn_sci_t &sci : nbl->sci)
{
int nsp = 0;
{
for (int j = 0; j < c_nbnxnGpuJgroupSize; j++)
{
- b = 0;
+ int b = 0;
for (int si = 0; si < c_gpuNumClusterPerCell; si++)
{
if (nbl->cj4[j4].imei[0].imask & (1U << (j*c_gpuNumClusterPerCell + si)))
if (!nbl->cj4.empty())
{
- for (b = 0; b <= c_gpuNumClusterPerCell; b++)
+ for (int b = 0; b <= c_gpuNumClusterPerCell; b++)
{
fprintf(fp, "nbl j-list #i-subcell %d %7d %4.1f\n",
b, c[b], 100.0*c[b]/size_t {nbl->cj4.size()*c_nbnxnGpuJgroupSize});
* \param[in,out] numDistanceChecks The number of distance checks performed
*/
static void
-makeClusterListSimple(const nbnxn_grid_t &jGrid,
+makeClusterListSimple(const Grid &jGrid,
NbnxnPairlistCpu * nbl,
int icluster,
int jclusterFirst,
InRange = FALSE;
while (!InRange && jclusterFirst <= jclusterLast)
{
- real d2 = subc_bb_dist2(0, bb_ci, jclusterFirst, jGrid.bb);
+ real d2 = subc_bb_dist2(0, bb_ci, jclusterFirst, jGrid.jBoundingBoxes());
*numDistanceChecks += 2;
/* Check if the distance is within the distance where
}
else if (d2 < rlist2)
{
- int cjf_gl = jGrid.cell0 + jclusterFirst;
+ int cjf_gl = jGrid.cellOffset() + jclusterFirst;
for (int i = 0; i < c_nbnxnCpuIClusterSize && !InRange; i++)
{
for (int j = 0; j < c_nbnxnCpuIClusterSize; j++)
InRange = FALSE;
while (!InRange && jclusterLast > jclusterFirst)
{
- real d2 = subc_bb_dist2(0, bb_ci, jclusterLast, jGrid.bb);
+ real d2 = subc_bb_dist2(0, bb_ci, jclusterLast, jGrid.jBoundingBoxes());
*numDistanceChecks += 2;
/* Check if the distance is within the distance where
}
else if (d2 < rlist2)
{
- int cjl_gl = jGrid.cell0 + jclusterLast;
+ int cjl_gl = jGrid.cellOffset() + jclusterLast;
for (int i = 0; i < c_nbnxnCpuIClusterSize && !InRange; i++)
{
for (int j = 0; j < c_nbnxnCpuIClusterSize; j++)
{
/* Store cj and the interaction mask */
nbnxn_cj_t cjEntry;
- cjEntry.cj = jGrid.cell0 + jcluster;
+ cjEntry.cj = jGrid.cellOffset() + jcluster;
cjEntry.excl = get_imask(excludeSubDiagonal, icluster, jcluster);
nbl->cj.push_back(cjEntry);
}
/* Plain C or SIMD4 code for making a pair list of super-cell sci vs scj.
* Checks bounding box distances and possibly atom pair distances.
*/
-static void make_cluster_list_supersub(const nbnxn_grid_t &iGrid,
- const nbnxn_grid_t &jGrid,
+static void make_cluster_list_supersub(const Grid &iGrid,
+ const Grid &jGrid,
NbnxnPairlistGpu *nbl,
const int sci,
const int scj,
const nbnxn_bb_t *bb_ci = work.iSuperClusterData.bb.data();
#endif
- assert(c_nbnxnGpuClusterSize == iGrid.na_c);
- assert(c_nbnxnGpuClusterSize == jGrid.na_c);
+ assert(c_nbnxnGpuClusterSize == iGrid.geometry().numAtomsICluster);
+ assert(c_nbnxnGpuClusterSize == jGrid.geometry().numAtomsICluster);
/* We generate the pairlist mainly based on bounding-box distances
* and do atom pair distance based pruning on the GPU.
float *d2l = work.distanceBuffer.data();
- for (int subc = 0; subc < jGrid.nsubc[scj]; subc++)
+ for (int subc = 0; subc < jGrid.numClustersPerCell()[scj]; subc++)
{
const int cj4_ind = work.cj_ind/c_nbnxnGpuJgroupSize;
const int cj_offset = work.cj_ind - cj4_ind*c_nbnxnGpuJgroupSize;
const int cj = scj*c_gpuNumClusterPerCell + subc;
- const int cj_gl = jGrid.cell0*c_gpuNumClusterPerCell + cj;
+ const int cj_gl = jGrid.cellOffset()*c_gpuNumClusterPerCell + cj;
int ci1;
if (excludeSubDiagonal && sci == scj)
}
else
{
- ci1 = iGrid.nsubc[sci];
+ ci1 = iGrid.numClustersPerCell()[sci];
}
#if NBNXN_BBXXXX
/* Determine all ci1 bb distances in one call with SIMD4 */
- subc_bb_dist2_simd4_xxxx(jGrid.pbb.data() + (cj >> STRIDE_PBB_2LOG)*NNBSBB_XXXX + (cj & (STRIDE_PBB-1)),
+ subc_bb_dist2_simd4_xxxx(jGrid.packedBoundingBoxes().data() + (cj >> STRIDE_PBB_2LOG)*NNBSBB_XXXX + (cj & (STRIDE_PBB-1)),
ci1, pbb_ci, d2l);
*numDistanceChecks += c_nbnxnGpuClusterSize*2;
#endif
#if !NBNXN_BBXXXX
/* Determine the bb distance between ci and cj */
- d2l[ci] = subc_bb_dist2(ci, bb_ci, cj, jGrid.bb);
+ d2l[ci] = subc_bb_dist2(ci, bb_ci, cj, jGrid.jBoundingBoxes());
*numDistanceChecks += 2;
#endif
float d2 = d2l[ci];
real gmx_unused shy,
real gmx_unused shz,
real gmx_unused rlist_fep2,
- const nbnxn_grid_t &iGrid,
- const nbnxn_grid_t &jGrid,
+ const Grid &iGrid,
+ const Grid &jGrid,
t_nblist *nlist)
{
int ci, cj_ind_start, cj_ind_end, cja, cjr;
int nri_max;
- int ngid, gid_i = 0, gid_j, gid;
+ int gid_i = 0, gid_j, gid;
int egp_shift, egp_mask;
int gid_cj = 0;
int ind_i, ind_j, ai, aj;
reallocate_nblist(nlist);
}
+ const int numAtomsJCluster = jGrid.geometry().numAtomsJCluster;
+
const nbnxn_atomdata_t::Params &nbatParams = nbat->params();
- ngid = nbatParams.nenergrp;
+ const int ngid = nbatParams.nenergrp;
- if (ngid*jGrid.na_cj > gmx::index(sizeof(gid_cj)*8))
+ /* TODO: Consider adding a check in grompp and changing this to an assert */
+ const int numBitsInEnergyGroupIdsForAtomsInJCluster = sizeof(gid_cj)*8;
+ if (ngid*numAtomsJCluster > numBitsInEnergyGroupIdsForAtomsInJCluster)
{
gmx_fatal(FARGS, "The Verlet scheme with %dx%d kernels and free-energy only supports up to %zu energy groups",
- iGrid.na_c, jGrid.na_cj, (sizeof(gid_cj)*8)/jGrid.na_cj);
+ iGrid.geometry().numAtomsICluster, numAtomsJCluster,
+ (sizeof(gid_cj)*8)/numAtomsJCluster);
}
egp_shift = nbatParams.neg_2log;
nlist->gid[nri] = 0;
nlist->shift[nri] = nbl_ci->shift & NBNXN_CI_SHIFT;
- bFEP_i = ((iGrid.fep[ci - iGrid.cell0] & (1 << i)) != 0u);
+ bFEP_i = iGrid.atomIsPerturbed(ci - iGrid.cellOffset(), i);
bFEP_i_all = bFEP_i_all && bFEP_i;
cja = nbl->cj[cj_ind].cj;
- if (jGrid.na_cj == jGrid.na_c)
+ if (numAtomsJCluster == jGrid.geometry().numAtomsICluster)
{
- cjr = cja - jGrid.cell0;
- fep_cj = jGrid.fep[cjr];
+ cjr = cja - jGrid.cellOffset();
+ fep_cj = jGrid.fepBits(cjr);
if (ngid > 1)
{
gid_cj = nbatParams.energrp[cja];
}
}
- else if (2*jGrid.na_cj == jGrid.na_c)
+ else if (2*numAtomsJCluster == jGrid.geometry().numAtomsICluster)
{
- cjr = cja - jGrid.cell0*2;
+ cjr = cja - jGrid.cellOffset()*2;
/* Extract half of the ci fep/energrp mask */
- fep_cj = (jGrid.fep[cjr>>1] >> ((cjr&1)*jGrid.na_cj)) & ((1<<jGrid.na_cj) - 1);
+ fep_cj = (jGrid.fepBits(cjr >> 1) >> ((cjr & 1)*numAtomsJCluster)) & ((1 << numAtomsJCluster) - 1);
if (ngid > 1)
{
- gid_cj = nbatParams.energrp[cja>>1] >> ((cja&1)*jGrid.na_cj*egp_shift) & ((1<<(jGrid.na_cj*egp_shift)) - 1);
+ gid_cj = nbatParams.energrp[cja >> 1] >> ((cja & 1)*numAtomsJCluster*egp_shift) & ((1 << (numAtomsJCluster*egp_shift)) - 1);
}
}
else
{
- cjr = cja - (jGrid.cell0>>1);
+ cjr = cja - (jGrid.cellOffset() >> 1);
/* Combine two ci fep masks/energrp */
- fep_cj = jGrid.fep[cjr*2] + (jGrid.fep[cjr*2+1] << jGrid.na_c);
+ fep_cj = jGrid.fepBits(cjr*2) + (jGrid.fepBits(cjr*2 + 1) << jGrid.geometry().numAtomsICluster);
if (ngid > 1)
{
- gid_cj = nbatParams.energrp[cja*2] + (nbatParams.energrp[cja*2+1] << (jGrid.na_c*egp_shift));
+ gid_cj = nbatParams.energrp[cja*2] + (nbatParams.energrp[cja*2+1] << (jGrid.geometry().numAtomsICluster*egp_shift));
}
}
real shy,
real shz,
real rlist_fep2,
- const nbnxn_grid_t &iGrid,
- const nbnxn_grid_t &jGrid,
+ const Grid &iGrid,
+ const Grid &jGrid,
t_nblist *nlist)
{
int nri_max;
nlist->gid[nri] = 0;
nlist->shift[nri] = nbl_sci->shift & NBNXN_CI_SHIFT;
- bFEP_i = ((iGrid.fep[c_abs - iGrid.cell0*c_gpuNumClusterPerCell] & (1 << i)) != 0u);
+ bFEP_i = iGrid.atomIsPerturbed(c_abs - iGrid.cellOffset()*c_gpuNumClusterPerCell, i);
xi = nbat->x()[ind_i*nbat->xstride+XX] + shx;
yi = nbat->x()[ind_i*nbat->xstride+YY] + shy;
for (int gcj = 0; gcj < c_nbnxnGpuJgroupSize; gcj++)
{
- unsigned int fep_cj;
-
if ((cj4->imei[0].imask & (1U << (gcj*c_gpuNumClusterPerCell + c))) == 0)
{
/* Skip this ci for this cj */
}
const int cjr =
- cj4->cj[gcj] - jGrid.cell0*c_gpuNumClusterPerCell;
-
- fep_cj = jGrid.fep[cjr];
+ cj4->cj[gcj] - jGrid.cellOffset()*c_gpuNumClusterPerCell;
- if (bFEP_i || fep_cj != 0)
+ if (bFEP_i || jGrid.clusterIsPerturbed(cjr))
{
for (int j = 0; j < nbl->na_cj; j++)
{
/* Is this interaction perturbed and not excluded? */
- ind_j = (jGrid.cell0*c_gpuNumClusterPerCell + cjr)*nbl->na_cj + j;
+ ind_j = (jGrid.cellOffset()*c_gpuNumClusterPerCell + cjr)*nbl->na_cj + j;
aj = nbs->a[ind_j];
if (aj >= 0 &&
- (bFEP_i || (fep_cj & (1 << j))) &&
+ (bFEP_i || jGrid.atomIsPerturbed(cjr, j)) &&
(!bDiagRemoved || ind_j >= ind_i))
{
int excl_pair;
}
/* Sets a simple list i-cell bounding box, including PBC shift */
-static inline void set_icell_bb(const nbnxn_grid_t &iGrid,
+static inline void set_icell_bb(const Grid &iGrid,
int ci,
real shx, real shy, real shz,
NbnxnPairlistCpuWork *work)
{
- set_icell_bb_simple(iGrid.bb, ci, shx, shy, shz, &work->iClusterData.bb[0]);
+ set_icell_bb_simple(iGrid.iBoundingBoxes(), ci, shx, shy, shz,
+ &work->iClusterData.bb[0]);
}
#if NBNXN_BBXXXX
}
/* Sets a super-cell and sub cell bounding boxes, including PBC shift */
-gmx_unused static void set_icell_bb(const nbnxn_grid_t &iGrid,
+gmx_unused static void set_icell_bb(const Grid &iGrid,
int ci,
real shx, real shy, real shz,
NbnxnPairlistGpuWork *work)
{
#if NBNXN_BBXXXX
- set_icell_bbxxxx_supersub(iGrid.pbb, ci, shx, shy, shz,
+ set_icell_bbxxxx_supersub(iGrid.packedBoundingBoxes(), ci, shx, shy, shz,
work->iSuperClusterData.bbPacked.data());
#else
- set_icell_bb_supersub(iGrid.bb, ci, shx, shy, shz,
+ set_icell_bb_supersub(iGrid.iBoundingBoxes(), ci, shx, shy, shz,
work->iSuperClusterData.bb.data());
#endif
}
#endif /* !GMX_SIMD4_HAVE_REAL */
}
-static real minimum_subgrid_size_xy(const nbnxn_grid_t &grid)
+static real minimum_subgrid_size_xy(const Grid &grid)
{
- if (grid.bSimple)
+ const Grid::Dimensions &dims = grid.dimensions();
+
+ if (grid.geometry().isSimple)
{
- return std::min(grid.cellSize[XX], grid.cellSize[YY]);
+ return std::min(dims.cellSize[XX], dims.cellSize[YY]);
}
else
{
- return std::min(grid.cellSize[XX]/c_gpuNumClusterPerCellX,
- grid.cellSize[YY]/c_gpuNumClusterPerCellY);
+ return std::min(dims.cellSize[XX]/c_gpuNumClusterPerCellX,
+ dims.cellSize[YY]/c_gpuNumClusterPerCellY);
}
}
-static real effective_buffer_1x1_vs_MxN(const nbnxn_grid_t &iGrid,
- const nbnxn_grid_t &jGrid)
+static real effective_buffer_1x1_vs_MxN(const Grid &iGrid,
+ const Grid &jGrid)
{
const real eff_1x1_buffer_fac_overest = 0.1;
const int nsubpair_target_min = 36;
real r_eff_sup, vol_est, nsp_est, nsp_est_nl;
- const nbnxn_grid_t &grid = nbs->grid[0];
+ const Grid &grid = nbs->grid[0];
/* We don't need to balance list sizes if:
* - We didn't request balancing.
* since we will always generate at least #cells lists.
* - We don't have any cells, since then there won't be any lists.
*/
- if (min_ci_balanced <= 0 || grid.nc >= min_ci_balanced || grid.nc == 0)
+ if (min_ci_balanced <= 0 || grid.numCells() >= min_ci_balanced || grid.numCells() == 0)
{
/* nsubpair_target==0 signals no balancing */
*nsubpair_target = 0;
return;
}
- gmx::RVec ls;
- ls[XX] = (grid.c1[XX] - grid.c0[XX])/(grid.numCells[XX]*c_gpuNumClusterPerCellX);
- ls[YY] = (grid.c1[YY] - grid.c0[YY])/(grid.numCells[YY]*c_gpuNumClusterPerCellY);
- ls[ZZ] = grid.na_c/(grid.atom_density*ls[XX]*ls[YY]);
+ gmx::RVec ls;
+ const int numAtomsCluster = grid.geometry().numAtomsICluster;
+ const Grid::Dimensions &dims = grid.dimensions();
+
+ ls[XX] = dims.cellSize[XX]/c_gpuNumClusterPerCellX;
+ ls[YY] = dims.cellSize[YY]/c_gpuNumClusterPerCellY;
+ ls[ZZ] = numAtomsCluster/(dims.atomDensity*ls[XX]*ls[YY]);
/* The formulas below are a heuristic estimate of the average nsj per si*/
- r_eff_sup = rlist + nbnxn_get_rlist_effective_inc(grid.na_c, ls);
+ r_eff_sup = rlist + nbnxn_get_rlist_effective_inc(numAtomsCluster, ls);
if (!nbs->DomDec || nbs->zones->n == 1)
{
else
{
nsp_est_nl =
- gmx::square(grid.atom_density/grid.na_c)*
+ gmx::square(dims.atomDensity/numAtomsCluster)*
nonlocal_vol2(nbs->zones, ls, r_eff_sup);
}
/* Estimate the number of cluster pairs as the local number of
* clusters times the volume they interact with times the density.
*/
- nsp_est = grid.nsubc_tot*vol_est*grid.atom_density/grid.na_c;
+ nsp_est = grid.numClusters()*vol_est*dims.atomDensity/numAtomsCluster;
/* Subtract the non-local pair count */
nsp_est -= nsp_est_nl;
* groups of atoms we'll anyhow be limited by nsubpair_target_min,
* so this overestimation will not matter.
*/
- nsp_est = std::max(nsp_est, grid.nsubc_tot*14._real);
+ nsp_est = std::max(nsp_est, grid.numClusters()*14._real);
if (debug)
{
}
/* Returns the next ci to be processes by our thread */
-static gmx_bool next_ci(const nbnxn_grid_t &grid,
+static gmx_bool next_ci(const Grid &grid,
int nth, int ci_block,
int *ci_x, int *ci_y,
int *ci_b, int *ci)
*ci_b = 0;
}
- if (*ci >= grid.nc)
+ if (*ci >= grid.numCells())
{
return FALSE;
}
- while (*ci >= grid.cxy_ind[*ci_x*grid.numCells[YY] + *ci_y + 1])
+ while (*ci >= grid.firstCellInColumn(*ci_x*grid.dimensions().numCells[YY] + *ci_y + 1))
{
*ci_y += 1;
- if (*ci_y == grid.numCells[YY])
+ if (*ci_y == grid.dimensions().numCells[YY])
{
*ci_x += 1;
*ci_y = 0;
/* Returns the distance^2 for which we put cell pairs in the list
* without checking atom pair distances. This is usually < rlist^2.
*/
-static float boundingbox_only_distance2(const nbnxn_grid_t &iGrid,
- const nbnxn_grid_t &jGrid,
- real rlist,
- gmx_bool simple)
+static float boundingbox_only_distance2(const Grid::Dimensions &iGridDims,
+ const Grid::Dimensions &jGridDims,
+ real rlist,
+ gmx_bool simple)
{
/* If the distance between two sub-cell bounding boxes is less
* than this distance, do not check the distance between
real bbx, bby;
real rbb2;
- bbx = 0.5*(iGrid.cellSize[XX] + jGrid.cellSize[XX]);
- bby = 0.5*(iGrid.cellSize[YY] + jGrid.cellSize[YY]);
+ bbx = 0.5*(iGridDims.cellSize[XX] + jGridDims.cellSize[XX]);
+ bby = 0.5*(iGridDims.cellSize[YY] + jGridDims.cellSize[YY]);
if (!simple)
{
bbx /= c_gpuNumClusterPerCellX;
#endif
}
-static int get_ci_block_size(const nbnxn_grid_t &iGrid,
+static int get_ci_block_size(const Grid &iGrid,
gmx_bool bDomDec, int nth)
{
const int ci_block_enum = 5;
* zone boundaries with 3D domain decomposition. At the same time
* the blocks will not become too small.
*/
- ci_block = (iGrid.nc*ci_block_enum)/(ci_block_denom*iGrid.numCells[XX]*nth);
+ ci_block = (iGrid.numCells()*ci_block_enum)/(ci_block_denom*iGrid.dimensions().numCells[XX]*nth);
+
+ const int numAtomsPerCell = iGrid.geometry().numAtomsPerCell;
/* Ensure the blocks are not too small: avoids cache invalidation */
- if (ci_block*iGrid.na_sc < ci_block_min_atoms)
+ if (ci_block*numAtomsPerCell < ci_block_min_atoms)
{
- ci_block = (ci_block_min_atoms + iGrid.na_sc - 1)/iGrid.na_sc;
+ ci_block = (ci_block_min_atoms + numAtomsPerCell - 1)/numAtomsPerCell;
}
/* Without domain decomposition
* or with less than 3 blocks per task, divide in nth blocks.
*/
- if (!bDomDec || nth*3*ci_block > iGrid.nc)
+ if (!bDomDec || nth*3*ci_block > iGrid.numCells())
{
- ci_block = (iGrid.nc + nth - 1)/nth;
+ ci_block = (iGrid.numCells() + nth - 1)/nth;
}
- if (ci_block > 1 && (nth - 1)*ci_block >= iGrid.nc)
+ if (ci_block > 1 && (nth - 1)*ci_block >= iGrid.numCells())
{
/* Some threads have no work. Although reducing the block size
* does not decrease the block count on the first few threads,
return false;
}
-static void makeClusterListWrapper(NbnxnPairlistCpu *nbl,
- const nbnxn_grid_t gmx_unused &iGrid,
- const int ci,
- const nbnxn_grid_t &jGrid,
- const int firstCell,
- const int lastCell,
- const bool excludeSubDiagonal,
- const nbnxn_atomdata_t *nbat,
- const real rlist2,
- const real rbb2,
- const Nbnxm::KernelType kernelType,
- int *numDistanceChecks)
+static void makeClusterListWrapper(NbnxnPairlistCpu *nbl,
+ const Grid gmx_unused &iGrid,
+ const int ci,
+ const Grid &jGrid,
+ const int firstCell,
+ const int lastCell,
+ const bool excludeSubDiagonal,
+ const nbnxn_atomdata_t *nbat,
+ const real rlist2,
+ const real rbb2,
+ const Nbnxm::KernelType kernelType,
+ int *numDistanceChecks)
{
switch (kernelType)
{
}
}
-static void makeClusterListWrapper(NbnxnPairlistGpu *nbl,
- const nbnxn_grid_t &gmx_unused iGrid,
- const int ci,
- const nbnxn_grid_t &jGrid,
- const int firstCell,
- const int lastCell,
- const bool excludeSubDiagonal,
- const nbnxn_atomdata_t *nbat,
- const real rlist2,
- const real rbb2,
- Nbnxm::KernelType gmx_unused kernelType,
- int *numDistanceChecks)
+static void makeClusterListWrapper(NbnxnPairlistGpu *nbl,
+ const Grid &gmx_unused iGrid,
+ const int ci,
+ const Grid &jGrid,
+ const int firstCell,
+ const int lastCell,
+ const bool excludeSubDiagonal,
+ const nbnxn_atomdata_t *nbat,
+ const real rlist2,
+ const real rbb2,
+ Nbnxm::KernelType gmx_unused kernelType,
+ int *numDistanceChecks)
{
for (int cj = firstCell; cj <= lastCell; cj++)
{
/* Generates the part of pair-list nbl assigned to our thread */
template <typename T>
static void nbnxn_make_pairlist_part(const nbnxn_search *nbs,
- const nbnxn_grid_t &iGrid,
- const nbnxn_grid_t &jGrid,
+ const Grid &iGrid,
+ const Grid &jGrid,
nbnxn_search_work_t *work,
const nbnxn_atomdata_t *nbat,
const t_blocka &exclusions,
nbs_cycle_start(&work->cc[enbsCCsearch]);
- if (jGrid.bSimple != pairlistIsSimple(*nbl) ||
- iGrid.bSimple != pairlistIsSimple(*nbl))
+ if (jGrid.geometry().isSimple != pairlistIsSimple(*nbl) ||
+ iGrid.geometry().isSimple != pairlistIsSimple(*nbl))
{
gmx_incons("Grid incompatible with pair-list");
}
sync_work(nbl);
- GMX_ASSERT(nbl->na_ci == jGrid.na_c, "The cluster sizes in the list and grid should match");
+ GMX_ASSERT(nbl->na_ci == jGrid.geometry().numAtomsICluster,
+ "The cluster sizes in the list and grid should match");
nbl->na_cj = Nbnxm::JClusterSizePerKernelType[kernelType];
na_cj_2log = get_2log(nbl->na_cj);
rl_fep2 = rl_fep2*rl_fep2;
}
- rbb2 = boundingbox_only_distance2(iGrid, jGrid, nbl->rlist, pairlistIsSimple(*nbl));
+ const Grid::Dimensions &iGridDims = iGrid.dimensions();
+ const Grid::Dimensions &jGridDims = jGrid.dimensions();
+
+ rbb2 = boundingbox_only_distance2(iGridDims, jGridDims, nbl->rlist, pairlistIsSimple(*nbl));
if (debug)
{
}
else
{
- const real listRangeCellToCell = listRangeForGridCellToGridCell(rlist, iGrid, jGrid);
+ const real listRangeCellToCell =
+ listRangeForGridCellToGridCell(rlist, iGrid.dimensions(), jGrid.dimensions());
if (d == XX &&
box[XX][XX] - fabs(box[YY][XX]) - fabs(box[ZZ][XX]) < listRangeCellToCell)
{
gmx::ArrayRef<const float> pbb_i;
if (bSimple)
{
- bb_i = iGrid.bb;
+ bb_i = iGrid.iBoundingBoxes();
}
else
{
- pbb_i = iGrid.pbb;
+ pbb_i = iGrid.packedBoundingBoxes();
}
#else
/* We use the normal bounding box format for both grid types */
- bb_i = iGrid.bb;
+ bb_i = iGrid.iBoundingBoxes();
#endif
- gmx::ArrayRef<const float> bbcz_i = iGrid.bbcz;
- gmx::ArrayRef<const int> flags_i = iGrid.flags;
- gmx::ArrayRef<const float> bbcz_j = jGrid.bbcz;
- int cell0_i = iGrid.cell0;
+ gmx::ArrayRef<const float> bbcz_i = iGrid.zBoundingBoxes();
+ gmx::ArrayRef<const int> flags_i = iGrid.clusterFlags();
+ gmx::ArrayRef<const float> bbcz_j = jGrid.zBoundingBoxes();
+ int cell0_i = iGrid.cellOffset();
if (debug)
{
fprintf(debug, "nbl nc_i %d col.av. %.1f ci_block %d\n",
- iGrid.nc, iGrid.nc/static_cast<double>(iGrid.numCells[XX]*iGrid.numCells[YY]), ci_block);
+ iGrid.numCells(), iGrid.numCells()/static_cast<double>(iGrid.numColumns()), ci_block);
}
numDistanceChecks = 0;
- const real listRangeBBToJCell2 = gmx::square(listRangeForBoundingBoxToGridCell(rlist, jGrid));
+ const real listRangeBBToJCell2 = gmx::square(listRangeForBoundingBoxToGridCell(rlist, jGrid.dimensions()));
/* Initially ci_b and ci to 1 before where we want them to start,
* as they will both be incremented in next_ci.
}
else
{
- bx1 = iGrid.c0[XX] + (ci_x+1)*iGrid.cellSize[XX];
+ bx1 = iGridDims.lowerCorner[XX] + (ci_x+1)*iGridDims.cellSize[XX];
}
- if (bx1 < jGrid.c0[XX])
+ if (bx1 < jGridDims.lowerCorner[XX])
{
- d2cx = gmx::square(jGrid.c0[XX] - bx1);
+ d2cx = gmx::square(jGridDims.lowerCorner[XX] - bx1);
if (d2cx >= listRangeBBToJCell2)
{
}
}
- ci_xy = ci_x*iGrid.numCells[YY] + ci_y;
+ ci_xy = ci_x*iGridDims.numCells[YY] + ci_y;
/* Loop over shift vectors in three dimensions */
for (int tz = -shp[ZZ]; tz <= shp[ZZ]; tz++)
continue;
}
- bz1_frac = bz1/(iGrid.cxy_ind[ci_xy+1] - iGrid.cxy_ind[ci_xy]);
+ bz1_frac = bz1/iGrid.numCellsInColumn(ci_xy);
if (bz1_frac < 0)
{
bz1_frac = 0;
}
else
{
- by0 = iGrid.c0[YY] + (ci_y )*iGrid.cellSize[YY] + shy;
- by1 = iGrid.c0[YY] + (ci_y+1)*iGrid.cellSize[YY] + shy;
+ by0 = iGridDims.lowerCorner[YY] + (ci_y )*iGridDims.cellSize[YY] + shy;
+ by1 = iGridDims.lowerCorner[YY] + (ci_y + 1)*iGridDims.cellSize[YY] + shy;
}
get_cell_range<YY>(by0, by1,
- jGrid,
+ jGridDims,
d2z_cx, rlist,
&cyf, &cyl);
}
d2z_cy = d2z;
- if (by1 < jGrid.c0[YY])
+ if (by1 < jGridDims.lowerCorner[YY])
{
- d2z_cy += gmx::square(jGrid.c0[YY] - by1);
+ d2z_cy += gmx::square(jGridDims.lowerCorner[YY] - by1);
}
- else if (by0 > jGrid.c1[YY])
+ else if (by0 > jGridDims.upperCorner[YY])
{
- d2z_cy += gmx::square(by0 - jGrid.c1[YY]);
+ d2z_cy += gmx::square(by0 - jGridDims.upperCorner[YY]);
}
for (int tx = -shp[XX]; tx <= shp[XX]; tx++)
}
else
{
- bx0 = iGrid.c0[XX] + (ci_x )*iGrid.cellSize[XX] + shx;
- bx1 = iGrid.c0[XX] + (ci_x+1)*iGrid.cellSize[XX] + shx;
+ bx0 = iGridDims.lowerCorner[XX] + (ci_x )*iGridDims.cellSize[XX] + shx;
+ bx1 = iGridDims.lowerCorner[XX] + (ci_x+1)*iGridDims.cellSize[XX] + shx;
}
get_cell_range<XX>(bx0, bx1,
- jGrid,
+ jGridDims,
d2z_cy, rlist,
&cxf, &cxl);
for (int cx = cxf; cx <= cxl; cx++)
{
d2zx = d2z;
- if (jGrid.c0[XX] + cx*jGrid.cellSize[XX] > bx1)
+ if (jGridDims.lowerCorner[XX] + cx*jGridDims.cellSize[XX] > bx1)
{
- d2zx += gmx::square(jGrid.c0[XX] + cx*jGrid.cellSize[XX] - bx1);
+ d2zx += gmx::square(jGridDims.lowerCorner[XX] + cx*jGridDims.cellSize[XX] - bx1);
}
- else if (jGrid.c0[XX] + (cx+1)*jGrid.cellSize[XX] < bx0)
+ else if (jGridDims.lowerCorner[XX] + (cx+1)*jGridDims.cellSize[XX] < bx0)
{
- d2zx += gmx::square(jGrid.c0[XX] + (cx+1)*jGrid.cellSize[XX] - bx0);
+ d2zx += gmx::square(jGridDims.lowerCorner[XX] + (cx+1)*jGridDims.cellSize[XX] - bx0);
}
if (isIntraGridList &&
for (int cy = cyf_x; cy <= cyl; cy++)
{
- const int columnStart = jGrid.cxy_ind[cx*jGrid.numCells[YY] + cy];
- const int columnEnd = jGrid.cxy_ind[cx*jGrid.numCells[YY] + cy + 1];
+ const int columnStart = jGrid.firstCellInColumn(cx*jGridDims.numCells[YY] + cy);
+ const int columnEnd = jGrid.firstCellInColumn(cx*jGridDims.numCells[YY] + cy + 1);
d2zxy = d2zx;
- if (jGrid.c0[YY] + cy*jGrid.cellSize[YY] > by1)
+ if (jGridDims.lowerCorner[YY] + cy*jGridDims.cellSize[YY] > by1)
{
- d2zxy += gmx::square(jGrid.c0[YY] + cy*jGrid.cellSize[YY] - by1);
+ d2zxy += gmx::square(jGridDims.lowerCorner[YY] + cy*jGridDims.cellSize[YY] - by1);
}
- else if (jGrid.c0[YY] + (cy+1)*jGrid.cellSize[YY] < by0)
+ else if (jGridDims.lowerCorner[YY] + (cy + 1)*jGridDims.cellSize[YY] < by0)
{
- d2zxy += gmx::square(jGrid.c0[YY] + (cy+1)*jGrid.cellSize[YY] - by0);
+ d2zxy += gmx::square(jGridDims.lowerCorner[YY] + (cy + 1)*jGridDims.cellSize[YY] - by0);
}
if (columnStart < columnEnd && d2zxy < listRangeBBToJCell2)
{
if (bFBufferFlag && getNumSimpleJClustersInList(*nbl) > ncj_old_i)
{
- bitmask_init_bit(&(work->buffer_flags.flag[(iGrid.cell0+ci) >> gridi_flag_shift]), th);
+ bitmask_init_bit(&(work->buffer_flags.flag[(iGrid.cellOffset() + ci) >> gridi_flag_shift]), th);
}
}
for (int zi = 0; zi < nzi; zi++)
{
- const nbnxn_grid_t &iGrid = nbs->grid[zi];
+ const Grid &iGrid = nbs->grid[zi];
int zj0;
int zj1;
}
for (int zj = zj0; zj < zj1; zj++)
{
- const nbnxn_grid_t &jGrid = nbs->grid[zj];
+ const Grid &jGrid = nbs->grid[zj];
if (debug)
{
* \param[in,out] numDistanceChecks The number of distance checks performed
*/
static inline void
-makeClusterListSimd2xnn(const nbnxn_grid_t &jGrid,
+makeClusterListSimd2xnn(const Grid &jGrid,
NbnxnPairlistCpu * nbl,
int icluster,
int firstCell,
while (!InRange && jclusterFirst <= jclusterLast)
{
#if NBNXN_SEARCH_BB_SIMD4
- d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterFirst, jGrid.bbj);
+ d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterFirst, jGrid.jBoundingBoxes());
#else
- d2 = subc_bb_dist2(0, bb_ci, jclusterFirst, jGrid.bbj);
+ d2 = subc_bb_dist2(0, bb_ci, jclusterFirst, jGrid.jBoundingBoxes());
#endif
*numDistanceChecks += 2;
}
else if (d2 < rlist2)
{
- xind_f = xIndexFromCj<NbnxnLayout::Simd2xNN>(cjFromCi<NbnxnLayout::Simd2xNN, 0>(jGrid.cell0) + jclusterFirst);
+ xind_f = xIndexFromCj<NbnxnLayout::Simd2xNN>(cjFromCi<NbnxnLayout::Simd2xNN, 0>(jGrid.cellOffset()) + jclusterFirst);
jx_S = loadDuplicateHsimd(x_j + xind_f + 0*c_xStride2xNN);
jy_S = loadDuplicateHsimd(x_j + xind_f + 1*c_xStride2xNN);
while (!InRange && jclusterLast > jclusterFirst)
{
#if NBNXN_SEARCH_BB_SIMD4
- d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterLast, jGrid.bbj);
+ d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterLast, jGrid.jBoundingBoxes());
#else
- d2 = subc_bb_dist2(0, bb_ci, jclusterLast, jGrid.bbj);
+ d2 = subc_bb_dist2(0, bb_ci, jclusterLast, jGrid.jBoundingBoxes());
#endif
*numDistanceChecks += 2;
}
else if (d2 < rlist2)
{
- xind_l = xIndexFromCj<NbnxnLayout::Simd2xNN>(cjFromCi<NbnxnLayout::Simd2xNN, 0>(jGrid.cell0) + jclusterLast);
+ xind_l = xIndexFromCj<NbnxnLayout::Simd2xNN>(cjFromCi<NbnxnLayout::Simd2xNN, 0>(jGrid.cellOffset()) + jclusterLast);
jx_S = loadDuplicateHsimd(x_j + xind_l + 0*c_xStride2xNN);
jy_S = loadDuplicateHsimd(x_j + xind_l + 1*c_xStride2xNN);
{
/* Store cj and the interaction mask */
nbnxn_cj_t cjEntry;
- cjEntry.cj = cjFromCi<NbnxnLayout::Simd2xNN, 0>(jGrid.cell0) + jcluster;
+ cjEntry.cj = cjFromCi<NbnxnLayout::Simd2xNN, 0>(jGrid.cellOffset()) + jcluster;
cjEntry.excl = get_imask_simd_2xnn(excludeSubDiagonal, icluster, jcluster);
nbl->cj.push_back(cjEntry);
}
* \param[in,out] numDistanceChecks The number of distance checks performed
*/
static inline void
-makeClusterListSimd4xn(const nbnxn_grid_t &jGrid,
+makeClusterListSimd4xn(const Grid &jGrid,
NbnxnPairlistCpu * nbl,
int icluster,
int firstCell,
while (!InRange && jclusterFirst <= jclusterLast)
{
#if NBNXN_SEARCH_BB_SIMD4
- d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterFirst, jGrid.bbj);
+ d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterFirst, jGrid.jBoundingBoxes());
#else
- d2 = subc_bb_dist2(0, bb_ci, jclusterFirst, jGrid.bbj);
+ d2 = subc_bb_dist2(0, bb_ci, jclusterFirst, jGrid.jBoundingBoxes());
#endif
*numDistanceChecks += 2;
}
else if (d2 < rlist2)
{
- xind_f = xIndexFromCj<NbnxnLayout::Simd4xN>(cjFromCi<NbnxnLayout::Simd4xN, 0>(jGrid.cell0) + jclusterFirst);
+ xind_f = xIndexFromCj<NbnxnLayout::Simd4xN>(cjFromCi<NbnxnLayout::Simd4xN, 0>(jGrid.cellOffset()) + jclusterFirst);
jx_S = load<SimdReal>(x_j + xind_f + 0*c_xStride4xN);
jy_S = load<SimdReal>(x_j + xind_f + 1*c_xStride4xN);
while (!InRange && jclusterLast > jclusterFirst)
{
#if NBNXN_SEARCH_BB_SIMD4
- d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterLast, jGrid.bbj);
+ d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterLast, jGrid.jBoundingBoxes());
#else
- d2 = subc_bb_dist2(0, bb_ci, jclusterLast, jGrid.bbj);
+ d2 = subc_bb_dist2(0, bb_ci, jclusterLast, jGrid.jBoundingBoxes());
#endif
*numDistanceChecks += 2;
}
else if (d2 < rlist2)
{
- xind_l = xIndexFromCj<NbnxnLayout::Simd4xN>(cjFromCi<NbnxnLayout::Simd4xN, 0>(jGrid.cell0) + jclusterLast);
+ xind_l = xIndexFromCj<NbnxnLayout::Simd4xN>(cjFromCi<NbnxnLayout::Simd4xN, 0>(jGrid.cellOffset()) + jclusterLast);
jx_S = load<SimdReal>(x_j +xind_l + 0*c_xStride4xN);
jy_S = load<SimdReal>(x_j +xind_l + 1*c_xStride4xN);
{
/* Store cj and the interaction mask */
nbnxn_cj_t cjEntry;
- cjEntry.cj = cjFromCi<NbnxnLayout::Simd4xN, 0>(jGrid.cell0) + jcluster;
+ cjEntry.cj = cjFromCi<NbnxnLayout::Simd4xN, 0>(jGrid.cellOffset()) + jcluster;
cjEntry.excl = get_imask_simd_4xn(excludeSubDiagonal, icluster, jcluster);
nbl->cj.push_back(cjEntry);
}
*
* TODO: Merge into the constructor
*/
+typedef void nbnxn_free_t (void *ptr);
+
+/* Tells if the pair-list corresponding to nb_kernel_type is simple.
+ * Returns FALSE for super-sub type pair-list.
+ */
+gmx_bool nbnxn_kernel_pairlist_simple(int nb_kernel_type);
+
+/* Initializes a set of pair lists stored in nbnxn_pairlist_set_t */
void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list);
/*! \brief Prepare the list-set produced by the search for dynamic pruning
}
// The bounding boxes, pbc shifted, for each cluster
- AlignedVector<nbnxn_bb_t> bb;
+ AlignedVector<Nbnxm::BoundingBox> bb;
// The coordinates, pbc shifted, for each atom
- std::vector<real> x;
+ std::vector<real> x;
// Aligned list for storing 4*DIM*GMX_SIMD_REAL_WIDTH reals
- AlignedVector<real> xSimd;
+ AlignedVector<real> xSimd;
};
// Protect data from cache pollution between threads
}
// The bounding boxes, pbc shifted, for each cluster
- AlignedVector<nbnxn_bb_t> bb;
+ AlignedVector<Nbnxm::BoundingBox> bb;
// As bb, but in packed xxxx format
- AlignedVector<float> bbPacked;
+ AlignedVector<float> bbPacked;
// The coordinates, pbc shifted, for each atom
- AlignedVector<real> x;
+ AlignedVector<real> x;
// Aligned coordinate list used for 4*DIM*GMX_SIMD_REAL_WIDTH floats
- AlignedVector<real> xSimd;
+ AlignedVector<real> xSimd;
};
NbnxnPairlistGpuWork() :