From 978fb13ebe8450dc96f85c1033cfc682658acc0c Mon Sep 17 00:00:00 2001 From: Berk Hess Date: Tue, 8 Jan 2019 23:03:18 +0100 Subject: [PATCH] Turn nbnxn_grid_t into a class Change-Id: I7dbb2268ad7ee70a49db5fd20b1938e786b1da35 --- src/gromacs/nbnxm/atomdata.cpp | 114 ++-- src/gromacs/nbnxm/grid.cpp | 812 +++++++++++++------------ src/gromacs/nbnxm/grid.h | 371 +++++++++-- src/gromacs/nbnxm/internal.h | 22 +- src/gromacs/nbnxm/nbnxm_setup.cpp | 1 + src/gromacs/nbnxm/pairlist.cpp | 410 +++++++------ src/gromacs/nbnxm/pairlist_simd_2xmm.h | 16 +- src/gromacs/nbnxm/pairlist_simd_4xm.h | 16 +- src/gromacs/nbnxm/pairlistset.h | 8 + src/gromacs/nbnxm/pairlistwork.h | 14 +- 10 files changed, 1072 insertions(+), 712 deletions(-) diff --git a/src/gromacs/nbnxm/atomdata.cpp b/src/gromacs/nbnxm/atomdata.cpp index db26637b05..dcfd25cb24 100644 --- a/src/gromacs/nbnxm/atomdata.cpp +++ b/src/gromacs/nbnxm/atomdata.cpp @@ -71,7 +71,6 @@ using namespace gmx; // TODO: Remove when this file is moved into gmx namespace - void nbnxn_atomdata_t::resizeCoordinateBuffer(int numAtoms) { numAtoms_ = numAtoms; @@ -722,9 +721,7 @@ static void copy_lj_to_nbat_lj_comb(gmx::ArrayRef ljparam_type, static int numAtomsFromGrids(const nbnxn_search &nbs) { - const nbnxn_grid_t &lastGrid = nbs.grid.back(); - - return (lastGrid.cell0 + lastGrid.nc)*lastGrid.na_sc; + return nbs.grid.back().atomIndexEnd(); } /* Sets the atom type in nbnxn_atomdata_t */ @@ -734,16 +731,16 @@ static void nbnxn_atomdata_set_atomtypes(nbnxn_atomdata_t::Params *params, { params->type.resize(numAtomsFromGrids(*nbs)); - for (const nbnxn_grid_t &grid : nbs->grid) + for (const Nbnxm::Grid &grid : nbs->grid) { /* Loop over all columns and copy and fill */ - for (int i = 0; i < grid.numCells[XX]*grid.numCells[YY]; i++) + for (int i = 0; i < grid.numColumns(); i++) { - int ncz = grid.cxy_ind[i+1] - grid.cxy_ind[i]; - int ash = (grid.cell0 + grid.cxy_ind[i])*grid.na_sc; + const int numAtoms = grid.paddedNumAtomsInColumn(i); + const int atomOffset = grid.firstAtomInColumn(i); - copy_int_to_nbat_int(nbs->a.data() + ash, grid.cxy_na[i], ncz*grid.na_sc, - type, params->numTypes - 1, params->type.data() + ash); + copy_int_to_nbat_int(nbs->a.data() + atomOffset, grid.numAtomsInColumn(i), numAtoms, + type, params->numTypes - 1, params->type.data() + atomOffset); } } } @@ -757,34 +754,34 @@ static void nbnxn_atomdata_set_ljcombparams(nbnxn_atomdata_t::Params *params, if (params->comb_rule != ljcrNONE) { - for (const nbnxn_grid_t &grid : nbs->grid) + for (const Nbnxm::Grid &grid : nbs->grid) { /* Loop over all columns and copy and fill */ - for (int i = 0; i < grid.numCells[XX]*grid.numCells[YY]; i++) + for (int i = 0; i < grid.numColumns(); i++) { - int ncz = grid.cxy_ind[i+1] - grid.cxy_ind[i]; - int ash = (grid.cell0 + grid.cxy_ind[i])*grid.na_sc; + const int numAtoms = grid.paddedNumAtomsInColumn(i); + const int atomOffset = grid.firstAtomInColumn(i); if (XFormat == nbatX4) { copy_lj_to_nbat_lj_comb(params->nbfp_comb, - params->type.data() + ash, - ncz*grid.na_sc, - params->lj_comb.data() + ash*2); + params->type.data() + atomOffset, + numAtoms, + params->lj_comb.data() + atomOffset*2); } else if (XFormat == nbatX8) { copy_lj_to_nbat_lj_comb(params->nbfp_comb, - params->type.data() + ash, - ncz*grid.na_sc, - params->lj_comb.data() + ash*2); + params->type.data() + atomOffset, + numAtoms, + params->lj_comb.data() + atomOffset*2); } else if (XFormat == nbatXYZQ) { copy_lj_to_nbat_lj_comb<1>(params->nbfp_comb, - params->type.data() + ash, - ncz*grid.na_sc, - 
params->lj_comb.data() + ash*2); + params->type.data() + atomOffset, + numAtoms, + params->lj_comb.data() + atomOffset*2); } } } @@ -801,26 +798,26 @@ static void nbnxn_atomdata_set_charges(nbnxn_atomdata_t *nbat, nbat->paramsDeprecated().q.resize(nbat->numAtoms()); } - for (const nbnxn_grid_t &grid : nbs->grid) + for (const Nbnxm::Grid &grid : nbs->grid) { /* Loop over all columns and copy and fill */ - for (int cxy = 0; cxy < grid.numCells[XX]*grid.numCells[YY]; cxy++) + for (int cxy = 0; cxy < grid.numColumns(); cxy++) { - int ash = (grid.cell0 + grid.cxy_ind[cxy])*grid.na_sc; - int na = grid.cxy_na[cxy]; - int na_round = (grid.cxy_ind[cxy+1] - grid.cxy_ind[cxy])*grid.na_sc; + const int atomOffset = grid.firstAtomInColumn(cxy); + const int numAtoms = grid.numAtomsInColumn(cxy); + const int paddedNumAtoms = grid.paddedNumAtomsInColumn(cxy); if (nbat->XFormat == nbatXYZQ) { - real *q = nbat->x().data() + ash*STRIDE_XYZQ + ZZ + 1; + real *q = nbat->x().data() + atomOffset*STRIDE_XYZQ + ZZ + 1; int i; - for (i = 0; i < na; i++) + for (i = 0; i < numAtoms; i++) { - *q = charge[nbs->a[ash+i]]; + *q = charge[nbs->a[atomOffset + i]]; q += STRIDE_XYZQ; } /* Complete the partially filled last cell with zeros */ - for (; i < na_round; i++) + for (; i < paddedNumAtoms; i++) { *q = 0; q += STRIDE_XYZQ; @@ -828,15 +825,15 @@ static void nbnxn_atomdata_set_charges(nbnxn_atomdata_t *nbat, } else { - real *q = nbat->paramsDeprecated().q.data() + ash; + real *q = nbat->paramsDeprecated().q.data() + atomOffset; int i; - for (i = 0; i < na; i++) + for (i = 0; i < numAtoms; i++) { - *q = charge[nbs->a[ash+i]]; + *q = charge[nbs->a[atomOffset + i]]; q++; } /* Complete the partially filled last cell with zeros */ - for (; i < na_round; i++) + for (; i < paddedNumAtoms; i++) { *q = 0; q++; @@ -870,10 +867,10 @@ static void nbnxn_atomdata_mask_fep(nbnxn_atomdata_t *nbat, stride_q = 1; } - for (const nbnxn_grid_t &grid : nbs->grid) + for (const Nbnxm::Grid &grid : nbs->grid) { int nsubc; - if (grid.bSimple) + if (grid.geometry().isSimple) { nsubc = 1; } @@ -882,20 +879,21 @@ static void nbnxn_atomdata_mask_fep(nbnxn_atomdata_t *nbat, nsubc = c_gpuNumClusterPerCell; } - int c_offset = grid.cell0*grid.na_sc; + int c_offset = grid.firstAtomInColumn(0); /* Loop over all columns and copy and fill */ - for (int c = 0; c < grid.nc*nsubc; c++) + for (int c = 0; c < grid.numCells()*nsubc; c++) { /* Does this cluster contain perturbed particles? */ - if (grid.fep[c] != 0) + if (grid.clusterIsPerturbed(c)) { - for (int i = 0; i < grid.na_c; i++) + const int numAtomsPerCluster = grid.geometry().numAtomsICluster; + for (int i = 0; i < numAtomsPerCluster; i++) { /* Is this a perturbed particle? 
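                     * A minimal sketch of what these accessors plausibly wrap
                     * (assumed here, based on the old bitmask test just below):
                     * with fep_ holding one bit per atom of each cluster,
                     *   bool clusterIsPerturbed(int c) const { return fep_[c] != 0; }
                     *   bool atomIsPerturbed(int c, int i) const { return (fep_[c] & (1 << i)) != 0; }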
*/ - if (grid.fep[c] & (1 << i)) + if (grid.atomIsPerturbed(c, i)) { - int ind = c_offset + c*grid.na_c + i; + int ind = c_offset + c*numAtomsPerCluster + i; /* Set atom type and charge to non-interacting */ params.type[ind] = params.numTypes - 1; q[ind*stride_q] = 0; @@ -948,18 +946,18 @@ static void nbnxn_atomdata_set_energygroups(nbnxn_atomdata_t::Params *params, params->energrp.resize(numAtomsFromGrids(*nbs)); - for (const nbnxn_grid_t &grid : nbs->grid) + for (const Nbnxm::Grid &grid : nbs->grid) { /* Loop over all columns and copy and fill */ - for (int i = 0; i < grid.numCells[XX]*grid.numCells[YY]; i++) + for (int i = 0; i < grid.numColumns(); i++) { - int ncz = grid.cxy_ind[i+1] - grid.cxy_ind[i]; - int ash = (grid.cell0 + grid.cxy_ind[i])*grid.na_sc; + const int numAtoms = grid.paddedNumAtomsInColumn(i); + const int atomOffset = grid.firstAtomInColumn(i); - copy_egp_to_nbat_egps(nbs->a.data() + ash, grid.cxy_na[i], ncz*grid.na_sc, + copy_egp_to_nbat_egps(nbs->a.data() + atomOffset, grid.numAtomsInColumn(i), numAtoms, c_nbnxnCpuIClusterSize, params->neg_2log, atinfo, - params->energrp.data() + (ash >> grid.na_c_2log)); + params->energrp.data() + grid.atomToCluster(atomOffset)); } } } @@ -1034,7 +1032,7 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const nbnxn_search *nbs, if (FillLocal) { - nbat->natoms_local = nbs->grid[0].nc*nbs->grid[0].na_sc; + nbat->natoms_local = nbs->grid[0].atomIndexEnd(); } nth = gmx_omp_nthreads_get(emntPairsearch); @@ -1046,23 +1044,21 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const nbnxn_search *nbs, { for (int g = g0; g < g1; g++) { - const nbnxn_grid_t &grid = nbs->grid[g]; - const int numCellsXY = grid.numCells[XX]*grid.numCells[YY]; + const Nbnxm::Grid &grid = nbs->grid[g]; + const int numCellsXY = grid.numColumns(); const int cxy0 = (numCellsXY* th + nth - 1)/nth; const int cxy1 = (numCellsXY*(th + 1) + nth - 1)/nth; for (int cxy = cxy0; cxy < cxy1; cxy++) { - int na, ash, na_fill; - - na = grid.cxy_na[cxy]; - ash = (grid.cell0 + grid.cxy_ind[cxy])*grid.na_sc; + const int na = grid.numAtomsInColumn(cxy); + const int ash = grid.firstAtomInColumn(cxy); + int na_fill; if (g == 0 && FillLocal) { - na_fill = - (grid.cxy_ind[cxy+1] - grid.cxy_ind[cxy])*grid.na_sc; + na_fill = grid.paddedNumAtomsInColumn(cxy); } else { diff --git a/src/gromacs/nbnxm/grid.cpp b/src/gromacs/nbnxm/grid.cpp index 3e91161f2c..e971e0f34b 100644 --- a/src/gromacs/nbnxm/grid.cpp +++ b/src/gromacs/nbnxm/grid.cpp @@ -33,6 +33,15 @@ * the research papers on the package. Check out http://www.gromacs.org. */ +/*! \internal \file + * + * \brief + * Implements the Grid class. + * + * \author Berk Hess + * \ingroup module_nbnxm + */ + #include "gmxpre.h" #include "grid.h" @@ -51,6 +60,8 @@ #include "gromacs/nbnxm/atomdata.h" #include "gromacs/nbnxm/nbnxm.h" #include "gromacs/nbnxm/nbnxm_geometry.h" +#include "gromacs/nbnxm/pairlist.h" +#include "gromacs/nbnxm/pairlistset.h" #include "gromacs/simd/simd.h" #include "gromacs/simd/vector_operations.h" #include "gromacs/utility/exceptions.h" @@ -60,9 +71,27 @@ struct gmx_domdec_zones_t; -static real grid_atom_density(int numAtoms, - const rvec lowerCorner, - const rvec upperCorner) +namespace Nbnxm +{ + +Grid::Geometry::Geometry(const PairlistType pairlistType) : + isSimple(pairlistType != PairlistType::Hierarchical8x8), + numAtomsICluster(IClusterSizePerListType[pairlistType]), + numAtomsJCluster(JClusterSizePerListType[pairlistType]), + numAtomsPerCell((isSimple ? 
1 : c_gpuNumClusterPerCell)*numAtomsICluster), + numAtomsICluster2Log(get_2log(numAtomsICluster)) +{ +} + +Grid::Grid(const PairlistType pairlistType) : + geometry_(pairlistType) +{ +} + +/*! \brief Returns the atom density (> 0) of a rectangular grid */ +static real gridAtomDensity(int numAtoms, + const rvec lowerCorner, + const rvec upperCorner) { rvec size; @@ -77,60 +106,70 @@ static real grid_atom_density(int numAtoms, return numAtoms/(size[XX]*size[YY]*size[ZZ]); } -static void set_grid_size_xy(const nbnxn_search *nbs, - nbnxn_grid_t *grid, - int ddZone, - int numAtoms, - const rvec lowerCorner, - const rvec upperCorner, - real atomDensity) +void Grid::setDimensions(const nbnxn_search *nbs, + const int ddZone, + const int numAtoms, + const rvec lowerCorner, + const rvec upperCorner, + real atomDensity, + const real maxAtomGroupRadius) { - rvec size; - real tlen, tlen_x, tlen_y; + /* For the home zone we compute the density when not set (=-1) or when =0 */ + if (ddZone == 0 && atomDensity <= 0) + { + atomDensity = gridAtomDensity(numAtoms, lowerCorner, upperCorner); + } + dimensions_.atomDensity = atomDensity; + dimensions_.maxAtomGroupRadius = maxAtomGroupRadius; + + rvec size; rvec_sub(upperCorner, lowerCorner, size); - if (numAtoms > grid->na_sc) + if (numAtoms > geometry_.numAtomsPerCell) { GMX_ASSERT(atomDensity > 0, "With one or more atoms, the density should be positive"); /* target cell length */ - if (grid->bSimple) + real tlen_x; + real tlen_y; + if (geometry_.isSimple) { /* To minimize the zero interactions, we should make * the largest of the i/j cell cubic. */ - int numAtomsInCell = std::max(grid->na_c, grid->na_cj); + int numAtomsInCell = std::max(geometry_.numAtomsICluster, + geometry_.numAtomsJCluster); /* Approximately cubic cells */ - tlen = std::cbrt(numAtomsInCell/atomDensity); - tlen_x = tlen; - tlen_y = tlen; + real tlen = std::cbrt(numAtomsInCell/atomDensity); + tlen_x = tlen; + tlen_y = tlen; } else { /* Approximately cubic sub cells */ - tlen = std::cbrt(grid->na_c/atomDensity); - tlen_x = tlen*c_gpuNumClusterPerCellX; - tlen_y = tlen*c_gpuNumClusterPerCellY; + real tlen = std::cbrt(geometry_.numAtomsICluster/atomDensity); + tlen_x = tlen*c_gpuNumClusterPerCellX; + tlen_y = tlen*c_gpuNumClusterPerCellY; } /* We round ncx and ncy down, because we get less cell pairs * in the nbsist when the fixed cell dimensions (x,y) are * larger than the variable one (z) than the other way around. */ - grid->numCells[XX] = std::max(1, static_cast(size[XX]/tlen_x)); - grid->numCells[YY] = std::max(1, static_cast(size[YY]/tlen_y)); + dimensions_.numCells[XX] = std::max(1, static_cast(size[XX]/tlen_x)); + dimensions_.numCells[YY] = std::max(1, static_cast(size[YY]/tlen_y)); } else { - grid->numCells[XX] = 1; - grid->numCells[YY] = 1; + dimensions_.numCells[XX] = 1; + dimensions_.numCells[YY] = 1; } for (int d = 0; d < DIM - 1; d++) { - grid->cellSize[d] = size[d]/grid->numCells[d]; - grid->invCellSize[d] = 1/grid->cellSize[d]; + dimensions_.cellSize[d] = size[d]/dimensions_.numCells[d]; + dimensions_.invCellSize[d] = 1/dimensions_.cellSize[d]; } if (ddZone > 0) @@ -141,76 +180,77 @@ static void set_grid_size_xy(const nbnxn_search *nbs, * they end up on the grid, but for performance it's better * if they don't end up in cells that can be within cut-off range. 
*/ - grid->numCells[XX]++; - grid->numCells[YY]++; + dimensions_.numCells[XX]++; + dimensions_.numCells[YY]++; } /* We need one additional cell entry for particles moved by DD */ - int numCellsXY = grid->numCells[XX]*grid->numCells[YY]; - grid->cxy_na.resize(numCellsXY + 1); - grid->cxy_ind.resize(numCellsXY + 2); + cxy_na_.resize(numColumns() + 1); + cxy_ind_.resize(numColumns() + 2); for (nbnxn_search_work_t &work : nbs->work) { - work.cxy_na.resize(numCellsXY + 1); + work.cxy_na.resize(numColumns() + 1); } /* Worst case scenario of 1 atom in each last cell */ int maxNumCells; - if (grid->na_cj <= grid->na_c) + if (geometry_.numAtomsJCluster <= geometry_.numAtomsICluster) { - maxNumCells = numAtoms/grid->na_sc + numCellsXY; + maxNumCells = numAtoms/geometry_.numAtomsPerCell + numColumns(); } else { - maxNumCells = numAtoms/grid->na_sc + numCellsXY*grid->na_cj/grid->na_c; + maxNumCells = numAtoms/geometry_.numAtomsPerCell + numColumns()*geometry_.numAtomsJCluster/geometry_.numAtomsICluster; } - grid->nsubc.resize(maxNumCells); - grid->bbcz.resize(maxNumCells*NNBSBB_D); + if (!geometry_.isSimple) + { + numClusters_.resize(maxNumCells); + } + bbcz_.resize(maxNumCells*NNBSBB_D); /* This resize also zeros the contents, this avoid possible * floating exceptions in SIMD with the unused bb elements. */ - if (grid->bSimple) + if (geometry_.isSimple) { - grid->bb.resize(maxNumCells); + bb_.resize(maxNumCells); } else { #if NBNXN_BBXXXX - grid->pbb.resize(maxNumCells*c_gpuNumClusterPerCell/STRIDE_PBB*NNBSBB_XXXX); + pbb_.resize(maxNumCells*c_gpuNumClusterPerCell/STRIDE_PBB*NNBSBB_XXXX); #else - grid->bb.resize(maxNumCells*c_gpuNumClusterPerCell); + bb_.resize(maxNumCells*c_gpuNumClusterPerCell); #endif } - if (grid->bSimple) + if (geometry_.numAtomsJCluster == geometry_.numAtomsICluster) { - if (grid->na_cj == grid->na_c) - { - grid->bbj = grid->bb; - } - else - { - grid->bbjStorage.resize(maxNumCells*grid->na_c/grid->na_cj); - grid->bbj = grid->bbjStorage; - } + bbj_ = bb_; + } + else + { + GMX_ASSERT(geometry_.isSimple, "Only CPU lists should have different i/j cluster sizes"); + + bbjStorage_.resize(maxNumCells*geometry_.numAtomsICluster/geometry_.numAtomsJCluster); + bbj_ = bbjStorage_; } - grid->flags.resize(maxNumCells); + flags_.resize(maxNumCells); if (nbs->bFEP) { - grid->fep.resize(maxNumCells*grid->na_sc/grid->na_c); + fep_.resize(maxNumCells*geometry_.numAtomsPerCell/geometry_.numAtomsICluster); } - copy_rvec(lowerCorner, grid->c0); - copy_rvec(upperCorner, grid->c1); - copy_rvec(size, grid->size); + copy_rvec(lowerCorner, dimensions_.lowerCorner); + copy_rvec(upperCorner, dimensions_.upperCorner); + copy_rvec(size, dimensions_.gridSize); } /* We need to sort paricles in grid columns on z-coordinate. - * As particle are very often distributed homogeneously, we a sorting + * As particle are very often distributed homogeneously, we use a sorting * algorithm similar to pigeonhole sort. We multiply the z-coordinate * by a factor, cast to an int and try to store in that hole. If the hole * is full, we move this or another particle. A second pass is needed to make @@ -219,20 +259,23 @@ static void set_grid_size_xy(const nbnxn_search *nbs, * for an O(#particles) sort up till distributions were all particles are * concentrated in 1/4 of the space. No NlogN fallback is implemented, * as it can be expensive to detect imhomogeneous particle distributions. 
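 * As an illustration of the hole computation in sort_atoms() below (same
 * names as in this file): each atom's hole index is
 *   zi = static_cast<int>((x[a[i]][dim] - h0)*invh);
 * after invh has been scaled by n_per_h*c_sortGridRatio, so up to n_per_h
 * expected atoms map onto n_per_h*c_sortGridRatio primary holes, with n
 * overflow slots at the end; hence the c_sortGridMaxSizeFactor = ratio + 1
 * sizing of the sort buffer.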
- * SGSF is the maximum ratio of holes used, in the worst case all particles - * end up in the last hole and we need #particles extra holes at the end. */ -#define SORT_GRID_OVERSIZE 4 -#define SGSF (SORT_GRID_OVERSIZE + 1) +/*! \brief Ratio of grid cells to atoms */ +static constexpr int c_sortGridRatio = 4; +/*! \brief Maximum ratio of holes used, in the worst case all particles + * end up in the last hole and we need num. atoms extra holes at the end. + */ +static constexpr int c_sortGridMaxSizeFactor = c_sortGridRatio + 1; -/* Sort particle index a on coordinates x along dim. +/*! \brief Sorts particle index a on coordinates x along dim. + * * Backwards tells if we want decreasing iso increasing coordinates. * h0 is the minimum of the coordinate range. * invh is the 1/length of the sorting range. * n_per_h (>=n) is the expected average number of particles per 1/invh * sort is the sorting work array. - * sort should have a size of at least n_per_h*SORT_GRID_OVERSIZE + n, - * or easier, allocate at least n*SGSF elements. + * sort should have a size of at least n_per_h*c_sortGridRatio + n, + * or easier, allocate at least n*c_sortGridMaxSizeFactor elements. */ static void sort_atoms(int dim, gmx_bool Backwards, int gmx_unused dd_zone, @@ -251,12 +294,12 @@ static void sort_atoms(int dim, gmx_bool Backwards, GMX_ASSERT(n <= n_per_h, "We require n <= n_per_h"); /* Transform the inverse range height into the inverse hole height */ - invh *= n_per_h*SORT_GRID_OVERSIZE; + invh *= n_per_h*c_sortGridRatio; /* Set nsort to the maximum possible number of holes used. * In worst case all n elements end up in the last bin. */ - int nsort = n_per_h*SORT_GRID_OVERSIZE + n; + int nsort = n_per_h*c_sortGridRatio + n; /* Determine the index range used, so we can limit it for the second pass */ int zi_min = INT_MAX; @@ -274,11 +317,11 @@ static void sort_atoms(int dim, gmx_bool Backwards, #ifndef NDEBUG /* As we can have rounding effect, we use > iso >= here */ if (relevantAtomsAreWithinGridBounds && - (zi < 0 || (dd_zone == 0 && zi > n_per_h*SORT_GRID_OVERSIZE))) + (zi < 0 || (dd_zone == 0 && zi > n_per_h*c_sortGridRatio))) { gmx_fatal(FARGS, "(int)((x[%d][%c]=%f - %f)*%f) = %d, not in 0 - %d*%d\n", a[i], 'x'+dim, x[a[i]][dim], h0, invh, zi, - n_per_h, SORT_GRID_OVERSIZE); + n_per_h, c_sortGridRatio); } #endif if (zi < 0) @@ -290,9 +333,9 @@ static void sort_atoms(int dim, gmx_bool Backwards, * can be far beyond the grid size, which is set by the non-bonded * cut-off distance. We sort such particles into the last cell. */ - if (zi > n_per_h*SORT_GRID_OVERSIZE) + if (zi > n_per_h*c_sortGridRatio) { - zi = n_per_h*SORT_GRID_OVERSIZE; + zi = n_per_h*c_sortGridRatio; } /* Ideally this particle should go in sort cell zi, @@ -370,15 +413,32 @@ static void sort_atoms(int dim, gmx_bool Backwards, } #if GMX_DOUBLE -#define R2F_D(x) ((float)((x) >= 0 ? ((1-GMX_FLOAT_EPS)*(x)) : ((1+GMX_FLOAT_EPS)*(x)))) -#define R2F_U(x) ((float)((x) >= 0 ? ((1+GMX_FLOAT_EPS)*(x)) : ((1-GMX_FLOAT_EPS)*(x)))) +//! Returns double up to one least significant float bit smaller than x +static double R2F_D(const float x) +{ + return static_cast(x >= 0 ? (1 - GMX_FLOAT_EPS)*x : (1 + GMX_FLOAT_EPS)*x); +} +//! Returns double up to one least significant float bit larger than x +static double R2F_U(const float x) +{ + return static_cast(x >= 0 ? (1 + GMX_FLOAT_EPS)*x : (1 - GMX_FLOAT_EPS)*x); +} #else -#define R2F_D(x) (x) -#define R2F_U(x) (x) +//! Returns x +static float R2F_D(const float x) +{ + return x; +} +//! 
Returns x +static float R2F_U(const float x) +{ + return x; +} #endif -/* Coordinate order x,y,z, bb order xyz0 */ -static void calc_bounding_box(int na, int stride, const real *x, nbnxn_bb_t *bb) +//! Computes the bounding box for na coordinates in order x,y,z, bb order xyz0 +static void calc_bounding_box(int na, int stride, const real *x, + BoundingBox *bb) { int i; real xl, xh, yl, yh, zl, zh; @@ -410,8 +470,9 @@ static void calc_bounding_box(int na, int stride, const real *x, nbnxn_bb_t *bb) bb->upper[BB_Z] = R2F_U(zh); } -/* Packed coordinates, bb order xyz0 */ -static void calc_bounding_box_x_x4(int na, const real *x, nbnxn_bb_t *bb) +/*! \brief Computes the bounding box for na packed coordinates, bb order xyz0 */ +static void calc_bounding_box_x_x4(int na, const real *x, + BoundingBox *bb) { real xl, xh, yl, yh, zl, zh; @@ -439,8 +500,9 @@ static void calc_bounding_box_x_x4(int na, const real *x, nbnxn_bb_t *bb) bb->upper[BB_Z] = R2F_U(zh); } -/* Packed coordinates, bb order xyz0 */ -static void calc_bounding_box_x_x8(int na, const real *x, nbnxn_bb_t *bb) +/*! \brief Computes the bounding box for na coordinates, bb order xyz0 */ +static void calc_bounding_box_x_x8(int na, const real *x, + BoundingBox *bb) { real xl, xh, yl, yh, zl, zh; @@ -468,9 +530,10 @@ static void calc_bounding_box_x_x8(int na, const real *x, nbnxn_bb_t *bb) bb->upper[BB_Z] = R2F_U(zh); } -/* Packed coordinates, bb order xyz0 */ +/*! \brief Computes the bounding box for na packed coordinates, bb order xyz0 */ gmx_unused static void calc_bounding_box_x_x4_halves(int na, const real *x, - nbnxn_bb_t *bb, nbnxn_bb_t *bbj) + BoundingBox *bb, + BoundingBox *bbj) { // TODO: During SIMDv2 transition only some archs use namespace (remove when done) using namespace gmx; @@ -512,7 +575,7 @@ gmx_unused static void calc_bounding_box_x_x4_halves(int na, const real *x, #if NBNXN_SEARCH_BB_SIMD4 -/* Coordinate order xyz, bb order xxxxyyyyzzzz */ +/*! \brief Computes the bounding box for na coordinates in order xyz, bb order xxxxyyyyzzzz */ static void calc_bounding_box_xxxx(int na, int stride, const real *x, float *bb) { int i; @@ -549,8 +612,9 @@ static void calc_bounding_box_xxxx(int na, int stride, const real *x, float *bb) #if NBNXN_SEARCH_SIMD4_FLOAT_X_BB -/* Coordinate order xyz?, bb order xyz0 */ -static void calc_bounding_box_simd4(int na, const float *x, nbnxn_bb_t *bb) +/*! \brief Computes the bounding box for na coordinates in order xyz?, bb order xyz0 */ +static void calc_bounding_box_simd4(int na, const float *x, + BoundingBox *bb) { // TODO: During SIMDv2 transition only some archs use namespace (remove when done) using namespace gmx; @@ -572,9 +636,9 @@ static void calc_bounding_box_simd4(int na, const float *x, nbnxn_bb_t *bb) store4(&bb->upper[0], bb_1_S); } -/* Coordinate order xyz?, bb order xxxxyyyyzzzz */ +/*! \brief Computes the bounding box for na coordinates in order xyz?, bb order xxxxyyyyzzzz */ static void calc_bounding_box_xxxx_simd4(int na, const float *x, - nbnxn_bb_t *bb_work_aligned, + BoundingBox *bb_work_aligned, real *bb) { calc_bounding_box_simd4(na, x, bb_work_aligned); @@ -590,21 +654,22 @@ static void calc_bounding_box_xxxx_simd4(int na, const float *x, #endif /* NBNXN_SEARCH_SIMD4_FLOAT_X_BB */ -/* Combines pairs of consecutive bounding boxes */ -static void combine_bounding_box_pairs(nbnxn_grid_t *grid, - gmx::ArrayRef bb) +/*! 
\brief Combines pairs of consecutive bounding boxes */ +static void combine_bounding_box_pairs(const Grid &grid, + gmx::ArrayRef bb, + gmx::ArrayRef bbj) { // TODO: During SIMDv2 transition only some archs use namespace (remove when done) using namespace gmx; - for (int i = 0; i < grid->numCells[XX]*grid->numCells[YY]; i++) + for (int i = 0; i < grid.numColumns(); i++) { /* Starting bb in a column is expected to be 2-aligned */ - int sc2 = grid->cxy_ind[i]>>1; + const int sc2 = grid.firstCellInColumn(i) >> 1; /* For odd numbers skip the last bb here */ - int nc2 = (grid->cxy_na[i]+3)>>(2+1); - int c2; - for (c2 = sc2; c2 < sc2+nc2; c2++) + const int nc2 = (grid.numAtomsInColumn(i) + 3) >> (2 + 1); + int c2; + for (c2 = sc2; c2 < sc2 + nc2; c2++) { #if NBNXN_SEARCH_BB_SIMD4 Simd4Float min_S, max_S; @@ -613,72 +678,73 @@ static void combine_bounding_box_pairs(nbnxn_grid_t *grid, load4(&bb[c2*2+1].lower[0])); max_S = max(load4(&bb[c2*2+0].upper[0]), load4(&bb[c2*2+1].upper[0])); - store4(&grid->bbj[c2].lower[0], min_S); - store4(&grid->bbj[c2].upper[0], max_S); + store4(&bbj[c2].lower[0], min_S); + store4(&bbj[c2].upper[0], max_S); #else for (int j = 0; j < NNBSBB_C; j++) { - grid->bbj[c2].lower[j] = std::min(bb[c2*2+0].lower[j], - bb[c2*2+1].lower[j]); - grid->bbj[c2].upper[j] = std::max(bb[c2*2+0].upper[j], - bb[c2*2+1].upper[j]); + bbj[c2].lower[j] = std::min(bb[c2*2 + 0].lower[j], + bb[c2*2 + 1].lower[j]); + bbj[c2].upper[j] = std::max(bb[c2*2 + 0].upper[j], + bb[c2*2 + 1].upper[j]); } #endif } - if (((grid->cxy_na[i]+3)>>2) & 1) + if (((grid.numAtomsInColumn(i) + 3) >> 2) & 1) { /* The bb count in this column is odd: duplicate the last bb */ for (int j = 0; j < NNBSBB_C; j++) { - grid->bbj[c2].lower[j] = bb[c2*2].lower[j]; - grid->bbj[c2].upper[j] = bb[c2*2].upper[j]; + bbj[c2].lower[j] = bb[c2*2].lower[j]; + bbj[c2].upper[j] = bb[c2*2].upper[j]; } } } } -/* Prints the average bb size, used for debug output */ -static void print_bbsizes_simple(FILE *fp, - const nbnxn_grid_t *grid) +/*! \brief Prints the average bb size, used for debug output */ +static void print_bbsizes_simple(FILE *fp, + const Grid &grid) { dvec ba; clear_dvec(ba); - for (int c = 0; c < grid->nc; c++) + for (int c = 0; c < grid.numCells(); c++) { for (int d = 0; d < DIM; d++) { - ba[d] += grid->bb[c].upper[d] - grid->bb[c].lower[d]; + ba[d] += grid.iBoundingBoxes()[c].upper[d] - grid.iBoundingBoxes()[c].lower[d]; } } - dsvmul(1.0/grid->nc, ba, ba); + dsvmul(1.0/grid.numCells(), ba, ba); - real avgCellSizeZ = (grid->atom_density > 0 ? grid->na_c/(grid->atom_density*grid->cellSize[XX]*grid->cellSize[YY]) : 0.0); + const Grid::Dimensions &dims = grid.dimensions(); + real avgCellSizeZ = (dims.atomDensity > 0 ? grid.geometry().numAtomsICluster/(dims.atomDensity*dims.cellSize[XX]*dims.cellSize[YY]) : 0.0); fprintf(fp, "ns bb: grid %4.2f %4.2f %4.2f abs %4.2f %4.2f %4.2f rel %4.2f %4.2f %4.2f\n", - grid->cellSize[XX], - grid->cellSize[YY], + dims.cellSize[XX], + dims.cellSize[YY], avgCellSizeZ, ba[XX], ba[YY], ba[ZZ], - ba[XX]*grid->invCellSize[XX], - ba[YY]*grid->invCellSize[YY], - grid->atom_density > 0 ? ba[ZZ]/avgCellSizeZ : 0.0); + ba[XX]*dims.invCellSize[XX], + ba[YY]*dims.invCellSize[YY], + dims.atomDensity > 0 ? ba[ZZ]/avgCellSizeZ : 0.0); } -/* Prints the average bb size, used for debug output */ -static void print_bbsizes_supersub(FILE *fp, - const nbnxn_grid_t *grid) +/*! 
\brief Prints the average bb size, used for debug output */ +static void print_bbsizes_supersub(FILE *fp, + const Grid &grid) { int ns; dvec ba; clear_dvec(ba); ns = 0; - for (int c = 0; c < grid->nc; c++) + for (int c = 0; c < grid.numCells(); c++) { #if NBNXN_BBXXXX - for (int s = 0; s < grid->nsubc[c]; s += STRIDE_PBB) + for (int s = 0; s < grid.numClustersPerCell()[c]; s += STRIDE_PBB) { int cs_w = (c*c_gpuNumClusterPerCell + s)/STRIDE_PBB; for (int i = 0; i < STRIDE_PBB; i++) @@ -686,38 +752,41 @@ static void print_bbsizes_supersub(FILE *fp, for (int d = 0; d < DIM; d++) { ba[d] += - grid->pbb[cs_w*NNBSBB_XXXX+(DIM+d)*STRIDE_PBB+i] - - grid->pbb[cs_w*NNBSBB_XXXX+ d *STRIDE_PBB+i]; + grid.packedBoundingBoxes()[cs_w*NNBSBB_XXXX + (DIM + d)*STRIDE_PBB + i] - + grid.packedBoundingBoxes()[cs_w*NNBSBB_XXXX + d *STRIDE_PBB + i]; } } } #else - for (int s = 0; s < grid->nsubc[c]; s++) + for (int s = 0; s < grid.numClustersPerCell()[c]; s++) { int cs = c*c_gpuNumClusterPerCell + s; for (int d = 0; d < DIM; d++) { - ba[d] += grid->bb[cs].upper[d] - grid->bb[cs].lower[d]; + ba[d] += grid.iBoundingBoxes()[cs].upper[d] - grid.iBoundingBoxes()[cs].lower[d]; } } #endif - ns += grid->nsubc[c]; + ns += grid.numClustersPerCell()[c]; } dsvmul(1.0/ns, ba, ba); - real avgClusterSizeZ = (grid->atom_density > 0 ? grid->na_sc/(grid->atom_density*grid->cellSize[XX]*grid->cellSize[YY]*c_gpuNumClusterPerCellZ) : 0.0); + const Grid::Dimensions &dims = grid.dimensions(); + const real avgClusterSizeZ = + (dims.atomDensity > 0 ? grid.geometry().numAtomsPerCell/(dims.atomDensity*dims.cellSize[XX]*dims.cellSize[YY]*c_gpuNumClusterPerCellZ) : 0.0); fprintf(fp, "ns bb: grid %4.2f %4.2f %4.2f abs %4.2f %4.2f %4.2f rel %4.2f %4.2f %4.2f\n", - grid->cellSize[XX]/c_gpuNumClusterPerCellX, - grid->cellSize[YY]/c_gpuNumClusterPerCellY, + dims.cellSize[XX]/c_gpuNumClusterPerCellX, + dims.cellSize[YY]/c_gpuNumClusterPerCellY, avgClusterSizeZ, ba[XX], ba[YY], ba[ZZ], - ba[XX]*c_gpuNumClusterPerCellX*grid->invCellSize[XX], - ba[YY]*c_gpuNumClusterPerCellY*grid->invCellSize[YY], - grid->atom_density > 0 ? ba[ZZ]/avgClusterSizeZ : 0.0); + ba[XX]*c_gpuNumClusterPerCellX*dims.invCellSize[XX], + ba[YY]*c_gpuNumClusterPerCellY*dims.invCellSize[YY], + dims.atomDensity > 0 ? ba[ZZ]/avgClusterSizeZ : 0.0); } -/* Set non-bonded interaction flags for the current cluster. +/*!\brief Set non-bonded interaction flags for the current cluster. + * * Sorts atoms on LJ coefficients: !=0 first, ==0 at the end. */ static void sort_cluster_on_flag(int numAtomsInCluster, @@ -792,28 +861,28 @@ static void sort_cluster_on_flag(int numAtomsInCluster, } } -/* Fill a pair search cell with atoms. +/*! \brief Fill a pair search cell with atoms. + * * Potentially sorts atoms and sets the interaction flags. */ -static void fill_cell(nbnxn_search *nbs, - nbnxn_grid_t *grid, - nbnxn_atomdata_t *nbat, - int atomStart, - int atomEnd, - const int *atinfo, - gmx::ArrayRef x, - nbnxn_bb_t gmx_unused *bb_work_aligned) +void Grid::fillCell(nbnxn_search *nbs, + nbnxn_atomdata_t *nbat, + int atomStart, + int atomEnd, + const int *atinfo, + gmx::ArrayRef x, + BoundingBox gmx_unused *bb_work_aligned) { const int numAtoms = atomEnd - atomStart; - if (grid->bSimple) + if (geometry_.isSimple) { /* Note that non-local grids are already sorted. * Then sort_cluster_on_flag will only set the flags and the sorting * will not affect the atom order. 
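         * A minimal sketch of the atomToCluster() conversion used below
         * (assumed; it mirrors the old explicit shift by na_c_2log):
         *   int atomToCluster(int atomIndex) const
         *   { return atomIndex >> geometry_.numAtomsICluster2Log; }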
*/ - sort_cluster_on_flag(grid->na_c, atomStart, atomEnd, atinfo, nbs->a, - grid->flags.data() + (atomStart >> grid->na_c_2log) - grid->cell0); + sort_cluster_on_flag(geometry_.numAtomsICluster, atomStart, atomEnd, atinfo, nbs->a, + flags_.data() + atomToCluster(atomStart) - cellOffset_); } if (nbs->bFEP) @@ -821,13 +890,13 @@ static void fill_cell(nbnxn_search *nbs, /* Set the fep flag for perturbed atoms in this (sub-)cell */ /* The grid-local cluster/(sub-)cell index */ - int cell = (atomStart >> grid->na_c_2log) - grid->cell0*(grid->bSimple ? 1 : c_gpuNumClusterPerCell); - grid->fep[cell] = 0; + int cell = atomToCluster(atomStart) - cellOffset_*(geometry_.isSimple ? 1 : c_gpuNumClusterPerCell); + fep_[cell] = 0; for (int at = atomStart; at < atomEnd; at++) { if (nbs->a[at] >= 0 && GET_CGINFO_FEP(atinfo[nbs->a[at]])) { - grid->fep[cell] |= (1 << (at - atomStart)); + fep_[cell] |= (1 << (at - atomStart)); } } } @@ -838,21 +907,21 @@ static void fill_cell(nbnxn_search *nbs, nbs->cell[nbs->a[at]] = at; } - copy_rvec_to_nbat_real(nbs->a.data() + atomStart, numAtoms, grid->na_c, + copy_rvec_to_nbat_real(nbs->a.data() + atomStart, numAtoms, geometry_.numAtomsICluster, as_rvec_array(x.data()), nbat->XFormat, nbat->x().data(), atomStart); if (nbat->XFormat == nbatX4) { /* Store the bounding boxes as xyz.xyz. */ - size_t offset = (atomStart - grid->cell0*grid->na_sc) >> grid->na_c_2log; - nbnxn_bb_t *bb_ptr = grid->bb.data() + offset; + size_t offset = atomToCluster(atomStart - cellOffset_*geometry_.numAtomsICluster); + BoundingBox *bb_ptr = bb_.data() + offset; #if GMX_SIMD && GMX_SIMD_REAL_WIDTH == 2 - if (2*grid->na_cj == grid->na_c) + if (2*geometry_.numAtomsJCluster == geometry_.numAtomsICluster) { calc_bounding_box_x_x4_halves(numAtoms, nbat->x().data() + atom_to_x_index(atomStart), bb_ptr, - grid->bbj.data() + offset*2); + bbj_.data() + offset*2); } else #endif @@ -863,21 +932,21 @@ static void fill_cell(nbnxn_search *nbs, else if (nbat->XFormat == nbatX8) { /* Store the bounding boxes as xyz.xyz. */ - size_t offset = (atomStart - grid->cell0*grid->na_sc) >> grid->na_c_2log; - nbnxn_bb_t *bb_ptr = grid->bb.data() + offset; + size_t offset = atomToCluster(atomStart - cellOffset_*geometry_.numAtomsICluster); + BoundingBox *bb_ptr = bb_.data() + offset; calc_bounding_box_x_x8(numAtoms, nbat->x().data() + atom_to_x_index(atomStart), bb_ptr); } #if NBNXN_BBXXXX - else if (!grid->bSimple) + else if (!geometry_.isSimple) { /* Store the bounding boxes in a format convenient * for SIMD4 calculations: xxxxyyyyzzzz... */ float *pbb_ptr = - grid->pbb.data() + - ((atomStart - grid->cell0*grid->na_sc) >> (grid->na_c_2log + STRIDE_PBB_2LOG))*NNBSBB_XXXX + - (((atomStart - grid->cell0*grid->na_sc) >> grid->na_c_2log) & (STRIDE_PBB - 1)); + pbb_.data() + + ((atomStart - cellOffset_*geometry_.numAtomsPerCell) >> (geometry_.numAtomsICluster2Log + STRIDE_PBB_2LOG))*NNBSBB_XXXX + + (((atomStart - cellOffset_*geometry_.numAtomsPerCell) >> geometry_.numAtomsICluster2Log) & (STRIDE_PBB - 1)); #if NBNXN_SEARCH_SIMD4_FLOAT_X_BB if (nbat->XFormat == nbatXYZQ) @@ -894,7 +963,7 @@ static void fill_cell(nbnxn_search *nbs, if (gmx_debug_at) { fprintf(debug, "cell %4d bb %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f\n", - atomStart >> grid->na_c_2log, + atomToCluster(atomStart), pbb_ptr[0*STRIDE_PBB], pbb_ptr[3*STRIDE_PBB], pbb_ptr[1*STRIDE_PBB], pbb_ptr[4*STRIDE_PBB], pbb_ptr[2*STRIDE_PBB], pbb_ptr[5*STRIDE_PBB]); @@ -904,97 +973,97 @@ static void fill_cell(nbnxn_search *nbs, else { /* Store the bounding boxes as xyz.xyz. 
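         * For contrast with the packed path above: here each cluster stores
         * two plain corners, bb_[c].lower[] and bb_[c].upper[], while the
         * xxxxyyyyzzzz layout groups one component of STRIDE_PBB clusters
         * together, so a 4-wide SIMD load fetches the same component of
         * STRIDE_PBB clusters at once.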
*/ - nbnxn_bb_t *bb_ptr = grid->bb.data() + ((atomStart - grid->cell0*grid->na_sc) >> grid->na_c_2log); + BoundingBox *bb_ptr = bb_.data() + atomToCluster(atomStart - cellOffset_*geometry_.numAtomsPerCell); calc_bounding_box(numAtoms, nbat->xstride, nbat->x().data() + atomStart*nbat->xstride, bb_ptr); if (gmx_debug_at) { - int bbo = (atomStart - grid->cell0*grid->na_sc)/grid->na_c; + int bbo = atomToCluster(atomStart - cellOffset_*geometry_.numAtomsPerCell); fprintf(debug, "cell %4d bb %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f\n", - atomStart >> grid->na_c_2log, - grid->bb[bbo].lower[BB_X], - grid->bb[bbo].lower[BB_Y], - grid->bb[bbo].lower[BB_Z], - grid->bb[bbo].upper[BB_X], - grid->bb[bbo].upper[BB_Y], - grid->bb[bbo].upper[BB_Z]); + atomToCluster(atomStart), + bb_[bbo].lower[BB_X], + bb_[bbo].lower[BB_Y], + bb_[bbo].lower[BB_Z], + bb_[bbo].upper[BB_X], + bb_[bbo].upper[BB_Y], + bb_[bbo].upper[BB_Z]); } } } -/* Spatially sort the atoms within one grid column */ -static void sort_columns_simple(nbnxn_search *nbs, - int dd_zone, - nbnxn_grid_t *grid, - int atomStart, int atomEnd, - const int *atinfo, - gmx::ArrayRef x, - nbnxn_atomdata_t *nbat, - int cxy_start, int cxy_end, - gmx::ArrayRef sort_work) +void Grid::sortColumnsCpuGeometry(nbnxn_search *nbs, + int dd_zone, + int atomStart, int atomEnd, + const int *atinfo, + gmx::ArrayRef x, + nbnxn_atomdata_t *nbat, + int cxy_start, int cxy_end, + gmx::ArrayRef sort_work) { if (debug) { - fprintf(debug, "cell0 %d sorting columns %d - %d, atoms %d - %d\n", - grid->cell0, cxy_start, cxy_end, atomStart, atomEnd); + fprintf(debug, "cell_offset %d sorting columns %d - %d, atoms %d - %d\n", + cellOffset_, cxy_start, cxy_end, atomStart, atomEnd); } - const bool relevantAtomsAreWithinGridBounds = (grid->maxAtomGroupRadius == 0); + const bool relevantAtomsAreWithinGridBounds = (dimensions_.maxAtomGroupRadius == 0); + + const int numAtomsPerCell = geometry_.numAtomsPerCell; /* Sort the atoms within each x,y column in 3 dimensions */ for (int cxy = cxy_start; cxy < cxy_end; cxy++) { - int na = grid->cxy_na[cxy]; - int ncz = grid->cxy_ind[cxy+1] - grid->cxy_ind[cxy]; - int ash = (grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc; + const int numAtoms = numAtomsInColumn(cxy); + const int numCellsZ = cxy_ind_[cxy + 1] - cxy_ind_[cxy]; + const int atomOffset = firstAtomInColumn(cxy); /* Sort the atoms within each x,y column on z coordinate */ sort_atoms(ZZ, FALSE, dd_zone, relevantAtomsAreWithinGridBounds, - nbs->a.data() + ash, na, x, - grid->c0[ZZ], - 1.0/grid->size[ZZ], ncz*grid->na_sc, + nbs->a.data() + atomOffset, numAtoms, x, + dimensions_.lowerCorner[ZZ], + 1.0/dimensions_.gridSize[ZZ], numCellsZ*numAtomsPerCell, sort_work); /* Fill the ncz cells in this column */ - int cfilled = grid->cxy_ind[cxy]; - for (int cz = 0; cz < ncz; cz++) + const int firstCell = firstCellInColumn(cxy); + int cellFilled = firstCell; + for (int cellZ = 0; cellZ < numCellsZ; cellZ++) { - int c = grid->cxy_ind[cxy] + cz; + const int cell = firstCell + cellZ; - int ash_c = ash + cz*grid->na_sc; - int na_c = std::min(grid->na_sc, na-(ash_c-ash)); + const int atomOffsetCell = atomOffset + cellZ*numAtomsPerCell; + const int numAtomsCell = std::min(numAtomsPerCell, numAtoms - (atomOffsetCell - atomOffset)); - fill_cell(nbs, grid, nbat, - ash_c, ash_c+na_c, atinfo, x, - nullptr); + fillCell(nbs, nbat, + atomOffsetCell, atomOffsetCell + numAtomsCell, atinfo, x, + nullptr); /* This copy to bbcz is not really necessary. 
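             * (bbcz_ stores just the z-extent of each cell: bbcz_[c*NNBSBB_D]
             * is the lower z bound and bbcz_[c*NNBSBB_D + 1] the upper one.)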
* But it allows to use the same grid search code * for the simple and supersub cell setups. */ - if (na_c > 0) + if (numAtomsCell > 0) { - cfilled = c; + cellFilled = cell; } - grid->bbcz[c*NNBSBB_D ] = grid->bb[cfilled].lower[BB_Z]; - grid->bbcz[c*NNBSBB_D+1] = grid->bb[cfilled].upper[BB_Z]; + bbcz_[cell*NNBSBB_D ] = bb_[cellFilled].lower[BB_Z]; + bbcz_[cell*NNBSBB_D + 1] = bb_[cellFilled].upper[BB_Z]; } /* Set the unused atom indices to -1 */ - for (int ind = na; ind < ncz*grid->na_sc; ind++) + for (int ind = numAtoms; ind < numCellsZ*numAtomsPerCell; ind++) { - nbs->a[ash+ind] = -1; + nbs->a[atomOffset + ind] = -1; } } } /* Spatially sort the atoms within one grid column */ -static void sort_columns_supersub(nbnxn_search *nbs, +void Grid::sortColumnsGpuGeometry(nbnxn_search *nbs, int dd_zone, - nbnxn_grid_t *grid, int atomStart, int atomEnd, const int *atinfo, gmx::ArrayRef x, @@ -1002,63 +1071,65 @@ static void sort_columns_supersub(nbnxn_search *nbs, int cxy_start, int cxy_end, gmx::ArrayRef sort_work) { - nbnxn_bb_t bb_work_array[2]; - nbnxn_bb_t *bb_work_aligned = reinterpret_cast((reinterpret_cast(bb_work_array + 1)) & (~(static_cast(15)))); + BoundingBox bb_work_array[2]; + BoundingBox *bb_work_aligned = reinterpret_cast((reinterpret_cast(bb_work_array + 1)) & (~(static_cast(15)))); if (debug) { - fprintf(debug, "cell0 %d sorting columns %d - %d, atoms %d - %d\n", - grid->cell0, cxy_start, cxy_end, atomStart, atomEnd); + fprintf(debug, "cell_offset %d sorting columns %d - %d, atoms %d - %d\n", + cellOffset_, cxy_start, cxy_end, atomStart, atomEnd); } - const bool relevantAtomsAreWithinGridBounds = (grid->maxAtomGroupRadius == 0); + const bool relevantAtomsAreWithinGridBounds = (dimensions_.maxAtomGroupRadius == 0); - int subdiv_x = grid->na_c; - int subdiv_y = c_gpuNumClusterPerCellX*subdiv_x; - int subdiv_z = c_gpuNumClusterPerCellY*subdiv_y; + const int numAtomsPerCell = geometry_.numAtomsPerCell; + + const int subdiv_x = geometry_.numAtomsICluster; + const int subdiv_y = c_gpuNumClusterPerCellX*subdiv_x; + const int subdiv_z = c_gpuNumClusterPerCellY*subdiv_y; /* Sort the atoms within each x,y column in 3 dimensions. * Loop over all columns on the x/y grid. 
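     * The subdivision constants above walk the cell hierarchically:
     * subdiv_x is one i-cluster, subdiv_y spans the c_gpuNumClusterPerCellX
     * clusters of one row, and subdiv_z spans a full x,y layer of a cell,
     * so sorting along z, then y, then x splits each cell into
     * c_gpuNumClusterPerCell clusters of numAtomsICluster atoms each.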
*/ for (int cxy = cxy_start; cxy < cxy_end; cxy++) { - int gridX = cxy/grid->numCells[YY]; - int gridY = cxy - gridX*grid->numCells[YY]; + const int gridX = cxy/dimensions_.numCells[YY]; + const int gridY = cxy - gridX*dimensions_.numCells[YY]; - int numAtomsInColumn = grid->cxy_na[cxy]; - int numCellsInColumn = grid->cxy_ind[cxy + 1] - grid->cxy_ind[cxy]; - int ash = (grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc; + const int numAtomsInColumn = cxy_na_[cxy]; + const int numCellsInColumn = cxy_ind_[cxy + 1] - cxy_ind_[cxy]; + const int atomOffset = firstAtomInColumn(cxy); /* Sort the atoms within each x,y column on z coordinate */ sort_atoms(ZZ, FALSE, dd_zone, relevantAtomsAreWithinGridBounds, - nbs->a.data() + ash, numAtomsInColumn, x, - grid->c0[ZZ], - 1.0/grid->size[ZZ], numCellsInColumn*grid->na_sc, + nbs->a.data() + atomOffset, numAtomsInColumn, x, + dimensions_.lowerCorner[ZZ], + 1.0/dimensions_.gridSize[ZZ], numCellsInColumn*numAtomsPerCell, sort_work); /* This loop goes over the cells and clusters along z at once */ for (int sub_z = 0; sub_z < numCellsInColumn*c_gpuNumClusterPerCellZ; sub_z++) { - int ash_z = ash + sub_z*subdiv_z; - int na_z = std::min(subdiv_z, numAtomsInColumn - (ash_z - ash)); - int cz = -1; + const int atomOffsetZ = atomOffset + sub_z*subdiv_z; + const int numAtomsZ = std::min(subdiv_z, numAtomsInColumn - (atomOffsetZ - atomOffset)); + int cz = -1; /* We have already sorted on z */ if (sub_z % c_gpuNumClusterPerCellZ == 0) { cz = sub_z/c_gpuNumClusterPerCellZ; - int cell = grid->cxy_ind[cxy] + cz; + const int cell = cxy_ind_[cxy] + cz; /* The number of atoms in this cell/super-cluster */ - int numAtoms = std::min(grid->na_sc, numAtomsInColumn - (ash_z - ash)); + const int numAtoms = std::min(numAtomsPerCell, numAtomsInColumn - (atomOffsetZ - atomOffset)); - grid->nsubc[cell] = std::min(c_gpuNumClusterPerCell, - (numAtoms + grid->na_c - 1)/grid->na_c); + numClusters_[cell] = std::min(c_gpuNumClusterPerCell, + (numAtoms + geometry_.numAtomsICluster - 1)/ geometry_.numAtomsICluster); /* Store the z-boundaries of the bounding box of the cell */ - grid->bbcz[cell*NNBSBB_D ] = x[nbs->a[ash_z]][ZZ]; - grid->bbcz[cell*NNBSBB_D+1] = x[nbs->a[ash_z + numAtoms - 1]][ZZ]; + bbcz_[cell*NNBSBB_D ] = x[nbs->a[atomOffsetZ]][ZZ]; + bbcz_[cell*NNBSBB_D+1] = x[nbs->a[atomOffsetZ + numAtoms - 1]][ZZ]; } if (c_gpuNumClusterPerCellY > 1) @@ -1066,49 +1137,49 @@ static void sort_columns_supersub(nbnxn_search *nbs, /* Sort the atoms along y */ sort_atoms(YY, (sub_z & 1) != 0, dd_zone, relevantAtomsAreWithinGridBounds, - nbs->a.data() + ash_z, na_z, x, - grid->c0[YY] + gridY*grid->cellSize[YY], - grid->invCellSize[YY], subdiv_z, + nbs->a.data() + atomOffsetZ, numAtomsZ, x, + dimensions_.lowerCorner[YY] + gridY*dimensions_.cellSize[YY], + dimensions_.invCellSize[YY], subdiv_z, sort_work); } for (int sub_y = 0; sub_y < c_gpuNumClusterPerCellY; sub_y++) { - int ash_y = ash_z + sub_y*subdiv_y; - int na_y = std::min(subdiv_y, numAtomsInColumn - (ash_y - ash)); + const int atomOffsetY = atomOffsetZ + sub_y*subdiv_y; + const int numAtomsY = std::min(subdiv_y, numAtomsInColumn - (atomOffsetY - atomOffset)); if (c_gpuNumClusterPerCellX > 1) { /* Sort the atoms along x */ sort_atoms(XX, ((cz*c_gpuNumClusterPerCellY + sub_y) & 1) != 0, dd_zone, relevantAtomsAreWithinGridBounds, - nbs->a.data() + ash_y, na_y, x, - grid->c0[XX] + gridX*grid->cellSize[XX], - grid->invCellSize[XX], subdiv_y, + nbs->a.data() + atomOffsetY, numAtomsY, x, + dimensions_.lowerCorner[XX] + gridX*dimensions_.cellSize[XX], 
+                           dimensions_.invCellSize[XX], subdiv_y,
                            sort_work);
                }

                for (int sub_x = 0; sub_x < c_gpuNumClusterPerCellX; sub_x++)
                {
-                    int ash_x = ash_y + sub_x*subdiv_x;
-                    int na_x  = std::min(subdiv_x, numAtomsInColumn - (ash_x - ash));
+                    const int atomOffsetX = atomOffsetY + sub_x*subdiv_x;
+                    const int numAtomsX   = std::min(subdiv_x, numAtomsInColumn - (atomOffsetX - atomOffset));

-                    fill_cell(nbs, grid, nbat,
-                              ash_x, ash_x + na_x, atinfo, x,
-                              bb_work_aligned);
+                    fillCell(nbs, nbat,
+                             atomOffsetX, atomOffsetX + numAtomsX, atinfo, x,
+                             bb_work_aligned);
                }
            }
        }

        /* Set the unused atom indices to -1 */
-        for (int ind = numAtomsInColumn; ind < numCellsInColumn*grid->na_sc; ind++)
+        for (int ind = numAtomsInColumn; ind < numCellsInColumn*numAtomsPerCell; ind++)
        {
-            nbs->a[ash + ind] = -1;
+            nbs->a[atomOffset + ind] = -1;
        }
    }
}

-/* Sets the cell index in the cell array for atom \p atomIndex and increments the atom count for the grid column */
+/*! \brief Sets the cell index in the cell array for atom \p atomIndex and increments the atom count for the grid column */
 static void setCellAndAtomCount(gmx::ArrayRef<int>  cell,
                                 int                 cellIndex,
                                 gmx::ArrayRef<int>  cxy_na,
@@ -1118,8 +1189,8 @@ static void setCellAndAtomCount(gmx::ArrayRef<int>  cell,
     cxy_na[cellIndex] += 1;
 }

-/* Determine in which grid column atoms should go */
-static void calc_column_indices(nbnxn_grid_t *grid,
+/*! \brief Determine in which grid column atoms should go */
+static void calc_column_indices(const Grid::Dimensions &gridDims,
                                 const gmx::UpdateGroupsCog *updateGroupsCog,
                                 int atomStart, int atomEnd,
                                 gmx::ArrayRef<const gmx::RVec> x,
                                 int dd_zone, const int *move,
                                 int thread, int nthread,
                                 gmx::ArrayRef<int> cell,
                                 gmx::ArrayRef<int> cxy_na)
 {
+    const int numColumns = gridDims.numCells[XX]*gridDims.numCells[YY];
+
     /* We add one extra cell for particles which moved during DD */
-    for (int i = 0; i < grid->numCells[XX]*grid->numCells[YY] + 1; i++)
+    for (int i = 0; i < numColumns + 1; i++)
     {
         cxy_na[i] = 0;
     }

     int taskAtomStart = atomStart + static_cast<int>((thread + 0)*(atomEnd - atomStart))/nthread;
     int taskAtomEnd   = atomStart + static_cast<int>((thread + 1)*(atomEnd - atomStart))/nthread;
+
     if (dd_zone == 0)
     {
         /* Home zone */
@@ -1151,35 +1225,39 @@ static void calc_column_indices(nbnxn_grid_t *grid,
                  * The int cast takes care of the lower bound,
                  * we will explicitly take care of the upper bound.
                  */
-                int cx = static_cast<int>((coord[XX] - grid->c0[XX])*grid->invCellSize[XX]);
-                int cy = static_cast<int>((coord[YY] - grid->c0[YY])*grid->invCellSize[YY]);
+                int cx = static_cast<int>((coord[XX] - gridDims.lowerCorner[XX])*gridDims.invCellSize[XX]);
+                int cy = static_cast<int>((coord[YY] - gridDims.lowerCorner[YY])*gridDims.invCellSize[YY]);

 #ifndef NDEBUG
-                if (cx < 0 || cx > grid->numCells[XX] ||
-                    cy < 0 || cy > grid->numCells[YY])
+                if (cx < 0 || cx > gridDims.numCells[XX] ||
+                    cy < 0 || cy > gridDims.numCells[YY])
                 {
                     gmx_fatal(FARGS,
                               "grid cell cx %d cy %d out of range (max %d %d)\n"
                               "atom %f %f %f, grid->c0 %f %f",
-                              cx, cy, grid->numCells[XX], grid->numCells[YY],
-                              x[i][XX], x[i][YY], x[i][ZZ], grid->c0[XX], grid->c0[YY]);
+                              cx, cy,
+                              gridDims.numCells[XX],
+                              gridDims.numCells[YY],
+                              x[i][XX], x[i][YY], x[i][ZZ],
+                              gridDims.lowerCorner[XX],
+                              gridDims.lowerCorner[YY]);
                 }
 #endif
                 /* Take care of potential rouding issues */
-                cx = std::min(cx, grid->numCells[XX] - 1);
-                cy = std::min(cy, grid->numCells[YY] - 1);
+                cx = std::min(cx, gridDims.numCells[XX] - 1);
+                cy = std::min(cy, gridDims.numCells[YY] - 1);

                /* For the moment cell will contain only the, grid local,
                 * x and y indices, not z.
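                 * As a worked example (illustrative numbers only), assuming
                 * 1 nm cells, numCells[YY] = 10 and the lower corner at the
                 * origin: an atom at x = 3.4 nm, y = 7.9 nm gets cx = 3,
                 * cy = 7 and is counted into column cx*numCells[YY] + cy = 37.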
*/ - setCellAndAtomCount(cell, cx*grid->numCells[YY] + cy, cxy_na, i); + setCellAndAtomCount(cell, cx*gridDims.numCells[YY] + cy, cxy_na, i); } else { /* Put this moved particle after the end of the grid, * so we can process it later without using conditionals. */ - setCellAndAtomCount(cell, grid->numCells[XX]*grid->numCells[YY], cxy_na, i); + setCellAndAtomCount(cell, numColumns, cxy_na, i); } } } @@ -1188,8 +1266,8 @@ static void calc_column_indices(nbnxn_grid_t *grid, /* Non-home zone */ for (int i = taskAtomStart; i < taskAtomEnd; i++) { - int cx = static_cast((x[i][XX] - grid->c0[XX])*grid->invCellSize[XX]); - int cy = static_cast((x[i][YY] - grid->c0[YY])*grid->invCellSize[YY]); + int cx = static_cast((x[i][XX] - gridDims.lowerCorner[XX])*gridDims.invCellSize[XX]); + int cy = static_cast((x[i][YY] - gridDims.lowerCorner[YY])*gridDims.invCellSize[YY]); /* For non-home zones there could be particles outside * the non-bonded cut-off range, which have been communicated @@ -1198,26 +1276,24 @@ static void calc_column_indices(nbnxn_grid_t *grid, * we put them in an extra row at the border. */ cx = std::max(cx, 0); - cx = std::min(cx, grid->numCells[XX] - 1); + cx = std::min(cx, gridDims.numCells[XX] - 1); cy = std::max(cy, 0); - cy = std::min(cy, grid->numCells[YY] - 1); + cy = std::min(cy, gridDims.numCells[YY] - 1); /* For the moment cell will contain only the, grid local, * x and y indices, not z. */ - setCellAndAtomCount(cell, cx*grid->numCells[YY] + cy, cxy_na, i); + setCellAndAtomCount(cell, cx*gridDims.numCells[YY] + cy, cxy_na, i); } } } -/* Resizes grid and atom data which depend on the number of cells */ -static void resizeForNumberOfCells(const nbnxn_grid_t &grid, - int numAtomsMoved, +/*! \brief Resizes grid and atom data which depend on the number of cells */ +static void resizeForNumberOfCells(const int numNbnxnAtoms, + const int numAtomsMoved, nbnxn_search *nbs, nbnxn_atomdata_t *nbat) { - int numNbnxnAtoms = (grid.cell0 + grid.nc)*grid.na_sc; - /* Note: nbs->cell was already resized before */ /* To avoid conditionals we store the moved particles at the end of a, @@ -1229,20 +1305,20 @@ static void resizeForNumberOfCells(const nbnxn_grid_t &grid, nbat->resizeCoordinateBuffer(numNbnxnAtoms); } -/* Determine in which grid cells the atoms should go */ -static void -calc_cell_indices(nbnxn_search *nbs, - int ddZone, - nbnxn_grid_t *grid, - const gmx::UpdateGroupsCog *updateGroupsCog, - int atomStart, - int atomEnd, - const int *atinfo, - gmx::ArrayRef x, - int numAtomsMoved, - const int *move, - nbnxn_atomdata_t *nbat) +void Grid::calcCellIndices(nbnxn_search *nbs, + int ddZone, + int cellOffset, + const gmx::UpdateGroupsCog *updateGroupsCog, + int atomStart, + int atomEnd, + const int *atinfo, + gmx::ArrayRef x, + int numAtomsMoved, + const int *move, + nbnxn_atomdata_t *nbat) { + cellOffset_ = cellOffset; + /* First compute all grid/column indices and store them in nbs->cell */ nbs->cell.resize(atomEnd); @@ -1253,7 +1329,8 @@ calc_cell_indices(nbnxn_search *nbs, { try { - calc_column_indices(grid, updateGroupsCog, + calc_column_indices(dimensions_, + updateGroupsCog, atomStart, atomEnd, x, ddZone, move, thread, nthread, nbs->cell, nbs->work[thread].cxy_na); @@ -1261,11 +1338,13 @@ calc_cell_indices(nbnxn_search *nbs, GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } + const int numAtomsPerCell = geometry_.numAtomsPerCell; + /* Make the cell index as a function of x and y */ int ncz_max = 0; int ncz = 0; - grid->cxy_ind[0] = 0; - for (int i = 0; i < 
grid->numCells[XX]*grid->numCells[YY] + 1; i++) + cxy_ind_[0] = 0; + for (int i = 0; i < numColumns() + 1; i++) { /* We set ncz_max at the beginning of the loop iso at the end * to skip i=grid->ncx*grid->numCells[YY] which are moved particles @@ -1280,34 +1359,35 @@ calc_cell_indices(nbnxn_search *nbs, { cxy_na_i += nbs->work[thread].cxy_na[i]; } - ncz = (cxy_na_i + grid->na_sc - 1)/grid->na_sc; + ncz = (cxy_na_i + numAtomsPerCell - 1)/numAtomsPerCell; if (nbat->XFormat == nbatX8) { /* Make the number of cell a multiple of 2 */ ncz = (ncz + 1) & ~1; } - grid->cxy_ind[i+1] = grid->cxy_ind[i] + ncz; - /* Clear cxy_na, so we can reuse the array below */ - grid->cxy_na[i] = 0; + cxy_ind_[i+1] = cxy_ind_[i] + ncz; + /* Clear cxy_na_, so we can reuse the array below */ + cxy_na_[i] = 0; } - grid->nc = grid->cxy_ind[grid->numCells[XX]*grid->numCells[YY]] - grid->cxy_ind[0]; + numCellsTotal_ = cxy_ind_[numColumns()] - cxy_ind_[0]; - resizeForNumberOfCells(*grid, numAtomsMoved, nbs, nbat); + resizeForNumberOfCells(atomIndexEnd(), numAtomsMoved, nbs, nbat); if (debug) { fprintf(debug, "ns na_sc %d na_c %d super-cells: %d x %d y %d z %.1f maxz %d\n", - grid->na_sc, grid->na_c, grid->nc, - grid->numCells[XX], grid->numCells[YY], grid->nc/(static_cast(grid->numCells[XX]*grid->numCells[YY])), + numAtomsPerCell, geometry_.numAtomsICluster, numCellsTotal_, + dimensions_.numCells[XX], dimensions_.numCells[YY], + numCellsTotal_/(static_cast(numColumns())), ncz_max); if (gmx_debug_at) { int i = 0; - for (int cy = 0; cy < grid->numCells[YY]; cy++) + for (int cy = 0; cy < dimensions_.numCells[YY]; cy++) { - for (int cx = 0; cx < grid->numCells[XX]; cx++) + for (int cx = 0; cx < dimensions_.numCells[XX]; cx++) { - fprintf(debug, " %2d", grid->cxy_ind[i+1]-grid->cxy_ind[i]); + fprintf(debug, " %2d", cxy_ind_[i + 1] - cxy_ind_[i]); i++; } fprintf(debug, "\n"); @@ -1316,12 +1396,13 @@ calc_cell_indices(nbnxn_search *nbs, } /* Make sure the work array for sorting is large enough */ - if (ncz_max*grid->na_sc*SGSF > gmx::index(nbs->work[0].sortBuffer.size())) + const int worstCaseSortBufferSize = ncz_max*numAtomsPerCell*c_sortGridMaxSizeFactor; + if (worstCaseSortBufferSize > gmx::index(nbs->work[0].sortBuffer.size())) { for (nbnxn_search_work_t &work : nbs->work) { /* Elements not in use should be -1 */ - work.sortBuffer.resize(ncz_max*grid->na_sc*SGSF, -1); + work.sortBuffer.resize(worstCaseSortBufferSize, -1); } } @@ -1332,14 +1413,14 @@ calc_cell_indices(nbnxn_search *nbs, { /* At this point nbs->cell contains the local grid x,y indices */ int cxy = nbs->cell[i]; - nbs->a[(grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc + grid->cxy_na[cxy]++] = i; + nbs->a[firstAtomInColumn(cxy) + cxy_na_[cxy]++] = i; } if (ddZone == 0) { /* Set the cell indices for the moved particles */ - int n0 = grid->nc*grid->na_sc; - int n1 = grid->nc*grid->na_sc+grid->cxy_na[grid->numCells[XX]*grid->numCells[YY]]; + int n0 = numCellsTotal_*numAtomsPerCell; + int n1 = numCellsTotal_*numAtomsPerCell + cxy_na_[numColumns()]; if (ddZone == 0) { for (int i = n0; i < n1; i++) @@ -1355,54 +1436,59 @@ calc_cell_indices(nbnxn_search *nbs, { try { - int columnStart = ((thread + 0)*grid->numCells[XX]*grid->numCells[YY])/nthread; - int columnEnd = ((thread + 1)*grid->numCells[XX]*grid->numCells[YY])/nthread; - if (grid->bSimple) + int columnStart = ((thread + 0)*numColumns())/nthread; + int columnEnd = ((thread + 1)*numColumns())/nthread; + if (geometry_.isSimple) { - sort_columns_simple(nbs, ddZone, grid, atomStart, atomEnd, atinfo, x, nbat, - 
columnStart, columnEnd, - nbs->work[thread].sortBuffer); + sortColumnsCpuGeometry(nbs, ddZone, atomStart, atomEnd, atinfo, x, nbat, + columnStart, columnEnd, + nbs->work[thread].sortBuffer); } else { - sort_columns_supersub(nbs, ddZone, grid, atomStart, atomEnd, atinfo, x, nbat, - columnStart, columnEnd, - nbs->work[thread].sortBuffer); + sortColumnsGpuGeometry(nbs, ddZone, atomStart, atomEnd, atinfo, x, nbat, + columnStart, columnEnd, + nbs->work[thread].sortBuffer); } } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } - if (grid->bSimple && nbat->XFormat == nbatX8) + if (geometry_.isSimple && nbat->XFormat == nbatX8) { - combine_bounding_box_pairs(grid, grid->bb); + combine_bounding_box_pairs(*this, bb_, bbj_); } - if (!grid->bSimple) + if (!geometry_.isSimple) { - grid->nsubc_tot = 0; - for (int i = 0; i < grid->nc; i++) + numClustersTotal_ = 0; + for (int i = 0; i < numCellsTotal_; i++) { - grid->nsubc_tot += grid->nsubc[i]; + numClustersTotal_ += numClusters_[i]; } } if (debug) { - if (grid->bSimple) + if (geometry_.isSimple) { - print_bbsizes_simple(debug, grid); + print_bbsizes_simple(debug, *this); } else { fprintf(debug, "ns non-zero sub-cells: %d average atoms %.2f\n", - grid->nsubc_tot, (atomEnd - atomStart)/static_cast(grid->nsubc_tot)); + numClustersTotal_, + (atomEnd - atomStart)/static_cast(numClustersTotal_)); - print_bbsizes_supersub(debug, grid); + print_bbsizes_supersub(debug, *this); } } } +} // namespace Nbnxm + +// TODO: Move the functions below to nbnxn.cpp + /* Sets up a grid and puts the atoms on the grid. * This function only operates on one domain of the domain decompostion. * Note that without domain decomposition there is only one domain. @@ -1422,82 +1508,63 @@ void nbnxn_put_on_grid(nonbonded_verlet_t *nbv, const int *move) { nbnxn_search *nbs = nbv->nbs.get(); - nbnxn_grid_t *grid = &nbs->grid[ddZone]; + Nbnxm::Grid &grid = nbs->grid[ddZone]; nbs_cycle_start(&nbs->cc[enbsCCgrid]); - grid->bSimple = nbv->pairlistIsSimple(); - - grid->na_c = IClusterSizePerListType[nbv->pairlistSets().params().pairlistType]; - grid->na_cj = JClusterSizePerListType[nbv->pairlistSets().params().pairlistType]; - grid->na_sc = (grid->bSimple ? 1 : c_gpuNumClusterPerCell)*grid->na_c; - grid->na_c_2log = get_2log(grid->na_c); - + int cellOffset; if (ddZone == 0) { - grid->cell0 = 0; + cellOffset = 0; } else { - grid->cell0 = - (nbs->grid[ddZone - 1].cell0 + nbs->grid[ddZone - 1].nc)* - nbs->grid[ddZone- 1].na_sc/grid->na_sc; + const Nbnxm::Grid &previousGrid = nbs->grid[ddZone - 1]; + cellOffset = previousGrid.atomIndexEnd()/previousGrid.geometry().numAtomsPerCell; } const int n = atomEnd - atomStart; + real maxAtomGroupRadius; if (ddZone == 0) { copy_mat(box, nbs->box); - /* Avoid zero density */ - if (atomDensity > 0) - { - grid->atom_density = atomDensity; - } - else - { - grid->atom_density = grid_atom_density(n - numAtomsMoved, lowerCorner, upperCorner); - } - - grid->cell0 = 0; - nbs->natoms_local = atomEnd - numAtomsMoved; /* We assume that nbnxn_put_on_grid is called first * for the local atoms (ddZone=0). */ nbs->natoms_nonlocal = atomEnd - numAtomsMoved; - /* When not using atom groups, all atoms should be within the grid bounds */ - grid->maxAtomGroupRadius = (updateGroupsCog ? updateGroupsCog->maxUpdateGroupRadius() : 0); - /* For the non-local grids the situation is the same as for the local */ - for (size_t g = 1; g < nbs->grid.size(); g++) - { - nbs->grid[g].maxAtomGroupRadius = grid->maxAtomGroupRadius; - } + maxAtomGroupRadius = (updateGroupsCog ? 
updateGroupsCog->maxUpdateGroupRadius() : 0); if (debug) { fprintf(debug, "natoms_local = %5d atom_density = %5.1f\n", - nbs->natoms_local, grid->atom_density); + nbs->natoms_local, atomDensity); } } else { + const Nbnxm::Grid::Dimensions &dimsGrid0 = nbs->grid[0].dimensions(); + atomDensity = dimsGrid0.atomDensity; + maxAtomGroupRadius = dimsGrid0.maxAtomGroupRadius; + nbs->natoms_nonlocal = std::max(nbs->natoms_nonlocal, atomEnd); } /* We always use the home zone (grid[0]) for setting the cell size, * since determining densities for non-local zones is difficult. */ - set_grid_size_xy(nbs, grid, - ddZone, n - numAtomsMoved, - lowerCorner, upperCorner, - nbs->grid[0].atom_density); + grid.setDimensions(nbs, + ddZone, n - numAtomsMoved, + lowerCorner, upperCorner, + atomDensity, + maxAtomGroupRadius); nbnxn_atomdata_t *nbat = nbv->nbat.get(); - calc_cell_indices(nbs, ddZone, grid, updateGroupsCog, atomStart, atomEnd, atinfo, x, numAtomsMoved, move, nbat); + grid.calcCellIndices(nbs, ddZone, cellOffset, updateGroupsCog, atomStart, atomEnd, atinfo, x, numAtomsMoved, move, nbat); if (ddZone == 0) { @@ -1541,16 +1608,16 @@ void nbnxn_put_on_grid_nonlocal(nonbonded_verlet_t *nbv, void nbnxn_get_ncells(const nbnxn_search *nbs, int *ncx, int *ncy) { - *ncx = nbs->grid[0].numCells[XX]; - *ncy = nbs->grid[0].numCells[YY]; + *ncx = nbs->grid[0].dimensions().numCells[XX]; + *ncy = nbs->grid[0].dimensions().numCells[YY]; } gmx::ArrayRef nbnxn_get_atomorder(const nbnxn_search *nbs) { /* Return the atom order for the home cell (index 0) */ - const nbnxn_grid_t &grid = nbs->grid[0]; + const Nbnxm::Grid &grid = nbs->grid[0]; - int numIndices = grid.cxy_ind[grid.numCells[XX]*grid.numCells[YY]]*grid.na_sc; + const int numIndices = grid.atomIndexEnd() - grid.firstAtomInColumn(0); return gmx::constArrayRefFromArray(nbs->a.data(), numIndices); } @@ -1558,22 +1625,19 @@ gmx::ArrayRef nbnxn_get_atomorder(const nbnxn_search *nbs) void nbnxn_set_atomorder(nbnxn_search *nbs) { /* Set the atom order for the home cell (index 0) */ - nbnxn_grid_t *grid = &nbs->grid[0]; + const Nbnxm::Grid &grid = nbs->grid[0]; - int ao = 0; - for (int cx = 0; cx < grid->numCells[XX]; cx++) + int atomIndex = 0; + for (int cxy = 0; cxy < grid.numColumns(); cxy++) { - for (int cy = 0; cy < grid->numCells[YY]; cy++) + const int numAtoms = grid.numAtomsInColumn(cxy); + int cellIndex = grid.firstCellInColumn(cxy)*grid.geometry().numAtomsPerCell; + for (int i = 0; i < numAtoms; i++) { - int cxy = cx*grid->numCells[YY] + cy; - int j = grid->cxy_ind[cxy]*grid->na_sc; - for (int cz = 0; cz < grid->cxy_na[cxy]; cz++) - { - nbs->a[j] = ao; - nbs->cell[ao] = j; - ao++; - j++; - } + nbs->a[cellIndex] = atomIndex; + nbs->cell[atomIndex] = cellIndex; + atomIndex++; + cellIndex++; } } } diff --git a/src/gromacs/nbnxm/grid.h b/src/gromacs/nbnxm/grid.h index adab79c8c5..f70ed4453f 100644 --- a/src/gromacs/nbnxm/grid.h +++ b/src/gromacs/nbnxm/grid.h @@ -35,10 +35,18 @@ /*! \internal \file * - * \brief Declares the grid and bounding box objects + * \brief + * Declares the Grid class. * - * \author Berk Hess + * This class provides functionality for setting up and accessing atoms + * on a grid for one domain decomposition zone. This grid is used for + * generating cluster pair lists for computing non-bonded pair interactions. + * The grid consists of a regular array of columns along dimensions x and y. + * Along z the number of cells and their boundaries vary between the columns. 
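+ * As an illustrative example (numbers invented for this comment): a zone
+ * gridded into 8x8 columns has 64 columns, and a column holding 18 atoms
+ * at 4 atoms per cell is assigned ceil(18/4) = 5 cells along z, the last
+ * of which is only partially filled.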
+ * Each cell can hold one or more clusters of atoms, depending on the grid + * geometry, which is set by the pair-list type. + * + * \author Berk Hess + * \ingroup module_nbnxm */ @@ -51,13 +59,26 @@ #include "gromacs/math/vectypes.h" #include "gromacs/simd/simd.h" #include "gromacs/utility/alignedallocator.h" +#include "gromacs/utility/arrayref.h" struct gmx_domdec_zones_t; +struct nbnxn_atomdata_t; +struct nbnxn_search; +enum class PairlistType; + +namespace gmx +{ +class UpdateGroupsCog; +} +namespace Nbnxm +{ #ifndef DOXYGEN +// TODO: Convert macros to constexpr int and replace BB_? indexing by struct with x,y,z + /* Pair search box lower and upper corner in x,y,z. * Store this in 4 instead of 3 reals, which is useful with 4-wide SIMD. * To avoid complicating the code we also use 4 without 4-wide SIMD. @@ -74,14 +95,17 @@ struct gmx_domdec_zones_t; /* Bounding box for a nbnxn atom cluster */ -typedef struct { +struct BoundingBox +{ float lower[NNBSBB_C]; float upper[NNBSBB_C]; -} nbnxn_bb_t; +}; #ifndef DOXYGEN +// TODO: Convert macros to constexpr int + /* Bounding box calculations are (currently) always in single precision, so * we only need to check for single precision support here. * This uses less (cache-)memory and SIMD is faster, at least on x86. @@ -128,57 +152,308 @@ typedef struct { #endif // !DOXYGEN -/* A pair-search grid struct for one domain decomposition zone +/*! \internal + * \brief A pair-search grid object for one domain decomposition zone + * + * This is a rectangular 3D grid covering a potentially non-rectangular + * volume which is either the whole unit cell or the local zone or part + * of a non-local zone when using domain decomposition. The grid cells + * are evenly spaced along x/y and irregular along z. Each cell is sub-divided + * into atom clusters. With a CPU geometry, each cell contains 1 or 2 clusters. + * With a GPU geometry, each cell contains up to 8 clusters. The geometry is + * set by the pairlist type, which is the only argument of the constructor. + * + * When multiple grids are used, i.e. with domain decomposition, we want + * to avoid the overhead of multiple coordinate arrays or extra indexing. + * Therefore each grid stores a cell offset, so a contiguous cell index + * can be used to index atom arrays. All methods returning atom indices + * return indices which index into a full atom array. * * Note that when atom groups, instead of individual atoms, are assigned * to grid cells, individual atoms can be geometrically outside the cell * and grid that they have been assigned to (as determined by the center * or geometry of the atom group they belong to). 
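 * As a worked indexing example (values invented): with cellOffset() == 100,
 * numAtomsPerCell == 8 and a column whose first cell within this grid has
 * index 12, the column's atoms start at global atom index (100 + 12)*8 = 896,
 * which is what firstAtomInColumn() returns.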
*/ -struct nbnxn_grid_t +class Grid { - rvec c0; /* The lower corner of the (local) grid */ - rvec c1; /* The upper corner of the (local) grid */ - rvec size; /* c1 - c0 */ - real atom_density; /* The atom number density for the local grid */ - real maxAtomGroupRadius; /* The maximum distance an atom can be outside - * of a cell and outside of the grid - */ - - gmx_bool bSimple; /* Is this grid simple or super/sub */ - int na_c; /* Number of atoms per cluster */ - int na_cj; /* Number of atoms for list j-clusters */ - int na_sc; /* Number of atoms per super-cluster */ - int na_c_2log; /* 2log of na_c */ - - int numCells[DIM - 1]; /* Number of cells along x/y */ - int nc; /* Total number of cells */ - - real cellSize[DIM - 1]; /* size of a cell */ - real invCellSize[DIM - 1]; /* 1/cellSize */ - - int cell0; /* Index in nbs->cell corresponding to cell 0 */ - - /* Grid data */ - std::vector cxy_na; /* The number of atoms for each column in x,y */ - std::vector cxy_ind; /* Grid (super)cell index, offset from cell0 */ - - std::vector nsubc; /* The number of sub cells for each super cell */ - - /* Bounding boxes */ - std::vector bbcz; /* Bounding boxes in z for the cells */ - std::vector < nbnxn_bb_t, gmx::AlignedAllocator < nbnxn_bb_t>> bb; /* 3D bounding boxes for the sub cells */ - std::vector < nbnxn_bb_t, gmx::AlignedAllocator < nbnxn_bb_t>> bbjStorage; /* 3D j-bounding boxes for the case where - * the i- and j-cluster sizes are different */ - gmx::ArrayRef bbj; /* 3D j-bounding boxes */ - std::vector < float, gmx::AlignedAllocator < float>> pbb; /* 3D b. boxes in xxxx format per super cell */ - - /* Bit-flag information */ - std::vector flags; /* Flags for properties of clusters in each cell */ - std::vector fep; /* FEP signal bits for atoms in each cluster */ - - /* Statistics */ - int nsubc_tot; /* Total number of subcell, used for printing */ + public: + /*! \internal + * \brief The cluster and cell geometry of a grid + */ + struct Geometry + { + //! Constructs the cluster/cell geometry given the type of pairlist + Geometry(PairlistType pairlistType); + + bool isSimple; //!< Is this grid simple (CPU) or hierarchical (GPU) + int numAtomsICluster; //!< Number of atoms per cluster + int numAtomsJCluster; //!< Number of atoms for list j-clusters + int numAtomsPerCell; //!< Number of atoms per cell + int numAtomsICluster2Log; //!< 2log of na_c + }; + + // The physical dimensions of a grid + struct Dimensions + { + //! The lower corner of the (local) grid + rvec lowerCorner; + //! The upper corner of the (local) grid + rvec upperCorner; + //! The physical grid size: upperCorner - lowerCorner + rvec gridSize; + //! An estimate for the atom number density of the region targeted by the grid + real atomDensity; + //! The maximum distance an atom can be outside of a cell and outside of the grid + real maxAtomGroupRadius; + //! Size of cell along dimension x and y + real cellSize[DIM - 1]; + //! 1/size of a cell along dimensions x and y + real invCellSize[DIM - 1]; + //! The number of grid cells along dimensions x and y + int numCells[DIM - 1]; + }; + + //! Constructs a grid given the type of pairlist + Grid(PairlistType pairlistType); + + //! Returns the geometry of the grid cells + const Geometry &geometry() const + { + return geometry_; + } + + //! Returns the dimensions of the grid + const Dimensions &dimensions() const + { + return dimensions_; + } + + //! 
Returns the total number of grid columns + int numColumns() const + { + return dimensions_.numCells[XX]*dimensions_.numCells[YY]; + } + + //! Returns the total number of grid cells + int numCells() const + { + return numCellsTotal_; + } + + //! Returns the cell offset of (the first cell of) this grid in the list of cells combined over all grids + int cellOffset() const + { + return cellOffset_; + } + + //! Returns the index of the first cell in the column, counting from the start of this grid + int firstCellInColumn(int columnIndex) const + { + return cxy_ind_[columnIndex]; + }; + + //! Returns the number of cells in the column + int numCellsInColumn(int columnIndex) const + { + return cxy_ind_[columnIndex + 1] - cxy_ind_[columnIndex]; + }; + + //! Returns the index of the first atom in the column + int firstAtomInColumn(int columnIndex) const + { + return (cellOffset_ + cxy_ind_[columnIndex])*geometry_.numAtomsPerCell; + }; + + //! Returns the number of real atoms in the column + int numAtomsInColumn(int columnIndex) const + { + return cxy_na_[columnIndex]; + }; + + //! Returns the number of atoms in the column including padding + int paddedNumAtomsInColumn(int columnIndex) const + { + return numCellsInColumn(columnIndex)*geometry_.numAtomsPerCell; + }; + + //! Returns the end of the atom index range on the grid, including padding + int atomIndexEnd() const + { + return (cellOffset_ + numCellsTotal_)*geometry_.numAtomsPerCell; + } + + //! Returns whether any atom in the cluster is perturbed + bool clusterIsPerturbed(int clusterIndex) const + { + return fep_[clusterIndex] != 0u; + } + + //! Returns whether the given atom in the cluster is perturbed + bool atomIsPerturbed(int clusterIndex, + int atomIndexInCluster) const + { + return (fep_[clusterIndex] & (1 << atomIndexInCluster)) != 0u; + } + + //! Returns the free-energy perturbation bits for the cluster + unsigned int fepBits(int clusterIndex) const + { + return fep_[clusterIndex]; + } + + //! Returns the i-bounding boxes for all clusters on the grid + gmx::ArrayRef<const BoundingBox> iBoundingBoxes() const + { + return bb_; + } + + //! Returns the j-bounding boxes for all clusters on the grid + gmx::ArrayRef<const BoundingBox> jBoundingBoxes() const + { + return bbj_; + } + + //! Returns the packed bounding boxes for all clusters on the grid, empty with a CPU list + gmx::ArrayRef<const float> packedBoundingBoxes() const + { + return pbb_; + } + + //! Returns the bounding boxes along z for all cells on the grid + gmx::ArrayRef<const float> zBoundingBoxes() const + { + return bbcz_; + } + + //! Returns the flags for all clusters on the grid + gmx::ArrayRef<const int> clusterFlags() const + { + return flags_; + } + + //! Returns the number of clusters for all cells on the grid, empty with a CPU geometry + gmx::ArrayRef<const int> numClustersPerCell() const + { + return numClusters_; + } + + //! Returns the cluster index for an atom + int atomToCluster(int atomIndex) const + { + return (atomIndex >> geometry_.numAtomsICluster2Log); + } + + //! Returns the total number of clusters on the grid + int numClusters() const + { + if (geometry_.isSimple) + { + return numCellsTotal_; + } + else + { + return numClustersTotal_; + } + } + + //! Sets the grid dimensions + void setDimensions(const nbnxn_search *nbs, + int ddZone, + int numAtoms, + const rvec lowerCorner, + const rvec upperCorner, + real atomDensity, + real maxAtomGroupRadius); + + //! 
Determine in which grid cells the atoms should go + void calcCellIndices(nbnxn_search *nbs, + int ddZone, + int cellOffset, + const gmx::UpdateGroupsCog *updateGroupsCog, + int atomStart, + int atomEnd, + const int *atinfo, + gmx::ArrayRef<const gmx::RVec> x, + int numAtomsMoved, + const int *move, + nbnxn_atomdata_t *nbat); + + private: + /*! \brief Fill a pair search cell with atoms + * + * Potentially sorts atoms and sets the interaction flags. + */ + void fillCell(nbnxn_search *nbs, + nbnxn_atomdata_t *nbat, + int atomStart, + int atomEnd, + const int *atinfo, + gmx::ArrayRef<const gmx::RVec> x, + BoundingBox gmx_unused *bb_work_aligned); + + //! Spatially sort the atoms within one grid column + void sortColumnsCpuGeometry(nbnxn_search *nbs, + int dd_zone, + int atomStart, int atomEnd, + const int *atinfo, + gmx::ArrayRef<const gmx::RVec> x, + nbnxn_atomdata_t *nbat, + int cxy_start, int cxy_end, + gmx::ArrayRef<int> sort_work); + + //! Spatially sort the atoms within one grid column + void sortColumnsGpuGeometry(nbnxn_search *nbs, + int dd_zone, + int atomStart, int atomEnd, + const int *atinfo, + gmx::ArrayRef<const gmx::RVec> x, + nbnxn_atomdata_t *nbat, + int cxy_start, int cxy_end, + gmx::ArrayRef<int> sort_work); + + /* Data members */ + //! The geometry of the grid clusters and cells + Geometry geometry_; + //! The physical dimensions of the grid + Dimensions dimensions_; + + //! The total number of cells in this grid + int numCellsTotal_; + //! Index in nbs->cell corresponding to cell 0 + int cellOffset_; + + /* Grid data */ + //! The number of non-filler atoms for each grid column + std::vector<int> cxy_na_; + //! The grid-local cell index for each grid column + std::vector<int> cxy_ind_; + + //! The number of clusters for each cell + std::vector<int> numClusters_; + + /* Bounding boxes */ + //! Bounding boxes in z for the cells + std::vector<float> bbcz_; + //! 3D bounding boxes for the sub cells + std::vector < BoundingBox, gmx::AlignedAllocator < BoundingBox>> bb_; + //! 3D j-bounding boxes for the case where the i- and j-cluster sizes are different + std::vector < BoundingBox, gmx::AlignedAllocator < BoundingBox>> bbjStorage_; + //! 3D j-bounding boxes + gmx::ArrayRef<BoundingBox> bbj_; + //! 3D bounding boxes in packed xxxx format per cell + std::vector < float, gmx::AlignedAllocator < float>> pbb_; + + /* Bit-flag information */ + //! Flags for properties of clusters in each cell + std::vector<int> flags_; + //! Signal bits for atoms in each cell that tell whether an atom is perturbed + std::vector<unsigned int> fep_; + + /* Statistics */ + //! 
Total number of clusters, used for printing + int numClustersTotal_; }; +} // namespace Nbnxm + #endif diff --git a/src/gromacs/nbnxm/internal.h b/src/gromacs/nbnxm/internal.h index 68e07759b4..4b1a6963ea 100644 --- a/src/gromacs/nbnxm/internal.h +++ b/src/gromacs/nbnxm/internal.h @@ -58,7 +58,11 @@ #include "gromacs/utility/real.h" struct gmx_domdec_zones_t; -struct nbnxn_grid_t; + +namespace Nbnxm +{ +class Grid; +} // TODO Document after refactoring @@ -109,7 +113,7 @@ enum { enbsCCgrid, enbsCCsearch, enbsCCcombine, enbsCCreducef, enbsCCnr }; -/* Thread-local work struct, contains part of nbnxn_grid_t */ +/* Thread-local work struct, contains working data for Grid */ struct nbnxn_search_work_t { nbnxn_search_work_t(); @@ -139,16 +143,18 @@ struct nbnxn_search { /* \brief Constructor * - * \param[in] ePBC The periodic boundary conditions - * \param[in] n_dd_cells The number of domain decomposition cells per dimension, without DD nullptr should be passed - * \param[in] zones The domain decomposition zone setup, without DD nullptr should be passed - * \param[in] bFEP Tells whether non-bonded interactions are perturbed - * \param[in] nthread_max The maximum number of threads used in the search + * \param[in] ePBC The periodic boundary conditions + * \param[in] n_dd_cells The number of domain decomposition cells per dimension, without DD nullptr should be passed + * \param[in] zones The domain decomposition zone setup, without DD nullptr should be passed + * \param[in] pairlistType The type of pair list, which determines the cluster geometry + * \param[in] bFEP Tells whether non-bonded interactions are perturbed + * \param[in] nthread_max The maximum number of threads used in the search */ nbnxn_search(int ePBC, const ivec *n_dd_cells, const gmx_domdec_zones_t *zones, + PairlistType pairlistType, gmx_bool bFEP, int nthread_max); @@ -160,7 +166,7 @@ struct nbnxn_search ivec dd_dim; /* Are we doing DD in x,y,z? */ const gmx_domdec_zones_t *zones; /* The domain decomposition zones */ - std::vector<nbnxn_grid_t> grid; /* Array of grids, size ngrid */ + std::vector<Nbnxm::Grid> grid; /* Array of grids, size ngrid */ std::vector<int> cell; /* Actual allocated cell array for all grids */ std::vector<int> a; /* Atom index for grid, the inverse of cell */ diff --git a/src/gromacs/nbnxm/nbnxm_setup.cpp b/src/gromacs/nbnxm/nbnxm_setup.cpp index 2e43db5fc6..aa851aff68 100644 --- a/src/gromacs/nbnxm/nbnxm_setup.cpp +++ b/src/gromacs/nbnxm/nbnxm_setup.cpp @@ -460,6 +460,7 @@ init_nb_verlet(const gmx::MDLogger &mdlog, std::make_unique<nbnxn_search>(ir->ePBC, DOMAINDECOMP(cr) ? &cr->dd->nc : nullptr, DOMAINDECOMP(cr) ? 
domdec_zones(cr->dd) : nullptr, + listParams.pairlistType, bFEP_NonBonded, gmx_omp_nthreads_get(emntPairsearch)); diff --git a/src/gromacs/nbnxm/pairlist.cpp b/src/gromacs/nbnxm/pairlist.cpp index 26c64db34f..8e53d05ac0 100644 --- a/src/gromacs/nbnxm/pairlist.cpp +++ b/src/gromacs/nbnxm/pairlist.cpp @@ -74,7 +74,11 @@ #include "internal.h" #include "pairlistwork.h" -using namespace gmx; // TODO: Remove when this file is moved into gmx namespace +using namespace gmx; // TODO: Remove when this file is moved into gmx namespace + +using nbnxn_bb_t = Nbnxm::BoundingBox; // TODO: Remove when refactoring this file + +using Grid = Nbnxm::Grid; // TODO: Remove when refactoring this file // Convenience alias for partial Nbnxn namespace usage using InteractionLocality = Nbnxm::InteractionLocality; @@ -297,6 +301,7 @@ nbnxn_search_work_t::~nbnxn_search_work_t() nbnxn_search::nbnxn_search(int ePBC, const ivec *n_dd_cells, const gmx_domdec_zones_t *zones, + const PairlistType pairlistType, gmx_bool bFEP, int nthread_max) : bFEP(bFEP), @@ -325,7 +330,7 @@ nbnxn_search::nbnxn_search(int ePBC, } } - grid.resize(numGrids); + grid.resize(numGrids, pairlistType); /* Initialize detailed nbsearch cycle counting */ print_cycles = (getenv("GMX_NBNXN_CYCLE") != nullptr); @@ -355,10 +360,11 @@ static void init_buffer_flags(nbnxn_buffer_flags_t *flags, * assigned to (when atom groups instead of individual atoms are assigned * to cells), the distance returned can be larger than the input. */ -static real listRangeForBoundingBoxToGridCell(real rlist, - const nbnxn_grid_t &grid) +static real +listRangeForBoundingBoxToGridCell(real rlist, + const Grid::Dimensions &gridDims) { - return rlist + grid.maxAtomGroupRadius; + return rlist + gridDims.maxAtomGroupRadius; } /* Returns the pair-list cutoff between grid cells given an atom-to-atom pair-list cutoff @@ -369,11 +375,12 @@ static real listRangeForBoundingBoxToGridCell(real rlist, * assigned to (when atom groups instead of individual atoms are assigned * to cells), the distance returned can be larger than the input. 
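 * As a numeric illustration (values invented): with rlist = 1.0 nm and
 * update-group radii of 0.1 nm on both grids, cell pairs have to be
 * considered out to 1.0 + 0.1 + 0.1 = 1.2 nm.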
*/ -static real listRangeForGridCellToGridCell(real rlist, - const nbnxn_grid_t &iGrid, - const nbnxn_grid_t &jGrid) +static real +listRangeForGridCellToGridCell(real rlist, + const Grid::Dimensions &iGridDims, + const Grid::Dimensions &jGridDims) { - return rlist + iGrid.maxAtomGroupRadius + jGrid.maxAtomGroupRadius; + return rlist + iGridDims.maxAtomGroupRadius + jGridDims.maxAtomGroupRadius; } /* Determines the cell range along one dimension that @@ -381,22 +388,22 @@ static real listRangeForGridCellToGridCell(real rlist, */ template static void get_cell_range(real b0, real b1, - const nbnxn_grid_t &jGrid, + const Grid::Dimensions &jGridDims, real d2, real rlist, int *cf, int *cl) { - real listRangeBBToCell2 = gmx::square(listRangeForBoundingBoxToGridCell(rlist, jGrid)); - real distanceInCells = (b0 - jGrid.c0[dim])*jGrid.invCellSize[dim]; + real listRangeBBToCell2 = gmx::square(listRangeForBoundingBoxToGridCell(rlist, jGridDims)); + real distanceInCells = (b0 - jGridDims.lowerCorner[dim])*jGridDims.invCellSize[dim]; *cf = std::max(static_cast(distanceInCells), 0); while (*cf > 0 && - d2 + gmx::square((b0 - jGrid.c0[dim]) - (*cf - 1 + 1)*jGrid.cellSize[dim]) < listRangeBBToCell2) + d2 + gmx::square((b0 - jGridDims.lowerCorner[dim]) - (*cf - 1 + 1)*jGridDims.cellSize[dim]) < listRangeBBToCell2) { (*cf)--; } - *cl = std::min(static_cast((b1 - jGrid.c0[dim])*jGrid.invCellSize[dim]), jGrid.numCells[dim] - 1); - while (*cl < jGrid.numCells[dim] - 1 && - d2 + gmx::square((*cl + 1)*jGrid.cellSize[dim] - (b1 - jGrid.c0[dim])) < listRangeBBToCell2) + *cl = std::min(static_cast((b1 - jGridDims.lowerCorner[dim])*jGridDims.invCellSize[dim]), jGridDims.numCells[dim] - 1); + while (*cl < jGridDims.numCells[dim] - 1 && + d2 + gmx::square((*cl + 1)*jGridDims.cellSize[dim] - (b1 - jGridDims.lowerCorner[dim])) < listRangeBBToCell2) { (*cl)++; } @@ -865,27 +872,23 @@ void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list) static void print_nblist_statistics(FILE *fp, const NbnxnPairlistCpu *nbl, const nbnxn_search *nbs, real rl) { - const nbnxn_grid_t *grid; - int cs[SHIFTS]; - int npexcl; - - grid = &nbs->grid[0]; + const Grid &grid = nbs->grid[0]; + const Grid::Dimensions &dims = grid.dimensions(); fprintf(fp, "nbl nci %zu ncj %d\n", nbl->ci.size(), nbl->ncjInUse); + const int numAtomsJCluster = grid.geometry().numAtomsJCluster; + const double numAtomsPerCell = nbl->ncjInUse/static_cast(grid.numCells())*numAtomsJCluster; fprintf(fp, "nbl na_cj %d rl %g ncp %d per cell %.1f atoms %.1f ratio %.2f\n", - nbl->na_cj, rl, nbl->ncjInUse, nbl->ncjInUse/static_cast(grid->nc), - nbl->ncjInUse/static_cast(grid->nc)*grid->na_cj, - nbl->ncjInUse/static_cast(grid->nc)*grid->na_cj/(0.5*4.0/3.0*M_PI*rl*rl*rl*grid->nc*grid->na_cj/(grid->size[XX]*grid->size[YY]*grid->size[ZZ]))); + nbl->na_cj, rl, nbl->ncjInUse, nbl->ncjInUse/static_cast(grid.numCells()), + numAtomsPerCell, + numAtomsPerCell/(0.5*4.0/3.0*M_PI*rl*rl*rl*grid.numCells()*numAtomsJCluster/(dims.gridSize[XX]*dims.gridSize[YY]*dims.gridSize[ZZ]))); fprintf(fp, "nbl average j cell list length %.1f\n", 0.25*nbl->ncjInUse/std::max(static_cast(nbl->ci.size()), 1.0)); - for (int s = 0; s < SHIFTS; s++) - { - cs[s] = 0; - } - npexcl = 0; + int cs[SHIFTS] = { 0 }; + int npexcl = 0; for (const nbnxn_ci_t &ciEntry : nbl->ci) { cs[ciEntry.shift & NBNXN_CI_SHIFT] += @@ -914,29 +917,22 @@ static void print_nblist_statistics(FILE *fp, const NbnxnPairlistCpu *nbl, static void print_nblist_statistics(FILE *fp, const NbnxnPairlistGpu *nbl, const nbnxn_search *nbs, real 
rl) { - const nbnxn_grid_t *grid; - int b; - int c[c_gpuNumClusterPerCell + 1]; - double sum_nsp, sum_nsp2; - int nsp_max; - - /* This code only produces correct statistics with domain decomposition */ - grid = &nbs->grid[0]; + const Grid &grid = nbs->grid[0]; + const Grid::Dimensions &dims = grid.dimensions(); fprintf(fp, "nbl nsci %zu ncj4 %zu nsi %d excl4 %zu\n", nbl->sci.size(), nbl->cj4.size(), nbl->nci_tot, nbl->excl.size()); + const int numAtomsCluster = grid.geometry().numAtomsICluster; + const double numAtomsPerCell = nbl->nci_tot/static_cast(grid.numClusters())*numAtomsCluster; fprintf(fp, "nbl na_c %d rl %g ncp %d per cell %.1f atoms %.1f ratio %.2f\n", - nbl->na_ci, rl, nbl->nci_tot, nbl->nci_tot/static_cast(grid->nsubc_tot), - nbl->nci_tot/static_cast(grid->nsubc_tot)*grid->na_c, - nbl->nci_tot/static_cast(grid->nsubc_tot)*grid->na_c/(0.5*4.0/3.0*M_PI*rl*rl*rl*grid->nsubc_tot*grid->na_c/(grid->size[XX]*grid->size[YY]*grid->size[ZZ]))); - - sum_nsp = 0; - sum_nsp2 = 0; - nsp_max = 0; - for (int si = 0; si <= c_gpuNumClusterPerCell; si++) - { - c[si] = 0; - } + nbl->na_ci, rl, nbl->nci_tot, nbl->nci_tot/static_cast(grid.numClusters()), + numAtomsPerCell, + numAtomsPerCell/(0.5*4.0/3.0*M_PI*rl*rl*rl*grid.numClusters()*numAtomsCluster/(dims.gridSize[XX]*dims.gridSize[YY]*dims.gridSize[ZZ]))); + + double sum_nsp = 0; + double sum_nsp2 = 0; + int nsp_max = 0; + int c[c_gpuNumClusterPerCell + 1] = { 0 }; for (const nbnxn_sci_t &sci : nbl->sci) { int nsp = 0; @@ -944,7 +940,7 @@ static void print_nblist_statistics(FILE *fp, const NbnxnPairlistGpu *nbl, { for (int j = 0; j < c_nbnxnGpuJgroupSize; j++) { - b = 0; + int b = 0; for (int si = 0; si < c_gpuNumClusterPerCell; si++) { if (nbl->cj4[j4].imei[0].imask & (1U << (j*c_gpuNumClusterPerCell + si))) @@ -970,7 +966,7 @@ static void print_nblist_statistics(FILE *fp, const NbnxnPairlistGpu *nbl, if (!nbl->cj4.empty()) { - for (b = 0; b <= c_gpuNumClusterPerCell; b++) + for (int b = 0; b <= c_gpuNumClusterPerCell; b++) { fprintf(fp, "nbl j-list #i-subcell %d %7d %4.1f\n", b, c[b], 100.0*c[b]/size_t {nbl->cj4.size()*c_nbnxnGpuJgroupSize}); @@ -1087,7 +1083,7 @@ gmx_unused static unsigned int get_imask_simd_j8(gmx_bool rdiag, int ci, int cj) * \param[in,out] numDistanceChecks The number of distance checks performed */ static void -makeClusterListSimple(const nbnxn_grid_t &jGrid, +makeClusterListSimple(const Grid &jGrid, NbnxnPairlistCpu * nbl, int icluster, int jclusterFirst, @@ -1106,7 +1102,7 @@ makeClusterListSimple(const nbnxn_grid_t &jGrid, InRange = FALSE; while (!InRange && jclusterFirst <= jclusterLast) { - real d2 = subc_bb_dist2(0, bb_ci, jclusterFirst, jGrid.bb); + real d2 = subc_bb_dist2(0, bb_ci, jclusterFirst, jGrid.jBoundingBoxes()); *numDistanceChecks += 2; /* Check if the distance is within the distance where @@ -1120,7 +1116,7 @@ makeClusterListSimple(const nbnxn_grid_t &jGrid, } else if (d2 < rlist2) { - int cjf_gl = jGrid.cell0 + jclusterFirst; + int cjf_gl = jGrid.cellOffset() + jclusterFirst; for (int i = 0; i < c_nbnxnCpuIClusterSize && !InRange; i++) { for (int j = 0; j < c_nbnxnCpuIClusterSize; j++) @@ -1146,7 +1142,7 @@ makeClusterListSimple(const nbnxn_grid_t &jGrid, InRange = FALSE; while (!InRange && jclusterLast > jclusterFirst) { - real d2 = subc_bb_dist2(0, bb_ci, jclusterLast, jGrid.bb); + real d2 = subc_bb_dist2(0, bb_ci, jclusterLast, jGrid.jBoundingBoxes()); *numDistanceChecks += 2; /* Check if the distance is within the distance where @@ -1160,7 +1156,7 @@ makeClusterListSimple(const nbnxn_grid_t &jGrid, 
} else if (d2 < rlist2) { - int cjl_gl = jGrid.cell0 + jclusterLast; + int cjl_gl = jGrid.cellOffset() + jclusterLast; for (int i = 0; i < c_nbnxnCpuIClusterSize && !InRange; i++) { for (int j = 0; j < c_nbnxnCpuIClusterSize; j++) @@ -1185,7 +1181,7 @@ makeClusterListSimple(const nbnxn_grid_t &jGrid, { /* Store cj and the interaction mask */ nbnxn_cj_t cjEntry; - cjEntry.cj = jGrid.cell0 + jcluster; + cjEntry.cj = jGrid.cellOffset() + jcluster; cjEntry.excl = get_imask(excludeSubDiagonal, icluster, jcluster); nbl->cj.push_back(cjEntry); } @@ -1204,8 +1200,8 @@ makeClusterListSimple(const nbnxn_grid_t &jGrid, /* Plain C or SIMD4 code for making a pair list of super-cell sci vs scj. * Checks bounding box distances and possibly atom pair distances. */ -static void make_cluster_list_supersub(const nbnxn_grid_t &iGrid, - const nbnxn_grid_t &jGrid, +static void make_cluster_list_supersub(const Grid &iGrid, + const Grid &jGrid, NbnxnPairlistGpu *nbl, const int sci, const int scj, @@ -1224,8 +1220,8 @@ static void make_cluster_list_supersub(const nbnxn_grid_t &iGrid, const nbnxn_bb_t *bb_ci = work.iSuperClusterData.bb.data(); #endif - assert(c_nbnxnGpuClusterSize == iGrid.na_c); - assert(c_nbnxnGpuClusterSize == jGrid.na_c); + assert(c_nbnxnGpuClusterSize == iGrid.geometry().numAtomsICluster); + assert(c_nbnxnGpuClusterSize == jGrid.geometry().numAtomsICluster); /* We generate the pairlist mainly based on bounding-box distances * and do atom pair distance based pruning on the GPU. @@ -1241,13 +1237,13 @@ static void make_cluster_list_supersub(const nbnxn_grid_t &iGrid, float *d2l = work.distanceBuffer.data(); - for (int subc = 0; subc < jGrid.nsubc[scj]; subc++) + for (int subc = 0; subc < jGrid.numClustersPerCell()[scj]; subc++) { const int cj4_ind = work.cj_ind/c_nbnxnGpuJgroupSize; const int cj_offset = work.cj_ind - cj4_ind*c_nbnxnGpuJgroupSize; const int cj = scj*c_gpuNumClusterPerCell + subc; - const int cj_gl = jGrid.cell0*c_gpuNumClusterPerCell + cj; + const int cj_gl = jGrid.cellOffset()*c_gpuNumClusterPerCell + cj; int ci1; if (excludeSubDiagonal && sci == scj) @@ -1256,12 +1252,12 @@ static void make_cluster_list_supersub(const nbnxn_grid_t &iGrid, } else { - ci1 = iGrid.nsubc[sci]; + ci1 = iGrid.numClustersPerCell()[sci]; } #if NBNXN_BBXXXX /* Determine all ci1 bb distances in one call with SIMD4 */ - subc_bb_dist2_simd4_xxxx(jGrid.pbb.data() + (cj >> STRIDE_PBB_2LOG)*NNBSBB_XXXX + (cj & (STRIDE_PBB-1)), + subc_bb_dist2_simd4_xxxx(jGrid.packedBoundingBoxes().data() + (cj >> STRIDE_PBB_2LOG)*NNBSBB_XXXX + (cj & (STRIDE_PBB-1)), ci1, pbb_ci, d2l); *numDistanceChecks += c_nbnxnGpuClusterSize*2; #endif @@ -1278,7 +1274,7 @@ static void make_cluster_list_supersub(const nbnxn_grid_t &iGrid, #if !NBNXN_BBXXXX /* Determine the bb distance between ci and cj */ - d2l[ci] = subc_bb_dist2(ci, bb_ci, cj, jGrid.bb); + d2l[ci] = subc_bb_dist2(ci, bb_ci, cj, jGrid.jBoundingBoxes()); *numDistanceChecks += 2; #endif float d2 = d2l[ci]; @@ -1598,13 +1594,13 @@ static void make_fep_list(const nbnxn_search *nbs, real gmx_unused shy, real gmx_unused shz, real gmx_unused rlist_fep2, - const nbnxn_grid_t &iGrid, - const nbnxn_grid_t &jGrid, + const Grid &iGrid, + const Grid &jGrid, t_nblist *nlist) { int ci, cj_ind_start, cj_ind_end, cja, cjr; int nri_max; - int ngid, gid_i = 0, gid_j, gid; + int gid_i = 0, gid_j, gid; int egp_shift, egp_mask; int gid_cj = 0; int ind_i, ind_j, ai, aj; @@ -1633,14 +1629,19 @@ static void make_fep_list(const nbnxn_search *nbs, reallocate_nblist(nlist); } + const int 
numAtomsJCluster = jGrid.geometry().numAtomsJCluster; + const nbnxn_atomdata_t::Params &nbatParams = nbat->params(); - ngid = nbatParams.nenergrp; + const int ngid = nbatParams.nenergrp; - if (ngid*jGrid.na_cj > gmx::index(sizeof(gid_cj)*8)) + /* TODO: Consider adding a check in grompp and changing this to an assert */ + const int numBitsInEnergyGroupIdsForAtomsInJCluster = sizeof(gid_cj)*8; + if (ngid*numAtomsJCluster > numBitsInEnergyGroupIdsForAtomsInJCluster) { gmx_fatal(FARGS, "The Verlet scheme with %dx%d kernels and free-energy only supports up to %zu energy groups", - iGrid.na_c, jGrid.na_cj, (sizeof(gid_cj)*8)/jGrid.na_cj); + iGrid.geometry().numAtomsICluster, numAtomsJCluster, + (sizeof(gid_cj)*8)/numAtomsJCluster); } egp_shift = nbatParams.neg_2log; @@ -1661,7 +1662,7 @@ static void make_fep_list(const nbnxn_search *nbs, nlist->gid[nri] = 0; nlist->shift[nri] = nbl_ci->shift & NBNXN_CI_SHIFT; - bFEP_i = ((iGrid.fep[ci - iGrid.cell0] & (1 << i)) != 0u); + bFEP_i = iGrid.atomIsPerturbed(ci - iGrid.cellOffset(), i); bFEP_i_all = bFEP_i_all && bFEP_i; @@ -1683,33 +1684,33 @@ static void make_fep_list(const nbnxn_search *nbs, cja = nbl->cj[cj_ind].cj; - if (jGrid.na_cj == jGrid.na_c) + if (numAtomsJCluster == jGrid.geometry().numAtomsICluster) { - cjr = cja - jGrid.cell0; - fep_cj = jGrid.fep[cjr]; + cjr = cja - jGrid.cellOffset(); + fep_cj = jGrid.fepBits(cjr); if (ngid > 1) { gid_cj = nbatParams.energrp[cja]; } } - else if (2*jGrid.na_cj == jGrid.na_c) + else if (2*numAtomsJCluster == jGrid.geometry().numAtomsICluster) { - cjr = cja - jGrid.cell0*2; /* Extract half of the ci fep/energrp mask */ - fep_cj = (jGrid.fep[cjr>>1] >> ((cjr&1)*jGrid.na_cj)) & ((1<<jGrid.na_cj) - 1); + cjr = cja - jGrid.cellOffset()*2; + /* Extract half of the ci fep/energrp mask */ + fep_cj = (jGrid.fepBits(cjr >> 1) >> ((cjr & 1)*numAtomsJCluster)) & ((1 << numAtomsJCluster) - 1); if (ngid > 1) { - gid_cj = nbatParams.energrp[cja>>1] >> ((cja&1)*jGrid.na_cj*egp_shift) & ((1<<(jGrid.na_cj*egp_shift)) - 1); + gid_cj = nbatParams.energrp[cja >> 1] >> ((cja & 1)*numAtomsJCluster*egp_shift) & ((1 << (numAtomsJCluster*egp_shift)) - 1); } } else { - cjr = cja - (jGrid.cell0>>1); /* Combine two ci fep masks/energrp */ - fep_cj = jGrid.fep[cjr*2] + (jGrid.fep[cjr*2+1] << jGrid.na_c); + cjr = cja - (jGrid.cellOffset() >> 1); + /* Combine two ci fep masks/energrp */ + fep_cj = jGrid.fepBits(cjr*2) + (jGrid.fepBits(cjr*2 + 1) << jGrid.geometry().numAtomsICluster); if (ngid > 1) { - gid_cj = nbatParams.energrp[cja*2] + (nbatParams.energrp[cja*2+1] << (jGrid.na_c*egp_shift)); + gid_cj = nbatParams.energrp[cja*2] + (nbatParams.energrp[cja*2+1] << (jGrid.geometry().numAtomsICluster*egp_shift)); } } @@ -1806,8 +1807,8 @@ static void make_fep_list(const nbnxn_search *nbs, real shy, real shz, real rlist_fep2, - const nbnxn_grid_t &iGrid, - const nbnxn_grid_t &jGrid, + const Grid &iGrid, + const Grid &jGrid, t_nblist *nlist) { int nri_max; @@ -1862,7 +1863,7 @@ static void make_fep_list(const nbnxn_search *nbs, nlist->gid[nri] = 0; nlist->shift[nri] = nbl_sci->shift & NBNXN_CI_SHIFT; - bFEP_i = ((iGrid.fep[c_abs - iGrid.cell0*c_gpuNumClusterPerCell] & (1 << i)) != 0u); + bFEP_i = iGrid.atomIsPerturbed(c_abs - iGrid.cellOffset()*c_gpuNumClusterPerCell, i); xi = nbat->x()[ind_i*nbat->xstride+XX] + shx; yi = nbat->x()[ind_i*nbat->xstride+YY] + shy; @@ -1882,8 +1883,6 @@ static void make_fep_list(const nbnxn_search *nbs, for (int gcj = 0; gcj < c_nbnxnGpuJgroupSize; gcj++) { - unsigned int fep_cj; - if ((cj4->imei[0].imask & (1U << (gcj*c_gpuNumClusterPerCell + c))) == 0) { /* Skip this ci for this cj */ @@ -1891,19 +1890,17 @@ static void make_fep_list(const 
nbnxn_search *nbs, } const int cjr = - cj4->cj[gcj] - jGrid.cell0*c_gpuNumClusterPerCell; - - fep_cj = jGrid.fep[cjr]; + cj4->cj[gcj] - jGrid.cellOffset()*c_gpuNumClusterPerCell; - if (bFEP_i || fep_cj != 0) + if (bFEP_i || jGrid.clusterIsPerturbed(cjr)) { for (int j = 0; j < nbl->na_cj; j++) { /* Is this interaction perturbed and not excluded? */ - ind_j = (jGrid.cell0*c_gpuNumClusterPerCell + cjr)*nbl->na_cj + j; + ind_j = (jGrid.cellOffset()*c_gpuNumClusterPerCell + cjr)*nbl->na_cj + j; aj = nbs->a[ind_j]; if (aj >= 0 && - (bFEP_i || (fep_cj & (1 << j))) && + (bFEP_i || jGrid.atomIsPerturbed(cjr, j)) && (!bDiagRemoved || ind_j >= ind_i)) { int excl_pair; @@ -2368,12 +2365,13 @@ static inline void set_icell_bb_simple(gmx::ArrayRef bb, } /* Sets a simple list i-cell bounding box, including PBC shift */ -static inline void set_icell_bb(const nbnxn_grid_t &iGrid, +static inline void set_icell_bb(const Grid &iGrid, int ci, real shx, real shy, real shz, NbnxnPairlistCpuWork *work) { - set_icell_bb_simple(iGrid.bb, ci, shx, shy, shz, &work->iClusterData.bb[0]); + set_icell_bb_simple(iGrid.iBoundingBoxes(), ci, shx, shy, shz, + &work->iClusterData.bb[0]); } #if NBNXN_BBXXXX @@ -2414,16 +2412,16 @@ gmx_unused static void set_icell_bb_supersub(gmx::ArrayRef bb, } /* Sets a super-cell and sub cell bounding boxes, including PBC shift */ -gmx_unused static void set_icell_bb(const nbnxn_grid_t &iGrid, +gmx_unused static void set_icell_bb(const Grid &iGrid, int ci, real shx, real shy, real shz, NbnxnPairlistGpuWork *work) { #if NBNXN_BBXXXX - set_icell_bbxxxx_supersub(iGrid.pbb, ci, shx, shy, shz, + set_icell_bbxxxx_supersub(iGrid.packedBoundingBoxes(), ci, shx, shy, shz, work->iSuperClusterData.bbPacked.data()); #else - set_icell_bb_supersub(iGrid.bb, ci, shx, shy, shz, + set_icell_bb_supersub(iGrid.iBoundingBoxes(), ci, shx, shy, shz, work->iSuperClusterData.bb.data()); #endif } @@ -2514,21 +2512,23 @@ static void icell_set_x(int ci, #endif /* !GMX_SIMD4_HAVE_REAL */ } -static real minimum_subgrid_size_xy(const nbnxn_grid_t &grid) +static real minimum_subgrid_size_xy(const Grid &grid) { - if (grid.bSimple) + const Grid::Dimensions &dims = grid.dimensions(); + + if (grid.geometry().isSimple) { - return std::min(grid.cellSize[XX], grid.cellSize[YY]); + return std::min(dims.cellSize[XX], dims.cellSize[YY]); } else { - return std::min(grid.cellSize[XX]/c_gpuNumClusterPerCellX, - grid.cellSize[YY]/c_gpuNumClusterPerCellY); + return std::min(dims.cellSize[XX]/c_gpuNumClusterPerCellX, + dims.cellSize[YY]/c_gpuNumClusterPerCellY); } } -static real effective_buffer_1x1_vs_MxN(const nbnxn_grid_t &iGrid, - const nbnxn_grid_t &jGrid) +static real effective_buffer_1x1_vs_MxN(const Grid &iGrid, + const Grid &jGrid) { const real eff_1x1_buffer_fac_overest = 0.1; @@ -2614,7 +2614,7 @@ static void get_nsubpair_target(const nbnxn_search *nbs, const int nsubpair_target_min = 36; real r_eff_sup, vol_est, nsp_est, nsp_est_nl; - const nbnxn_grid_t &grid = nbs->grid[0]; + const Grid &grid = nbs->grid[0]; /* We don't need to balance list sizes if: * - We didn't request balancing. @@ -2622,7 +2622,7 @@ static void get_nsubpair_target(const nbnxn_search *nbs, * since we will always generate at least #cells lists. * - We don't have any cells, since then there won't be any lists. 
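 * As an example (numbers invented for this note): with min_ci_balanced = 120
 * and a local grid of 200 cells, at least 200 lists are generated anyway,
 * so balancing is skipped and nsubpair_target is set to 0 below.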
 */ - if (min_ci_balanced <= 0 || grid.nc >= min_ci_balanced || grid.nc == 0) + if (min_ci_balanced <= 0 || grid.numCells() >= min_ci_balanced || grid.numCells() == 0) { /* nsubpair_target==0 signals no balancing */ *nsubpair_target = 0; @@ -2631,13 +2631,16 @@ static void get_nsubpair_target(const nbnxn_search *nbs, return; } - gmx::RVec ls; - ls[XX] = (grid.c1[XX] - grid.c0[XX])/(grid.numCells[XX]*c_gpuNumClusterPerCellX); - ls[YY] = (grid.c1[YY] - grid.c0[YY])/(grid.numCells[YY]*c_gpuNumClusterPerCellY); - ls[ZZ] = grid.na_c/(grid.atom_density*ls[XX]*ls[YY]); + gmx::RVec ls; + const int numAtomsCluster = grid.geometry().numAtomsICluster; + const Grid::Dimensions &dims = grid.dimensions(); + + ls[XX] = dims.cellSize[XX]/c_gpuNumClusterPerCellX; + ls[YY] = dims.cellSize[YY]/c_gpuNumClusterPerCellY; + ls[ZZ] = numAtomsCluster/(dims.atomDensity*ls[XX]*ls[YY]); /* The formulas below are a heuristic estimate of the average nsj per si */ - r_eff_sup = rlist + nbnxn_get_rlist_effective_inc(grid.na_c, ls); + r_eff_sup = rlist + nbnxn_get_rlist_effective_inc(numAtomsCluster, ls); if (!nbs->DomDec || nbs->zones->n == 1) { @@ -2646,7 +2649,7 @@ ... nsp_est_nl = - gmx::square(grid.atom_density/grid.na_c)* + gmx::square(dims.atomDensity/numAtomsCluster)* nonlocal_vol2(nbs->zones, ls, r_eff_sup); } @@ -2664,7 +2667,7 @@ /* Estimate the number of cluster pairs as the local number of * clusters times the volume they interact with times the density. */ - nsp_est = grid.nsubc_tot*vol_est*grid.atom_density/grid.na_c; + nsp_est = grid.numClusters()*vol_est*dims.atomDensity/numAtomsCluster; /* Subtract the non-local pair count */ nsp_est -= nsp_est_nl; @@ -2678,7 +2681,7 @@ * groups of atoms we'll anyhow be limited by nsubpair_target_min, * so this overestimation will not matter. */ - nsp_est = std::max(nsp_est, grid.nsubc_tot*14._real); + nsp_est = std::max(nsp_est, grid.numClusters()*14._real); if (debug) { @@ -2948,7 +2951,7 @@ static void balance_fep_lists(const nbnxn_search *nbs, } /* Returns the next ci to be processed by our thread */ -static gmx_bool next_ci(const nbnxn_grid_t &grid, +static gmx_bool next_ci(const Grid &grid, int nth, int ci_block, int *ci_x, int *ci_y, int *ci_b, int *ci) @@ -2963,15 +2966,15 @@ static gmx_bool next_ci(const nbnxn_grid_t &grid, *ci_b = 0; } - if (*ci >= grid.nc) + if (*ci >= grid.numCells()) { return FALSE; } - while (*ci >= grid.cxy_ind[*ci_x*grid.numCells[YY] + *ci_y + 1]) + while (*ci >= grid.firstCellInColumn(*ci_x*grid.dimensions().numCells[YY] + *ci_y + 1)) { *ci_y += 1; - if (*ci_y == grid.numCells[YY]) + if (*ci_y == grid.dimensions().numCells[YY]) { *ci_x += 1; *ci_y = 0; @@ -2984,10 +2987,10 @@ /* Returns the distance^2 for which we put cell pairs in the list * without checking atom pair distances. This is usually < rlist^2. 
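 * As an illustration (numbers invented): for a CPU 4x4 list, accepting a
 * cluster pair purely on its bounding-box distance saves up to 4*4 = 16
 * per-atom-pair distance checks for that pair.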
*/ -static float boundingbox_only_distance2(const nbnxn_grid_t &iGrid, - const nbnxn_grid_t &jGrid, - real rlist, - gmx_bool simple) +static float boundingbox_only_distance2(const Grid::Dimensions &iGridDims, + const Grid::Dimensions &jGridDims, + real rlist, + gmx_bool simple) { /* If the distance between two sub-cell bounding boxes is less * than this distance, do not check the distance between @@ -3005,8 +3008,8 @@ static float boundingbox_only_distance2(const nbnxn_grid_t &iGrid, real bbx, bby; real rbb2; - bbx = 0.5*(iGrid.cellSize[XX] + jGrid.cellSize[XX]); - bby = 0.5*(iGrid.cellSize[YY] + jGrid.cellSize[YY]); + bbx = 0.5*(iGridDims.cellSize[XX] + jGridDims.cellSize[XX]); + bby = 0.5*(iGridDims.cellSize[YY] + jGridDims.cellSize[YY]); if (!simple) { bbx /= c_gpuNumClusterPerCellX; @@ -3023,7 +3026,7 @@ static float boundingbox_only_distance2(const nbnxn_grid_t &iGrid, #endif } -static int get_ci_block_size(const nbnxn_grid_t &iGrid, +static int get_ci_block_size(const Grid &iGrid, gmx_bool bDomDec, int nth) { const int ci_block_enum = 5; @@ -3042,23 +3045,25 @@ static int get_ci_block_size(const nbnxn_grid_t &iGrid, * zone boundaries with 3D domain decomposition. At the same time * the blocks will not become too small. */ - ci_block = (iGrid.nc*ci_block_enum)/(ci_block_denom*iGrid.numCells[XX]*nth); + ci_block = (iGrid.numCells()*ci_block_enum)/(ci_block_denom*iGrid.dimensions().numCells[XX]*nth); + + const int numAtomsPerCell = iGrid.geometry().numAtomsPerCell; /* Ensure the blocks are not too small: avoids cache invalidation */ - if (ci_block*iGrid.na_sc < ci_block_min_atoms) + if (ci_block*numAtomsPerCell < ci_block_min_atoms) { - ci_block = (ci_block_min_atoms + iGrid.na_sc - 1)/iGrid.na_sc; + ci_block = (ci_block_min_atoms + numAtomsPerCell - 1)/numAtomsPerCell; } /* Without domain decomposition * or with less than 3 blocks per task, divide in nth blocks. */ - if (!bDomDec || nth*3*ci_block > iGrid.nc) + if (!bDomDec || nth*3*ci_block > iGrid.numCells()) { - ci_block = (iGrid.nc + nth - 1)/nth; + ci_block = (iGrid.numCells() + nth - 1)/nth; } - if (ci_block > 1 && (nth - 1)*ci_block >= iGrid.nc) + if (ci_block > 1 && (nth - 1)*ci_block >= iGrid.numCells()) { /* Some threads have no work. 
Although reducing the block size * does not decrease the block count on the first few threads, @@ -3098,18 +3103,18 @@ static bool pairlistIsSimple(const NbnxnPairlistGpu gmx_unused &pairlist) return false; } -static void makeClusterListWrapper(NbnxnPairlistCpu *nbl, - const nbnxn_grid_t gmx_unused &iGrid, - const int ci, - const nbnxn_grid_t &jGrid, - const int firstCell, - const int lastCell, - const bool excludeSubDiagonal, - const nbnxn_atomdata_t *nbat, - const real rlist2, - const real rbb2, - const Nbnxm::KernelType kernelType, - int *numDistanceChecks) +static void makeClusterListWrapper(NbnxnPairlistCpu *nbl, + const Grid gmx_unused &iGrid, + const int ci, + const Grid &jGrid, + const int firstCell, + const int lastCell, + const bool excludeSubDiagonal, + const nbnxn_atomdata_t *nbat, + const real rlist2, + const real rbb2, + const Nbnxm::KernelType kernelType, + int *numDistanceChecks) { switch (kernelType) { @@ -3146,18 +3151,18 @@ static void makeClusterListWrapper(NbnxnPairlistCpu *nbl, } } -static void makeClusterListWrapper(NbnxnPairlistGpu *nbl, - const nbnxn_grid_t &gmx_unused iGrid, - const int ci, - const nbnxn_grid_t &jGrid, - const int firstCell, - const int lastCell, - const bool excludeSubDiagonal, - const nbnxn_atomdata_t *nbat, - const real rlist2, - const real rbb2, - Nbnxm::KernelType gmx_unused kernelType, - int *numDistanceChecks) +static void makeClusterListWrapper(NbnxnPairlistGpu *nbl, + const Grid &gmx_unused iGrid, + const int ci, + const Grid &jGrid, + const int firstCell, + const int lastCell, + const bool excludeSubDiagonal, + const nbnxn_atomdata_t *nbat, + const real rlist2, + const real rbb2, + Nbnxm::KernelType gmx_unused kernelType, + int *numDistanceChecks) { for (int cj = firstCell; cj <= lastCell; cj++) { @@ -3236,8 +3241,8 @@ static void setBufferFlags(const NbnxnPairlistGpu gmx_unused &nbl, /* Generates the part of pair-list nbl assigned to our thread */ template static void nbnxn_make_pairlist_part(const nbnxn_search *nbs, - const nbnxn_grid_t &iGrid, - const nbnxn_grid_t &jGrid, + const Grid &iGrid, + const Grid &jGrid, nbnxn_search_work_t *work, const nbnxn_atomdata_t *nbat, const t_blocka &exclusions, @@ -3269,14 +3274,15 @@ static void nbnxn_make_pairlist_part(const nbnxn_search *nbs, nbs_cycle_start(&work->cc[enbsCCsearch]); - if (jGrid.bSimple != pairlistIsSimple(*nbl) || - iGrid.bSimple != pairlistIsSimple(*nbl)) + if (jGrid.geometry().isSimple != pairlistIsSimple(*nbl) || + iGrid.geometry().isSimple != pairlistIsSimple(*nbl)) { gmx_incons("Grid incompatible with pair-list"); } sync_work(nbl); - GMX_ASSERT(nbl->na_ci == jGrid.na_c, "The cluster sizes in the list and grid should match"); + GMX_ASSERT(nbl->na_ci == jGrid.geometry().numAtomsICluster, + "The cluster sizes in the list and grid should match"); nbl->na_cj = Nbnxm::JClusterSizePerKernelType[kernelType]; na_cj_2log = get_2log(nbl->na_cj); @@ -3312,7 +3318,10 @@ static void nbnxn_make_pairlist_part(const nbnxn_search *nbs, rl_fep2 = rl_fep2*rl_fep2; } - rbb2 = boundingbox_only_distance2(iGrid, jGrid, nbl->rlist, pairlistIsSimple(*nbl)); + const Grid::Dimensions &iGridDims = iGrid.dimensions(); + const Grid::Dimensions &jGridDims = jGrid.dimensions(); + + rbb2 = boundingbox_only_distance2(iGridDims, jGridDims, nbl->rlist, pairlistIsSimple(*nbl)); if (debug) { @@ -3333,7 +3342,8 @@ static void nbnxn_make_pairlist_part(const nbnxn_search *nbs, } else { - const real listRangeCellToCell = listRangeForGridCellToGridCell(rlist, iGrid, jGrid); + const real listRangeCellToCell = + 
listRangeForGridCellToGridCell(rlist, iGrid.dimensions(), jGrid.dimensions()); if (d == XX && box[XX][XX] - fabs(box[YY][XX]) - fabs(box[ZZ][XX]) < listRangeCellToCell) { @@ -3351,30 +3361,30 @@ static void nbnxn_make_pairlist_part(const nbnxn_search *nbs, gmx::ArrayRef pbb_i; if (bSimple) { - bb_i = iGrid.bb; + bb_i = iGrid.iBoundingBoxes(); } else { - pbb_i = iGrid.pbb; + pbb_i = iGrid.packedBoundingBoxes(); } #else /* We use the normal bounding box format for both grid types */ - bb_i = iGrid.bb; + bb_i = iGrid.iBoundingBoxes(); #endif - gmx::ArrayRef bbcz_i = iGrid.bbcz; - gmx::ArrayRef flags_i = iGrid.flags; - gmx::ArrayRef bbcz_j = jGrid.bbcz; - int cell0_i = iGrid.cell0; + gmx::ArrayRef bbcz_i = iGrid.zBoundingBoxes(); + gmx::ArrayRef flags_i = iGrid.clusterFlags(); + gmx::ArrayRef bbcz_j = jGrid.zBoundingBoxes(); + int cell0_i = iGrid.cellOffset(); if (debug) { fprintf(debug, "nbl nc_i %d col.av. %.1f ci_block %d\n", - iGrid.nc, iGrid.nc/static_cast(iGrid.numCells[XX]*iGrid.numCells[YY]), ci_block); + iGrid.numCells(), iGrid.numCells()/static_cast(iGrid.numColumns()), ci_block); } numDistanceChecks = 0; - const real listRangeBBToJCell2 = gmx::square(listRangeForBoundingBoxToGridCell(rlist, jGrid)); + const real listRangeBBToJCell2 = gmx::square(listRangeForBoundingBoxToGridCell(rlist, jGrid.dimensions())); /* Initially ci_b and ci to 1 before where we want them to start, * as they will both be incremented in next_ci. @@ -3401,11 +3411,11 @@ static void nbnxn_make_pairlist_part(const nbnxn_search *nbs, } else { - bx1 = iGrid.c0[XX] + (ci_x+1)*iGrid.cellSize[XX]; + bx1 = iGridDims.lowerCorner[XX] + (ci_x+1)*iGridDims.cellSize[XX]; } - if (bx1 < jGrid.c0[XX]) + if (bx1 < jGridDims.lowerCorner[XX]) { - d2cx = gmx::square(jGrid.c0[XX] - bx1); + d2cx = gmx::square(jGridDims.lowerCorner[XX] - bx1); if (d2cx >= listRangeBBToJCell2) { @@ -3414,7 +3424,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search *nbs, } } - ci_xy = ci_x*iGrid.numCells[YY] + ci_y; + ci_xy = ci_x*iGridDims.numCells[YY] + ci_y; /* Loop over shift vectors in three dimensions */ for (int tz = -shp[ZZ]; tz <= shp[ZZ]; tz++) @@ -3444,7 +3454,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search *nbs, continue; } - bz1_frac = bz1/(iGrid.cxy_ind[ci_xy+1] - iGrid.cxy_ind[ci_xy]); + bz1_frac = bz1/iGrid.numCellsInColumn(ci_xy); if (bz1_frac < 0) { bz1_frac = 0; @@ -3462,12 +3472,12 @@ static void nbnxn_make_pairlist_part(const nbnxn_search *nbs, } else { - by0 = iGrid.c0[YY] + (ci_y )*iGrid.cellSize[YY] + shy; - by1 = iGrid.c0[YY] + (ci_y+1)*iGrid.cellSize[YY] + shy; + by0 = iGridDims.lowerCorner[YY] + (ci_y )*iGridDims.cellSize[YY] + shy; + by1 = iGridDims.lowerCorner[YY] + (ci_y + 1)*iGridDims.cellSize[YY] + shy; } get_cell_range(by0, by1, - jGrid, + jGridDims, d2z_cx, rlist, &cyf, &cyl); @@ -3477,13 +3487,13 @@ static void nbnxn_make_pairlist_part(const nbnxn_search *nbs, } d2z_cy = d2z; - if (by1 < jGrid.c0[YY]) + if (by1 < jGridDims.lowerCorner[YY]) { - d2z_cy += gmx::square(jGrid.c0[YY] - by1); + d2z_cy += gmx::square(jGridDims.lowerCorner[YY] - by1); } - else if (by0 > jGrid.c1[YY]) + else if (by0 > jGridDims.upperCorner[YY]) { - d2z_cy += gmx::square(by0 - jGrid.c1[YY]); + d2z_cy += gmx::square(by0 - jGridDims.upperCorner[YY]); } for (int tx = -shp[XX]; tx <= shp[XX]; tx++) @@ -3506,12 +3516,12 @@ static void nbnxn_make_pairlist_part(const nbnxn_search *nbs, } else { - bx0 = iGrid.c0[XX] + (ci_x )*iGrid.cellSize[XX] + shx; - bx1 = iGrid.c0[XX] + (ci_x+1)*iGrid.cellSize[XX] + shx; + bx0 = 
iGridDims.lowerCorner[XX] + (ci_x )*iGridDims.cellSize[XX] + shx; + bx1 = iGridDims.lowerCorner[XX] + (ci_x+1)*iGridDims.cellSize[XX] + shx; } get_cell_range(bx0, bx1, - jGrid, + jGridDims, d2z_cy, rlist, &cxf, &cxl); @@ -3542,13 +3552,13 @@ static void nbnxn_make_pairlist_part(const nbnxn_search *nbs, for (int cx = cxf; cx <= cxl; cx++) { d2zx = d2z; - if (jGrid.c0[XX] + cx*jGrid.cellSize[XX] > bx1) + if (jGridDims.lowerCorner[XX] + cx*jGridDims.cellSize[XX] > bx1) { - d2zx += gmx::square(jGrid.c0[XX] + cx*jGrid.cellSize[XX] - bx1); + d2zx += gmx::square(jGridDims.lowerCorner[XX] + cx*jGridDims.cellSize[XX] - bx1); } - else if (jGrid.c0[XX] + (cx+1)*jGrid.cellSize[XX] < bx0) + else if (jGridDims.lowerCorner[XX] + (cx+1)*jGridDims.cellSize[XX] < bx0) { - d2zx += gmx::square(jGrid.c0[XX] + (cx+1)*jGrid.cellSize[XX] - bx0); + d2zx += gmx::square(jGridDims.lowerCorner[XX] + (cx+1)*jGridDims.cellSize[XX] - bx0); } if (isIntraGridList && @@ -3568,17 +3578,17 @@ static void nbnxn_make_pairlist_part(const nbnxn_search *nbs, for (int cy = cyf_x; cy <= cyl; cy++) { - const int columnStart = jGrid.cxy_ind[cx*jGrid.numCells[YY] + cy]; - const int columnEnd = jGrid.cxy_ind[cx*jGrid.numCells[YY] + cy + 1]; + const int columnStart = jGrid.firstCellInColumn(cx*jGridDims.numCells[YY] + cy); + const int columnEnd = jGrid.firstCellInColumn(cx*jGridDims.numCells[YY] + cy + 1); d2zxy = d2zx; - if (jGrid.c0[YY] + cy*jGrid.cellSize[YY] > by1) + if (jGridDims.lowerCorner[YY] + cy*jGridDims.cellSize[YY] > by1) { - d2zxy += gmx::square(jGrid.c0[YY] + cy*jGrid.cellSize[YY] - by1); + d2zxy += gmx::square(jGridDims.lowerCorner[YY] + cy*jGridDims.cellSize[YY] - by1); } - else if (jGrid.c0[YY] + (cy+1)*jGrid.cellSize[YY] < by0) + else if (jGridDims.lowerCorner[YY] + (cy + 1)*jGridDims.cellSize[YY] < by0) { - d2zxy += gmx::square(jGrid.c0[YY] + (cy+1)*jGrid.cellSize[YY] - by0); + d2zxy += gmx::square(jGridDims.lowerCorner[YY] + (cy + 1)*jGridDims.cellSize[YY] - by0); } if (columnStart < columnEnd && d2zxy < listRangeBBToJCell2) { @@ -3725,7 +3735,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search *nbs, if (bFBufferFlag && getNumSimpleJClustersInList(*nbl) > ncj_old_i) { - bitmask_init_bit(&(work->buffer_flags.flag[(iGrid.cell0+ci) >> gridi_flag_shift]), th); + bitmask_init_bit(&(work->buffer_flags.flag[(iGrid.cellOffset() + ci) >> gridi_flag_shift]), th); } } @@ -4104,7 +4114,7 @@ nonbonded_verlet_t::PairlistSets::construct(const InteractionLocality iLocality for (int zi = 0; zi < nzi; zi++) { - const nbnxn_grid_t &iGrid = nbs->grid[zi]; + const Grid &iGrid = nbs->grid[zi]; int zj0; int zj1; @@ -4124,7 +4134,7 @@ nonbonded_verlet_t::PairlistSets::construct(const InteractionLocality iLocality } for (int zj = zj0; zj < zj1; zj++) { - const nbnxn_grid_t &jGrid = nbs->grid[zj]; + const Grid &jGrid = nbs->grid[zj]; if (debug) { diff --git a/src/gromacs/nbnxm/pairlist_simd_2xmm.h b/src/gromacs/nbnxm/pairlist_simd_2xmm.h index ecfb60e374..df06703fc2 100644 --- a/src/gromacs/nbnxm/pairlist_simd_2xmm.h +++ b/src/gromacs/nbnxm/pairlist_simd_2xmm.h @@ -73,7 +73,7 @@ icell_set_x_simd_2xnn(int ci, * \param[in,out] numDistanceChecks The number of distance checks performed */ static inline void -makeClusterListSimd2xnn(const nbnxn_grid_t &jGrid, +makeClusterListSimd2xnn(const Grid &jGrid, NbnxnPairlistCpu * nbl, int icluster, int firstCell, @@ -116,9 +116,9 @@ makeClusterListSimd2xnn(const nbnxn_grid_t &jGrid, while (!InRange && jclusterFirst <= jclusterLast) { #if NBNXN_SEARCH_BB_SIMD4 - d2 = subc_bb_dist2_simd4(0, bb_ci, 
jclusterFirst, jGrid.bbj); + d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterFirst, jGrid.jBoundingBoxes()); #else - d2 = subc_bb_dist2(0, bb_ci, jclusterFirst, jGrid.bbj); + d2 = subc_bb_dist2(0, bb_ci, jclusterFirst, jGrid.jBoundingBoxes()); #endif *numDistanceChecks += 2; @@ -133,7 +133,7 @@ makeClusterListSimd2xnn(const nbnxn_grid_t &jGrid, } else if (d2 < rlist2) { - xind_f = xIndexFromCj(cjFromCi(jGrid.cell0) + jclusterFirst); + xind_f = xIndexFromCj(cjFromCi(jGrid.cellOffset()) + jclusterFirst); jx_S = loadDuplicateHsimd(x_j + xind_f + 0*c_xStride2xNN); jy_S = loadDuplicateHsimd(x_j + xind_f + 1*c_xStride2xNN); @@ -174,9 +174,9 @@ makeClusterListSimd2xnn(const nbnxn_grid_t &jGrid, while (!InRange && jclusterLast > jclusterFirst) { #if NBNXN_SEARCH_BB_SIMD4 - d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterLast, jGrid.bbj); + d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterLast, jGrid.jBoundingBoxes()); #else - d2 = subc_bb_dist2(0, bb_ci, jclusterLast, jGrid.bbj); + d2 = subc_bb_dist2(0, bb_ci, jclusterLast, jGrid.jBoundingBoxes()); #endif *numDistanceChecks += 2; @@ -191,7 +191,7 @@ makeClusterListSimd2xnn(const nbnxn_grid_t &jGrid, } else if (d2 < rlist2) { - xind_l = xIndexFromCj(cjFromCi(jGrid.cell0) + jclusterLast); + xind_l = xIndexFromCj(cjFromCi(jGrid.cellOffset()) + jclusterLast); jx_S = loadDuplicateHsimd(x_j + xind_l + 0*c_xStride2xNN); jy_S = loadDuplicateHsimd(x_j + xind_l + 1*c_xStride2xNN); @@ -230,7 +230,7 @@ makeClusterListSimd2xnn(const nbnxn_grid_t &jGrid, { /* Store cj and the interaction mask */ nbnxn_cj_t cjEntry; - cjEntry.cj = cjFromCi(jGrid.cell0) + jcluster; + cjEntry.cj = cjFromCi(jGrid.cellOffset()) + jcluster; cjEntry.excl = get_imask_simd_2xnn(excludeSubDiagonal, icluster, jcluster); nbl->cj.push_back(cjEntry); } diff --git a/src/gromacs/nbnxm/pairlist_simd_4xm.h b/src/gromacs/nbnxm/pairlist_simd_4xm.h index 7ca3d8c6e1..11714a4239 100644 --- a/src/gromacs/nbnxm/pairlist_simd_4xm.h +++ b/src/gromacs/nbnxm/pairlist_simd_4xm.h @@ -79,7 +79,7 @@ icell_set_x_simd_4xn(int ci, * \param[in,out] numDistanceChecks The number of distance checks performed */ static inline void -makeClusterListSimd4xn(const nbnxn_grid_t &jGrid, +makeClusterListSimd4xn(const Grid &jGrid, NbnxnPairlistCpu * nbl, int icluster, int firstCell, @@ -129,9 +129,9 @@ makeClusterListSimd4xn(const nbnxn_grid_t &jGrid, while (!InRange && jclusterFirst <= jclusterLast) { #if NBNXN_SEARCH_BB_SIMD4 - d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterFirst, jGrid.bbj); + d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterFirst, jGrid.jBoundingBoxes()); #else - d2 = subc_bb_dist2(0, bb_ci, jclusterFirst, jGrid.bbj); + d2 = subc_bb_dist2(0, bb_ci, jclusterFirst, jGrid.jBoundingBoxes()); #endif *numDistanceChecks += 2; @@ -146,7 +146,7 @@ makeClusterListSimd4xn(const nbnxn_grid_t &jGrid, } else if (d2 < rlist2) { - xind_f = xIndexFromCj(cjFromCi(jGrid.cell0) + jclusterFirst); + xind_f = xIndexFromCj(cjFromCi(jGrid.cellOffset()) + jclusterFirst); jx_S = load(x_j + xind_f + 0*c_xStride4xN); jy_S = load(x_j + xind_f + 1*c_xStride4xN); @@ -200,9 +200,9 @@ makeClusterListSimd4xn(const nbnxn_grid_t &jGrid, while (!InRange && jclusterLast > jclusterFirst) { #if NBNXN_SEARCH_BB_SIMD4 - d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterLast, jGrid.bbj); + d2 = subc_bb_dist2_simd4(0, bb_ci, jclusterLast, jGrid.jBoundingBoxes()); #else - d2 = subc_bb_dist2(0, bb_ci, jclusterLast, jGrid.bbj); + d2 = subc_bb_dist2(0, bb_ci, jclusterLast, jGrid.jBoundingBoxes()); #endif *numDistanceChecks += 2; @@ -217,7 +217,7 @@ makeClusterListSimd4xn(const 
nbnxn_grid_t &jGrid, } else if (d2 < rlist2) { - xind_l = xIndexFromCj(cjFromCi(jGrid.cell0) + jclusterLast); + xind_l = xIndexFromCj(cjFromCi(jGrid.cellOffset()) + jclusterLast); jx_S = load(x_j +xind_l + 0*c_xStride4xN); jy_S = load(x_j +xind_l + 1*c_xStride4xN); @@ -268,7 +268,7 @@ makeClusterListSimd4xn(const nbnxn_grid_t &jGrid, { /* Store cj and the interaction mask */ nbnxn_cj_t cjEntry; - cjEntry.cj = cjFromCi(jGrid.cell0) + jcluster; + cjEntry.cj = cjFromCi(jGrid.cellOffset()) + jcluster; cjEntry.excl = get_imask_simd_4xn(excludeSubDiagonal, icluster, jcluster); nbl->cj.push_back(cjEntry); } diff --git a/src/gromacs/nbnxm/pairlistset.h b/src/gromacs/nbnxm/pairlistset.h index d550537f5b..26e54c99a3 100644 --- a/src/gromacs/nbnxm/pairlistset.h +++ b/src/gromacs/nbnxm/pairlistset.h @@ -48,6 +48,14 @@ * * TODO: Merge into the constructor */ +typedef void nbnxn_free_t (void *ptr); + +/* Tells if the pair-list corresponding to nb_kernel_type is simple. + * Returns FALSE for super-sub type pair-list. + */ +gmx_bool nbnxn_kernel_pairlist_simple(int nb_kernel_type); + +/* Initializes a set of pair lists stored in nbnxn_pairlist_set_t */ void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list); /*! \brief Prepare the list-set produced by the search for dynamic pruning diff --git a/src/gromacs/nbnxm/pairlistwork.h b/src/gromacs/nbnxm/pairlistwork.h index 5542cbecb9..35de7acdb6 100644 --- a/src/gromacs/nbnxm/pairlistwork.h +++ b/src/gromacs/nbnxm/pairlistwork.h @@ -67,11 +67,11 @@ struct NbnxnPairlistCpuWork } // The bounding boxes, pbc shifted, for each cluster - AlignedVector bb; + AlignedVector bb; // The coordinates, pbc shifted, for each atom - std::vector x; + std::vector x; // Aligned list for storing 4*DIM*GMX_SIMD_REAL_WIDTH reals - AlignedVector xSimd; + AlignedVector xSimd; }; // Protect data from cache pollution between threads @@ -109,13 +109,13 @@ struct NbnxnPairlistGpuWork } // The bounding boxes, pbc shifted, for each cluster - AlignedVector bb; + AlignedVector bb; // As bb, but in packed xxxx format - AlignedVector bbPacked; + AlignedVector bbPacked; // The coordinates, pbc shifted, for each atom - AlignedVector x; + AlignedVector x; // Aligned coordinate list used for 4*DIM*GMX_SIMD_REAL_WIDTH floats - AlignedVector xSimd; + AlignedVector xSimd; }; NbnxnPairlistGpuWork() : -- 2.22.0
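A minimal usage sketch of the new column accessors declared in grid.h above, assuming a populated Nbnxm::Grid named grid and the search object's atom-order array a; processColumnAtom is a hypothetical callback used here only for illustration:

    // Visit the real (non-filler) atoms of every column of one grid.
    // The returned indices already include the grid's cellOffset(), so
    // they index directly into the atom arrays combined over all grids.
    for (int cxy = 0; cxy < grid.numColumns(); cxy++)
    {
        const int atomOffset = grid.firstAtomInColumn(cxy); // first atom slot of the column
        const int numAtoms   = grid.numAtomsInColumn(cxy);  // excludes padding/filler slots
        for (int i = 0; i < numAtoms; i++)
        {
            processColumnAtom(a[atomOffset + i]); // hypothetical per-atom callback
        }
    }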