 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */

/*! \internal \file
 * \brief Implements the Grid class.
 *
 * \author Berk Hess <hess@kth.se>
 * \ingroup module_nbnxm
 */
#include "gromacs/domdec/domdec_struct.h"
#include "gromacs/math/utilities.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdlib/updategroupscog.h"
#include "gromacs/mdtypes/forcerec.h" // only for GET_CGINFO_*
#include "gromacs/nbnxm/atomdata.h"
#include "gromacs/nbnxm/nbnxm.h"
#include "gromacs/nbnxm/nbnxm_geometry.h"
#include "gromacs/nbnxm/pairlist.h"
#include "gromacs/nbnxm/pairlistset.h"
#include "gromacs/simd/simd.h"
#include "gromacs/simd/vector_operations.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/smalloc.h"

struct gmx_domdec_zones_t;
Grid::Geometry::Geometry(const PairlistType pairlistType) :
    isSimple(pairlistType != PairlistType::Hierarchical8x8),
    numAtomsICluster(IClusterSizePerListType[pairlistType]),
    numAtomsJCluster(JClusterSizePerListType[pairlistType]),
    numAtomsPerCell((isSimple ? 1 : c_gpuNumClusterPerCell)*numAtomsICluster),
    numAtomsICluster2Log(get_2log(numAtomsICluster))
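
/* Illustrative example (values assumed, not taken from this file): for
 * PairlistType::Hierarchical8x8 this yields isSimple = false and
 * numAtomsICluster = numAtomsJCluster = 8; assuming c_gpuNumClusterPerCell = 8
 * (2x2x2 clusters per cell), numAtomsPerCell = 8*8 = 64.
 */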
Grid::Grid(const PairlistType pairlistType) :
    geometry_(pairlistType)
/*! \brief Returns the atom density (> 0) of a rectangular grid */
static real gridAtomDensity(int        numAtoms,
                            const rvec lowerCorner,
                            const rvec upperCorner)
{
    rvec size;

    /* To avoid zero density we use a minimum of 1 atom */
    numAtoms = std::max(numAtoms, 1);

    rvec_sub(upperCorner, lowerCorner, size);

    return numAtoms/(size[XX]*size[YY]*size[ZZ]);
}
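
/* Worked example (illustrative numbers only): 12000 atoms in a 6x6x6 nm box
 * give a density of 12000/216 ~= 55.6 atoms/nm^3. setDimensions() below then
 * targets a cell edge of cbrt(numAtomsInCell/density), e.g.
 * cbrt(4/55.6) ~= 0.42 nm for a 4-atom cluster.
 */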
void Grid::setDimensions(const nbnxn_search *nbs,

                         const rvec          lowerCorner,
                         const rvec          upperCorner,

                         const real          maxAtomGroupRadius)

    /* For the home zone we compute the density when not set (=-1) or when =0 */
    if (ddZone == 0 && atomDensity <= 0)

        atomDensity = gridAtomDensity(numAtoms, lowerCorner, upperCorner);

    dimensions_.atomDensity        = atomDensity;
    dimensions_.maxAtomGroupRadius = maxAtomGroupRadius;

    rvec_sub(upperCorner, lowerCorner, size);

    if (numAtoms > geometry_.numAtomsPerCell)

        GMX_ASSERT(atomDensity > 0, "With one or more atoms, the density should be positive");

        /* target cell length */

        if (geometry_.isSimple)
            /* To minimize the number of zero interactions, we should make
             * the largest of the i/j cell sizes cubic.
             */
            int numAtomsInCell = std::max(geometry_.numAtomsICluster,
                                          geometry_.numAtomsJCluster);

            /* Approximately cubic cells */
            real tlen = std::cbrt(numAtomsInCell/atomDensity);

            /* Approximately cubic sub cells */
            real tlen = std::cbrt(geometry_.numAtomsICluster/atomDensity);
            tlen_x    = tlen*c_gpuNumClusterPerCellX;
            tlen_y    = tlen*c_gpuNumClusterPerCellY;

        /* We round ncx and ncy down, because we get fewer cell pairs
         * in the pairlist when the fixed cell dimensions (x,y) are
         * larger than the variable one (z) than the other way around.
         */
        dimensions_.numCells[XX] = std::max(1, static_cast<int>(size[XX]/tlen_x));
        dimensions_.numCells[YY] = std::max(1, static_cast<int>(size[YY]/tlen_y));

        dimensions_.numCells[XX] = 1;
        dimensions_.numCells[YY] = 1;
    for (int d = 0; d < DIM - 1; d++)

        dimensions_.cellSize[d]    = size[d]/dimensions_.numCells[d];
        dimensions_.invCellSize[d] = 1/dimensions_.cellSize[d];

        /* This is a non-home zone, add an extra row of cells
         * for particles communicated for bonded interactions.
         * These can be beyond the cut-off. It doesn't matter where
         * they end up on the grid, but for performance it's better
         * if they don't end up in cells that can be within cut-off range.
         */
        dimensions_.numCells[XX]++;
        dimensions_.numCells[YY]++;

    /* We need one additional cell entry for particles moved by DD */
    cxy_na_.resize(numColumns() + 1);
    cxy_ind_.resize(numColumns() + 2);

    for (nbnxn_search_work_t &work : nbs->work)

        work.cxy_na.resize(numColumns() + 1);

    /* Worst case scenario of 1 atom in each last cell */
    int maxNumCells;
    if (geometry_.numAtomsJCluster <= geometry_.numAtomsICluster)

        maxNumCells = numAtoms/geometry_.numAtomsPerCell + numColumns();

        maxNumCells = numAtoms/geometry_.numAtomsPerCell +
                      numColumns()*geometry_.numAtomsJCluster/geometry_.numAtomsICluster;
    if (!geometry_.isSimple)

        numClusters_.resize(maxNumCells);

    bbcz_.resize(maxNumCells*NNBSBB_D);

    /* This resize also zeroes the contents, which avoids possible
     * floating-point exceptions in SIMD with the unused bb elements.
     */
    if (geometry_.isSimple)

        bb_.resize(maxNumCells);

        pbb_.resize(maxNumCells*c_gpuNumClusterPerCell/STRIDE_PBB*NNBSBB_XXXX);

        bb_.resize(maxNumCells*c_gpuNumClusterPerCell);

    if (geometry_.numAtomsJCluster == geometry_.numAtomsICluster)

        GMX_ASSERT(geometry_.isSimple, "Only CPU lists should have different i/j cluster sizes");

        bbjStorage_.resize(maxNumCells*geometry_.numAtomsICluster/geometry_.numAtomsJCluster);

    flags_.resize(maxNumCells);

    fep_.resize(maxNumCells*geometry_.numAtomsPerCell/geometry_.numAtomsICluster);

    copy_rvec(lowerCorner, dimensions_.lowerCorner);
    copy_rvec(upperCorner, dimensions_.upperCorner);
    copy_rvec(size,        dimensions_.gridSize);
/* We need to sort particles in grid columns on z-coordinate.
 * As particles are very often distributed homogeneously, we use a sorting
 * algorithm similar to pigeonhole sort. We multiply the z-coordinate
 * by a factor, cast to an int and try to store in that hole. If the hole
 * is full, we move this or another particle. A second pass is needed to make
 * contiguous elements. c_sortGridRatio is the ratio of holes to particles.
 * 4 is the optimal value for homogeneous particle distribution and allows
 * for an O(#particles) sort up to distributions where all particles
 * are concentrated in 1/4 of the space. No NlogN fallback is implemented,
 * as it can be expensive to detect inhomogeneous particle distributions.
 */

/*! \brief Ratio of grid cells to atoms */
static constexpr int c_sortGridRatio         = 4;
/*! \brief Maximum ratio of holes used, in the worst case all particles
 * end up in the last hole and we need num. atoms extra holes at the end.
 */
static constexpr int c_sortGridMaxSizeFactor = c_sortGridRatio + 1;
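
/* Minimal sketch (illustration only, not part of the build) of the
 * pigeonhole idea used by sort_atoms() below; unlike the real code, which
 * shifts occupied slots to keep z-order, this version just probes upwards:
 *
 *   std::vector<int> holes(n*c_sortGridMaxSizeFactor, -1);
 *   for (int i = 0; i < n; i++)
 *   {
 *       int h = static_cast<int>((z[i] - h0)*invh); // target hole
 *       while (holes[h] >= 0) { h++; }              // find a free hole
 *       holes[h] = i;
 *   }
 *   // Second pass: scan holes[] and append the non-empty entries in order.
 */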
/*! \brief Sorts particle indices a on coordinates x along dim.
 *
 * Backwards tells if we want decreasing instead of increasing coordinates.
 * h0 is the minimum of the coordinate range.
 * invh is the 1/length of the sorting range.
 * n_per_h (>=n) is the expected average number of particles per 1/invh.
 * sort is the sorting work array.
 * sort should have a size of at least n_per_h*c_sortGridRatio + n,
 * or easier, allocate at least n*c_sortGridMaxSizeFactor elements.
 */
static void sort_atoms(int dim, gmx_bool Backwards,
                       int gmx_unused dd_zone,
                       bool gmx_unused relevantAtomsAreWithinGridBounds,
                       int *a, int n,
                       gmx::ArrayRef<const gmx::RVec> x,
                       real h0, real invh, int n_per_h,
                       gmx::ArrayRef<int> sort)

    GMX_ASSERT(n <= n_per_h, "We require n <= n_per_h");

    /* Transform the inverse range height into the inverse hole height */
    invh *= n_per_h*c_sortGridRatio;

    /* Set nsort to the maximum possible number of holes used.
     * In the worst case all n elements end up in the last bin.
     */
    int nsort = n_per_h*c_sortGridRatio + n;

    /* Determine the index range used, so we can limit it for the second pass */
    int zi_min = INT_MAX;

    /* Sort the particles using a simple index sort */
    for (int i = 0; i < n; i++)
        /* The cast takes care of floating-point rounding effects below zero.
         * This code assumes particles are less than 1/c_sortGridRatio
         * times the box height out of the box.
         */
        int zi = static_cast<int>((x[a[i]][dim] - h0)*invh);

        /* As we can have rounding effects, we use > instead of >= here */
        if (relevantAtomsAreWithinGridBounds &&
            (zi < 0 || (dd_zone == 0 && zi > n_per_h*c_sortGridRatio)))

            gmx_fatal(FARGS, "(int)((x[%d][%c]=%f - %f)*%f) = %d, not in 0 - %d*%d\n",
                      a[i], 'x'+dim, x[a[i]][dim], h0, invh, zi,
                      n_per_h, c_sortGridRatio);
        /* In a non-local domain, particles communicated for bonded interactions
         * can be far beyond the grid size, which is set by the non-bonded
         * cut-off distance. We sort such particles into the last cell.
         */
        if (zi > n_per_h*c_sortGridRatio)

            zi = n_per_h*c_sortGridRatio;

        /* Ideally this particle should go in sort cell zi,
         * but that might already be in use,
         * in that case find the first empty cell higher up.
         */

            zi_min = std::min(zi_min, zi);
            zi_max = std::max(zi_max, zi);

            /* We have multiple atoms in the same sorting slot.
             * Sort on real z for minimal bounding box size.
             * There is an extra check for identical z to give a
             * well-defined output order, independent of the input order,
             * which ensures binary reproducibility after restarts.
             */
            while (sort[zi] >= 0 && ( x[a[i]][dim] > x[sort[zi]][dim] ||
                                      (x[a[i]][dim] == x[sort[zi]][dim] &&

                /* Shift all elements by one slot until we find an empty slot */

                while (sort[zim] >= 0)

                zi_max = std::max(zi_max, zim);

            zi_max = std::max(zi_max, zi);

    for (int zi = 0; zi < nsort; zi++)

    for (int zi = zi_max; zi >= zi_min; zi--)

        gmx_incons("Lost particles while sorting");
//! Returns double up to one least significant float bit smaller than x
static double R2F_D(const float x)
{
    return static_cast<float>(x >= 0 ? (1 - GMX_FLOAT_EPS)*x : (1 + GMX_FLOAT_EPS)*x);
}
//! Returns double up to one least significant float bit larger than x
static double R2F_U(const float x)
{
    return static_cast<float>(x >= 0 ? (1 + GMX_FLOAT_EPS)*x : (1 - GMX_FLOAT_EPS)*x);
}
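
/* Example (illustrative): with GMX_FLOAT_EPS ~= 1.2e-7, R2F_D(2.0f) returns
 * ~1.99999976 and R2F_U(2.0f) returns ~2.00000024, i.e. each bound is moved
 * outwards by one float ulp so the stored box never underestimates the
 * true extent.
 */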
//! Returns x unchanged (single-precision build)
static float R2F_D(const float x)
{
    return x;
}
//! Returns x unchanged (single-precision build)
static float R2F_U(const float x)
{
    return x;
}
//! Computes the bounding box for na coordinates in order x,y,z, bb order xyz0
static void calc_bounding_box(int na, int stride, const real *x,

    real xl, xh, yl, yh, zl, zh;

    for (int j = 1; j < na; j++)

        xl = std::min(xl, x[i+XX]);
        xh = std::max(xh, x[i+XX]);
        yl = std::min(yl, x[i+YY]);
        yh = std::max(yh, x[i+YY]);
        zl = std::min(zl, x[i+ZZ]);
        zh = std::max(zh, x[i+ZZ]);

    /* Note: possible double to float conversion here */
    bb->lower[BB_X] = R2F_D(xl);
    bb->lower[BB_Y] = R2F_D(yl);
    bb->lower[BB_Z] = R2F_D(zl);
    bb->upper[BB_X] = R2F_U(xh);
    bb->upper[BB_Y] = R2F_U(yh);
    bb->upper[BB_Z] = R2F_U(zh);
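
/* Layout note: "bb order xyz0" means each corner is stored as four floats,
 * lower = {xl, yl, zl, padding} and upper = {xh, yh, zh, padding}, so a
 * corner can be handled with a single 4-wide SIMD load/store.
 */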
/*! \brief Computes the bounding box for na packed coordinates, bb order xyz0 */
static void calc_bounding_box_x_x4(int na, const real *x,

    real xl, xh, yl, yh, zl, zh;

    for (int j = 1; j < na; j++)

        xl = std::min(xl, x[j+XX*c_packX4]);
        xh = std::max(xh, x[j+XX*c_packX4]);
        yl = std::min(yl, x[j+YY*c_packX4]);
        yh = std::max(yh, x[j+YY*c_packX4]);
        zl = std::min(zl, x[j+ZZ*c_packX4]);
        zh = std::max(zh, x[j+ZZ*c_packX4]);

    /* Note: possible double to float conversion here */
    bb->lower[BB_X] = R2F_D(xl);
    bb->lower[BB_Y] = R2F_D(yl);
    bb->lower[BB_Z] = R2F_D(zl);
    bb->upper[BB_X] = R2F_U(xh);
    bb->upper[BB_Y] = R2F_U(yh);
    bb->upper[BB_Z] = R2F_U(zh);
/*! \brief Computes the bounding box for na packed coordinates, bb order xyz0 */
static void calc_bounding_box_x_x8(int na, const real *x,

    real xl, xh, yl, yh, zl, zh;

    for (int j = 1; j < na; j++)

        xl = std::min(xl, x[j+XX*c_packX8]);
        xh = std::max(xh, x[j+XX*c_packX8]);
        yl = std::min(yl, x[j+YY*c_packX8]);
        yh = std::max(yh, x[j+YY*c_packX8]);
        zl = std::min(zl, x[j+ZZ*c_packX8]);
        zh = std::max(zh, x[j+ZZ*c_packX8]);

    /* Note: possible double to float conversion here */
    bb->lower[BB_X] = R2F_D(xl);
    bb->lower[BB_Y] = R2F_D(yl);
    bb->lower[BB_Z] = R2F_D(zl);
    bb->upper[BB_X] = R2F_U(xh);
    bb->upper[BB_Y] = R2F_U(yh);
    bb->upper[BB_Z] = R2F_U(zh);
/*! \brief Computes the bounding box for na packed coordinates, bb order xyz0 */
gmx_unused static void calc_bounding_box_x_x4_halves(int na, const real *x,

    // TODO: During SIMDv2 transition only some archs use namespace (remove when done)

    calc_bounding_box_x_x4(std::min(na, 2), x, bbj);

    calc_bounding_box_x_x4(std::min(na-2, 2), x+(c_packX4>>1), bbj+1);

    /* Set the "empty" bounding box to the same as the first one,
     * so we don't need to treat special cases in the rest of the code.
     */
#if NBNXN_SEARCH_BB_SIMD4
    store4(&bbj[1].lower[0], load4(&bbj[0].lower[0]));
    store4(&bbj[1].upper[0], load4(&bbj[0].upper[0]));

#if NBNXN_SEARCH_BB_SIMD4
    store4(&bb->lower[0], min(load4(&bbj[0].lower[0]), load4(&bbj[1].lower[0])));
    store4(&bb->upper[0], max(load4(&bbj[0].upper[0]), load4(&bbj[1].upper[0])));

    for (i = 0; i < NNBSBB_C; i++)

        bb->lower[i] = std::min(bbj[0].lower[i], bbj[1].lower[i]);
        bb->upper[i] = std::max(bbj[0].upper[i], bbj[1].upper[i]);
#if NBNXN_SEARCH_BB_SIMD4

/*! \brief Computes the bounding box for na coordinates in order xyz, bb order xxxxyyyyzzzz */
static void calc_bounding_box_xxxx(int na, int stride, const real *x, float *bb)

    real xl, xh, yl, yh, zl, zh;

    for (int j = 1; j < na; j++)

        xl = std::min(xl, x[i+XX]);
        xh = std::max(xh, x[i+XX]);
        yl = std::min(yl, x[i+YY]);
        yh = std::max(yh, x[i+YY]);
        zl = std::min(zl, x[i+ZZ]);
        zh = std::max(zh, x[i+ZZ]);

    /* Note: possible double to float conversion here */
    bb[0*STRIDE_PBB] = R2F_D(xl);
    bb[1*STRIDE_PBB] = R2F_D(yl);
    bb[2*STRIDE_PBB] = R2F_D(zl);
    bb[3*STRIDE_PBB] = R2F_U(xh);
    bb[4*STRIDE_PBB] = R2F_U(yh);
    bb[5*STRIDE_PBB] = R2F_U(zh);

#endif /* NBNXN_SEARCH_BB_SIMD4 */
#if NBNXN_SEARCH_SIMD4_FLOAT_X_BB

/*! \brief Computes the bounding box for na coordinates in order xyz?, bb order xyz0 */
static void calc_bounding_box_simd4(int na, const float *x,

    // TODO: During SIMDv2 transition only some archs use namespace (remove when done)

    Simd4Float bb_0_S, bb_1_S;

    for (int i = 1; i < na; i++)

        x_S    = load4(x+i*NNBSBB_C);
        bb_0_S = min(bb_0_S, x_S);
        bb_1_S = max(bb_1_S, x_S);

    store4(&bb->lower[0], bb_0_S);
    store4(&bb->upper[0], bb_1_S);

/*! \brief Computes the bounding box for na coordinates in order xyz?, bb order xxxxyyyyzzzz */
static void calc_bounding_box_xxxx_simd4(int na, const float *x,
                                         BoundingBox *bb_work_aligned,

    calc_bounding_box_simd4(na, x, bb_work_aligned);

    bb[0*STRIDE_PBB] = bb_work_aligned->lower[BB_X];
    bb[1*STRIDE_PBB] = bb_work_aligned->lower[BB_Y];
    bb[2*STRIDE_PBB] = bb_work_aligned->lower[BB_Z];
    bb[3*STRIDE_PBB] = bb_work_aligned->upper[BB_X];
    bb[4*STRIDE_PBB] = bb_work_aligned->upper[BB_Y];
    bb[5*STRIDE_PBB] = bb_work_aligned->upper[BB_Z];

#endif /* NBNXN_SEARCH_SIMD4_FLOAT_X_BB */
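
/* Layout note (assuming STRIDE_PBB = 4): the packed "xxxxyyyyzzzz" format
 * interleaves STRIDE_PBB bounding boxes, storing the same component of
 * consecutive boxes contiguously (all x-lower values, then all y-lower,
 * ...), so one SIMD4 load fetches one component of four boxes at once.
 */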
/*! \brief Combines pairs of consecutive bounding boxes */
static void combine_bounding_box_pairs(const Grid                       &grid,
                                       gmx::ArrayRef<const BoundingBox>  bb,
                                       gmx::ArrayRef<BoundingBox>        bbj)

    // TODO: During SIMDv2 transition only some archs use namespace (remove when done)

    for (int i = 0; i < grid.numColumns(); i++)

        /* Starting bb in a column is expected to be 2-aligned */
        const int sc2 = grid.firstCellInColumn(i) >> 1;
        /* For odd numbers skip the last bb here */
        const int nc2 = (grid.numAtomsInColumn(i) + 3) >> (2 + 1);

        for (c2 = sc2; c2 < sc2 + nc2; c2++)

#if NBNXN_SEARCH_BB_SIMD4
            Simd4Float min_S, max_S;

            min_S = min(load4(&bb[c2*2+0].lower[0]),
                        load4(&bb[c2*2+1].lower[0]));
            max_S = max(load4(&bb[c2*2+0].upper[0]),
                        load4(&bb[c2*2+1].upper[0]));
            store4(&bbj[c2].lower[0], min_S);
            store4(&bbj[c2].upper[0], max_S);

            for (int j = 0; j < NNBSBB_C; j++)

                bbj[c2].lower[j] = std::min(bb[c2*2 + 0].lower[j],
                                            bb[c2*2 + 1].lower[j]);
                bbj[c2].upper[j] = std::max(bb[c2*2 + 0].upper[j],
                                            bb[c2*2 + 1].upper[j]);

        if (((grid.numAtomsInColumn(i) + 3) >> 2) & 1)

            /* The bb count in this column is odd: duplicate the last bb */
            for (int j = 0; j < NNBSBB_C; j++)

                bbj[c2].lower[j] = bb[c2*2].lower[j];
                bbj[c2].upper[j] = bb[c2*2].upper[j];
/*! \brief Prints the average bb size, used for debug output */
static void print_bbsizes_simple(FILE       *fp,

    for (int c = 0; c < grid.numCells(); c++)

        for (int d = 0; d < DIM; d++)

            ba[d] += grid.iBoundingBoxes()[c].upper[d] - grid.iBoundingBoxes()[c].lower[d];

    dsvmul(1.0/grid.numCells(), ba, ba);

    const Grid::Dimensions &dims = grid.dimensions();
    real                    avgCellSizeZ =
        (dims.atomDensity > 0 ?
         grid.geometry().numAtomsICluster/(dims.atomDensity*dims.cellSize[XX]*dims.cellSize[YY]) : 0.0);

    fprintf(fp, "ns bb: grid %4.2f %4.2f %4.2f abs %4.2f %4.2f %4.2f rel %4.2f %4.2f %4.2f\n",

            ba[XX], ba[YY], ba[ZZ],
            ba[XX]*dims.invCellSize[XX],
            ba[YY]*dims.invCellSize[YY],
            dims.atomDensity > 0 ? ba[ZZ]/avgCellSizeZ : 0.0);
/*! \brief Prints the average bb size, used for debug output */
static void print_bbsizes_supersub(FILE       *fp,

    for (int c = 0; c < grid.numCells(); c++)

        for (int s = 0; s < grid.numClustersPerCell()[c]; s += STRIDE_PBB)

            int cs_w = (c*c_gpuNumClusterPerCell + s)/STRIDE_PBB;
            for (int i = 0; i < STRIDE_PBB; i++)

                for (int d = 0; d < DIM; d++)

                        grid.packedBoundingBoxes()[cs_w*NNBSBB_XXXX + (DIM + d)*STRIDE_PBB + i] -
                        grid.packedBoundingBoxes()[cs_w*NNBSBB_XXXX + d       *STRIDE_PBB + i];

        for (int s = 0; s < grid.numClustersPerCell()[c]; s++)

            int cs = c*c_gpuNumClusterPerCell + s;
            for (int d = 0; d < DIM; d++)

                ba[d] += grid.iBoundingBoxes()[cs].upper[d] - grid.iBoundingBoxes()[cs].lower[d];

        ns += grid.numClustersPerCell()[c];

    dsvmul(1.0/ns, ba, ba);

    const Grid::Dimensions &dims = grid.dimensions();
    const real              avgClusterSizeZ =
        (dims.atomDensity > 0 ?
         grid.geometry().numAtomsPerCell/(dims.atomDensity*dims.cellSize[XX]*dims.cellSize[YY]*c_gpuNumClusterPerCellZ) : 0.0);

    fprintf(fp, "ns bb: grid %4.2f %4.2f %4.2f abs %4.2f %4.2f %4.2f rel %4.2f %4.2f %4.2f\n",
            dims.cellSize[XX]/c_gpuNumClusterPerCellX,
            dims.cellSize[YY]/c_gpuNumClusterPerCellY,

            ba[XX], ba[YY], ba[ZZ],
            ba[XX]*c_gpuNumClusterPerCellX*dims.invCellSize[XX],
            ba[YY]*c_gpuNumClusterPerCellY*dims.invCellSize[YY],
            dims.atomDensity > 0 ? ba[ZZ]/avgClusterSizeZ : 0.0);
/*! \brief Set non-bonded interaction flags for the current cluster.
 *
 * Sorts atoms on LJ coefficients: !=0 first, ==0 at the end.
 */
static void sort_cluster_on_flag(int                 numAtomsInCluster,

                                 gmx::ArrayRef<int>  order,

    constexpr int c_maxNumAtomsInCluster = 8;
    int           sort1[c_maxNumAtomsInCluster];
    int           sort2[c_maxNumAtomsInCluster];

    GMX_ASSERT(numAtomsInCluster <= c_maxNumAtomsInCluster, "Need to increase c_maxNumAtomsInCluster to support larger clusters");
    for (int s = atomStart; s < atomEnd; s += numAtomsInCluster)

        /* Make lists for this (sub-)cell on atoms with and without LJ */

        gmx_bool haveQ = FALSE;

        for (int a = s; a < std::min(s + numAtomsInCluster, atomEnd); a++)

            haveQ = haveQ || GET_CGINFO_HAS_Q(atinfo[order[a]]);

            if (GET_CGINFO_HAS_VDW(atinfo[order[a]]))

                sort1[n1++] = order[a];

                sort2[n2++] = order[a];

        /* If we don't have atoms with LJ, there's nothing to sort */

            *flags |= NBNXN_CI_DO_LJ(subc);

            if (2*n1 <= numAtomsInCluster)

                /* Only sort when strictly necessary. Ordering particles
                 * can lead to less accurate summation due to rounding,
                 * both for LJ and Coulomb interactions.
                 */
                if (2*(a_lj_max - s) >= numAtomsInCluster)

                    for (int i = 0; i < n1; i++)

                        order[atomStart + i] = sort1[i];

                    for (int j = 0; j < n2; j++)

                        order[atomStart + n1 + j] = sort2[j];

                *flags |= NBNXN_CI_HALF_LJ(subc);

            *flags |= NBNXN_CI_DO_COUL(subc);
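
/* Illustrative example (hypothetical input): with numAtomsInCluster = 4 and
 * atoms ordered {LJ+Q, LJ+Q, Q-only, Q-only}, n1 = 2 and 2*n1 <= 4, so the
 * cluster gets NBNXN_CI_DO_LJ and NBNXN_CI_HALF_LJ set and the kernels can
 * skip LJ interactions for the second half of the i-atoms.
 */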
/*! \brief Fill a pair search cell with atoms.
 *
 * Potentially sorts atoms and sets the interaction flags.
 */
void Grid::fillCell(nbnxn_search                   *nbs,
                    nbnxn_atomdata_t               *nbat,

                    gmx::ArrayRef<const gmx::RVec>  x,
                    BoundingBox gmx_unused         *bb_work_aligned)

    const int numAtoms = atomEnd - atomStart;

    if (geometry_.isSimple)

        /* Note that non-local grids are already sorted.
         * Then sort_cluster_on_flag will only set the flags and the sorting
         * will not affect the atom order.
         */
        sort_cluster_on_flag(geometry_.numAtomsICluster, atomStart, atomEnd, atinfo, nbs->a,
                             flags_.data() + atomToCluster(atomStart) - cellOffset_);

    /* Set the fep flag for perturbed atoms in this (sub-)cell */

        /* The grid-local cluster/(sub-)cell index */
        int cell = atomToCluster(atomStart) - cellOffset_*(geometry_.isSimple ? 1 : c_gpuNumClusterPerCell);

        for (int at = atomStart; at < atomEnd; at++)

            if (nbs->a[at] >= 0 && GET_CGINFO_FEP(atinfo[nbs->a[at]]))

                fep_[cell] |= (1 << (at - atomStart));

    /* Now we have sorted the atoms, set the cell indices */
    for (int at = atomStart; at < atomEnd; at++)

        nbs->cell[nbs->a[at]] = at;
    copy_rvec_to_nbat_real(nbs->a.data() + atomStart, numAtoms, geometry_.numAtomsICluster,
                           as_rvec_array(x.data()),
                           nbat->XFormat, nbat->x().data(), atomStart);

    if (nbat->XFormat == nbatX4)

        /* Store the bounding boxes as xyz.xyz. */
        size_t       offset = atomToCluster(atomStart - cellOffset_*geometry_.numAtomsICluster);
        BoundingBox *bb_ptr = bb_.data() + offset;

#if GMX_SIMD && GMX_SIMD_REAL_WIDTH == 2
        if (2*geometry_.numAtomsJCluster == geometry_.numAtomsICluster)

            calc_bounding_box_x_x4_halves(numAtoms, nbat->x().data() + atom_to_x_index<c_packX4>(atomStart), bb_ptr,
                                          bbj_.data() + offset*2);

            calc_bounding_box_x_x4(numAtoms, nbat->x().data() + atom_to_x_index<c_packX4>(atomStart), bb_ptr);

    else if (nbat->XFormat == nbatX8)

        /* Store the bounding boxes as xyz.xyz. */
        size_t       offset = atomToCluster(atomStart - cellOffset_*geometry_.numAtomsICluster);
        BoundingBox *bb_ptr = bb_.data() + offset;

        calc_bounding_box_x_x8(numAtoms, nbat->x().data() + atom_to_x_index<c_packX8>(atomStart), bb_ptr);

    else if (!geometry_.isSimple)

        /* Store the bounding boxes in a format convenient
         * for SIMD4 calculations: xxxxyyyyzzzz...
         */

            ((atomStart - cellOffset_*geometry_.numAtomsPerCell) >> (geometry_.numAtomsICluster2Log + STRIDE_PBB_2LOG))*NNBSBB_XXXX +
            (((atomStart - cellOffset_*geometry_.numAtomsPerCell) >> geometry_.numAtomsICluster2Log) & (STRIDE_PBB - 1));

#if NBNXN_SEARCH_SIMD4_FLOAT_X_BB
        if (nbat->XFormat == nbatXYZQ)

            calc_bounding_box_xxxx_simd4(numAtoms, nbat->x().data() + atomStart*nbat->xstride,
                                         bb_work_aligned, pbb_ptr);

            calc_bounding_box_xxxx(numAtoms, nbat->xstride, nbat->x().data() + atomStart*nbat->xstride,

            fprintf(debug, "cell %4d bb %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f\n",
                    atomToCluster(atomStart),
                    pbb_ptr[0*STRIDE_PBB], pbb_ptr[3*STRIDE_PBB],
                    pbb_ptr[1*STRIDE_PBB], pbb_ptr[4*STRIDE_PBB],
                    pbb_ptr[2*STRIDE_PBB], pbb_ptr[5*STRIDE_PBB]);
        /* Store the bounding boxes as xyz.xyz. */
        BoundingBox *bb_ptr = bb_.data() + atomToCluster(atomStart - cellOffset_*geometry_.numAtomsPerCell);

        calc_bounding_box(numAtoms, nbat->xstride, nbat->x().data() + atomStart*nbat->xstride,

            int bbo = atomToCluster(atomStart - cellOffset_*geometry_.numAtomsPerCell);
            fprintf(debug, "cell %4d bb %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f\n",
                    atomToCluster(atomStart),
                    bb_[bbo].lower[BB_X],
                    bb_[bbo].lower[BB_Y],
                    bb_[bbo].lower[BB_Z],
                    bb_[bbo].upper[BB_X],
                    bb_[bbo].upper[BB_Y],
                    bb_[bbo].upper[BB_Z]);
void Grid::sortColumnsCpuGeometry(nbnxn_search *nbs,
                                  int atomStart, int atomEnd,

                                  gmx::ArrayRef<const gmx::RVec> x,
                                  nbnxn_atomdata_t *nbat,
                                  int cxy_start, int cxy_end,
                                  gmx::ArrayRef<int> sort_work)

        fprintf(debug, "cell_offset %d sorting columns %d - %d, atoms %d - %d\n",
                cellOffset_, cxy_start, cxy_end, atomStart, atomEnd);

    const bool relevantAtomsAreWithinGridBounds = (dimensions_.maxAtomGroupRadius == 0);

    const int  numAtomsPerCell = geometry_.numAtomsPerCell;

    /* Sort the atoms within each x,y column in 3 dimensions */
    for (int cxy = cxy_start; cxy < cxy_end; cxy++)

        const int numAtoms   = numAtomsInColumn(cxy);
        const int numCellsZ  = cxy_ind_[cxy + 1] - cxy_ind_[cxy];
        const int atomOffset = firstAtomInColumn(cxy);

        /* Sort the atoms within each x,y column on z coordinate */
        sort_atoms(ZZ, FALSE, dd_zone,
                   relevantAtomsAreWithinGridBounds,
                   nbs->a.data() + atomOffset, numAtoms, x,
                   dimensions_.lowerCorner[ZZ],
                   1.0/dimensions_.gridSize[ZZ], numCellsZ*numAtomsPerCell,

        /* Fill the ncz cells in this column */
        const int firstCell  = firstCellInColumn(cxy);
        int       cellFilled = firstCell;
        for (int cellZ = 0; cellZ < numCellsZ; cellZ++)

            const int cell = firstCell + cellZ;

            const int atomOffsetCell = atomOffset + cellZ*numAtomsPerCell;
            const int numAtomsCell   = std::min(numAtomsPerCell, numAtoms - (atomOffsetCell - atomOffset));

                      atomOffsetCell, atomOffsetCell + numAtomsCell, atinfo, x,
            /* This copy to bbcz is not really necessary,
             * but it allows using the same grid search code
             * for the simple and supersub cell setups.
             */
            if (numAtomsCell > 0)

                bbcz_[cell*NNBSBB_D    ] = bb_[cellFilled].lower[BB_Z];
                bbcz_[cell*NNBSBB_D + 1] = bb_[cellFilled].upper[BB_Z];

        /* Set the unused atom indices to -1 */
        for (int ind = numAtoms; ind < numCellsZ*numAtomsPerCell; ind++)

            nbs->a[atomOffset + ind] = -1;
/* Spatially sort the atoms within one grid column */
void Grid::sortColumnsGpuGeometry(nbnxn_search *nbs,
                                  int atomStart, int atomEnd,

                                  gmx::ArrayRef<const gmx::RVec> x,
                                  nbnxn_atomdata_t *nbat,
                                  int cxy_start, int cxy_end,
                                  gmx::ArrayRef<int> sort_work)

    BoundingBox  bb_work_array[2];
    BoundingBox *bb_work_aligned = reinterpret_cast<BoundingBox *>((reinterpret_cast<std::size_t>(bb_work_array + 1)) & (~(static_cast<std::size_t>(15))));

        fprintf(debug, "cell_offset %d sorting columns %d - %d, atoms %d - %d\n",
                cellOffset_, cxy_start, cxy_end, atomStart, atomEnd);

    const bool relevantAtomsAreWithinGridBounds = (dimensions_.maxAtomGroupRadius == 0);

    const int  numAtomsPerCell = geometry_.numAtomsPerCell;

    const int  subdiv_x = geometry_.numAtomsICluster;
    const int  subdiv_y = c_gpuNumClusterPerCellX*subdiv_x;
    const int  subdiv_z = c_gpuNumClusterPerCellY*subdiv_y;
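
    /* Example (illustrative, assuming 8-atom i-clusters and 2x2x2 clusters
     * per cell): subdiv_x = 8, subdiv_y = 2*8 = 16 and subdiv_z = 2*16 = 32,
     * i.e. one z-slab of a cell holds 32 atoms split into 2x2 clusters
     * in x,y.
     */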
    /* Sort the atoms within each x,y column in 3 dimensions.
     * Loop over all columns on the x/y grid.
     */
    for (int cxy = cxy_start; cxy < cxy_end; cxy++)

        const int gridX = cxy/dimensions_.numCells[YY];
        const int gridY = cxy - gridX*dimensions_.numCells[YY];

        const int numAtomsInColumn = cxy_na_[cxy];
        const int numCellsInColumn = cxy_ind_[cxy + 1] - cxy_ind_[cxy];
        const int atomOffset       = firstAtomInColumn(cxy);

        /* Sort the atoms within each x,y column on z coordinate */
        sort_atoms(ZZ, FALSE, dd_zone,
                   relevantAtomsAreWithinGridBounds,
                   nbs->a.data() + atomOffset, numAtomsInColumn, x,
                   dimensions_.lowerCorner[ZZ],
                   1.0/dimensions_.gridSize[ZZ], numCellsInColumn*numAtomsPerCell,

        /* This loop goes over the cells and clusters along z at once */
        for (int sub_z = 0; sub_z < numCellsInColumn*c_gpuNumClusterPerCellZ; sub_z++)

            const int atomOffsetZ = atomOffset + sub_z*subdiv_z;
            const int numAtomsZ   = std::min(subdiv_z, numAtomsInColumn - (atomOffsetZ - atomOffset));

            /* We have already sorted on z */

            if (sub_z % c_gpuNumClusterPerCellZ == 0)

                cz = sub_z/c_gpuNumClusterPerCellZ;
                const int cell = cxy_ind_[cxy] + cz;

                /* The number of atoms in this cell/super-cluster */
                const int numAtoms = std::min(numAtomsPerCell, numAtomsInColumn - (atomOffsetZ - atomOffset));

                numClusters_[cell] = std::min(c_gpuNumClusterPerCell,
                                              (numAtoms + geometry_.numAtomsICluster - 1)/geometry_.numAtomsICluster);

                /* Store the z-boundaries of the bounding box of the cell */
                bbcz_[cell*NNBSBB_D    ] = x[nbs->a[atomOffsetZ]][ZZ];
                bbcz_[cell*NNBSBB_D + 1] = x[nbs->a[atomOffsetZ + numAtoms - 1]][ZZ];

            if (c_gpuNumClusterPerCellY > 1)

                /* Sort the atoms along y */
                sort_atoms(YY, (sub_z & 1) != 0, dd_zone,
                           relevantAtomsAreWithinGridBounds,
                           nbs->a.data() + atomOffsetZ, numAtomsZ, x,
                           dimensions_.lowerCorner[YY] + gridY*dimensions_.cellSize[YY],
                           dimensions_.invCellSize[YY], subdiv_z,

            for (int sub_y = 0; sub_y < c_gpuNumClusterPerCellY; sub_y++)

                const int atomOffsetY = atomOffsetZ + sub_y*subdiv_y;
                const int numAtomsY   = std::min(subdiv_y, numAtomsInColumn - (atomOffsetY - atomOffset));

                if (c_gpuNumClusterPerCellX > 1)

                    /* Sort the atoms along x */
                    sort_atoms(XX, ((cz*c_gpuNumClusterPerCellY + sub_y) & 1) != 0, dd_zone,
                               relevantAtomsAreWithinGridBounds,
                               nbs->a.data() + atomOffsetY, numAtomsY, x,
                               dimensions_.lowerCorner[XX] + gridX*dimensions_.cellSize[XX],
                               dimensions_.invCellSize[XX], subdiv_y,

                for (int sub_x = 0; sub_x < c_gpuNumClusterPerCellX; sub_x++)

                    const int atomOffsetX = atomOffsetY + sub_x*subdiv_x;
                    const int numAtomsX   = std::min(subdiv_x, numAtomsInColumn - (atomOffsetX - atomOffset));

                              atomOffsetX, atomOffsetX + numAtomsX, atinfo, x,

        /* Set the unused atom indices to -1 */
        for (int ind = numAtomsInColumn; ind < numCellsInColumn*numAtomsPerCell; ind++)

            nbs->a[atomOffset + ind] = -1;
/*! \brief Sets the cell index in the cell array for atom \p atomIndex and increments the atom count for the grid column */
static void setCellAndAtomCount(gmx::ArrayRef<int> cell,
                                int                cellIndex,
                                gmx::ArrayRef<int> cxy_na,
                                int                atomIndex)
{
    cell[atomIndex]    = cellIndex;
    cxy_na[cellIndex] += 1;
}
/*! \brief Determine in which grid column atoms should go */
static void calc_column_indices(const Grid::Dimensions         &gridDims,
                                const gmx::UpdateGroupsCog     *updateGroupsCog,
                                int atomStart, int atomEnd,
                                gmx::ArrayRef<const gmx::RVec>  x,
                                int dd_zone, const int *move,
                                int thread, int nthread,
                                gmx::ArrayRef<int>              cell,
                                gmx::ArrayRef<int>              cxy_na)

    const int numColumns = gridDims.numCells[XX]*gridDims.numCells[YY];

    /* We add one extra cell for particles which moved during DD */
    for (int i = 0; i < numColumns; i++)

    int taskAtomStart = atomStart + static_cast<int>((thread + 0)*(atomEnd - atomStart))/nthread;
    int taskAtomEnd   = atomStart + static_cast<int>((thread + 1)*(atomEnd - atomStart))/nthread;

        for (int i = taskAtomStart; i < taskAtomEnd; i++)

            if (move == nullptr || move[i] >= 0)

                const gmx::RVec &coord = (updateGroupsCog ? updateGroupsCog->cogForAtom(i) : x[i]);

                /* We need to be careful with rounding,
                 * particles might be a few bits outside the local zone.
                 * The int cast takes care of the lower bound,
                 * we will explicitly take care of the upper bound.
                 */
                int cx = static_cast<int>((coord[XX] - gridDims.lowerCorner[XX])*gridDims.invCellSize[XX]);
                int cy = static_cast<int>((coord[YY] - gridDims.lowerCorner[YY])*gridDims.invCellSize[YY]);

                if (cx < 0 || cx > gridDims.numCells[XX] ||
                    cy < 0 || cy > gridDims.numCells[YY])

                        "grid cell cx %d cy %d out of range (max %d %d)\n"
                        "atom %f %f %f, grid->c0 %f %f",

                        gridDims.numCells[XX],
                        gridDims.numCells[YY],
                        x[i][XX], x[i][YY], x[i][ZZ],
                        gridDims.lowerCorner[XX],
                        gridDims.lowerCorner[YY]);
                /* Take care of potential rounding issues */
                cx = std::min(cx, gridDims.numCells[XX] - 1);
                cy = std::min(cy, gridDims.numCells[YY] - 1);

                /* For the moment cell will contain only the grid-local
                 * x and y indices, not z.
                 */
                setCellAndAtomCount(cell, cx*gridDims.numCells[YY] + cy, cxy_na, i);

                /* Put this moved particle after the end of the grid,
                 * so we can process it later without using conditionals.
                 */
                setCellAndAtomCount(cell, numColumns, cxy_na, i);
        for (int i = taskAtomStart; i < taskAtomEnd; i++)

            int cx = static_cast<int>((x[i][XX] - gridDims.lowerCorner[XX])*gridDims.invCellSize[XX]);
            int cy = static_cast<int>((x[i][YY] - gridDims.lowerCorner[YY])*gridDims.invCellSize[YY]);

            /* For non-home zones there could be particles outside
             * the non-bonded cut-off range, which have been communicated
             * for bonded interactions only. For the result it doesn't
             * matter where these end up on the grid. For performance
             * we put them in an extra row at the border.
             */
            cx = std::max(cx, 0);
            cx = std::min(cx, gridDims.numCells[XX] - 1);
            cy = std::max(cy, 0);
            cy = std::min(cy, gridDims.numCells[YY] - 1);

            /* For the moment cell will contain only the grid-local
             * x and y indices, not z.
             */
            setCellAndAtomCount(cell, cx*gridDims.numCells[YY] + cy, cxy_na, i);
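
/* Worked example (illustrative numbers only): with lowerCorner[XX] = 0,
 * invCellSize[XX] = 2.0 (0.5 nm columns) and numCells[YY] = 8, an atom at
 * x = 1.23 nm, y = 0.4 nm maps to cx = (int)(1.23*2.0) = 2 and
 * cy = (int)(0.4*2.0) = 0, i.e. column index 2*8 + 0 = 16.
 */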
/*! \brief Resizes grid and atom data which depend on the number of cells */
static void resizeForNumberOfCells(const int         numNbnxnAtoms,
                                   const int         numAtomsMoved,
                                   nbnxn_search     *nbs,
                                   nbnxn_atomdata_t *nbat)

    /* Note: nbs->cell was already resized before */

    /* To avoid conditionals we store the moved particles at the end of a,
     * make sure we have enough space.
     */
    nbs->a.resize(numNbnxnAtoms + numAtomsMoved);

    /* Make space in nbat for storing the atom coordinates */
    nbat->resizeCoordinateBuffer(numNbnxnAtoms);

void Grid::calcCellIndices(nbnxn_search                   *nbs,

                           const gmx::UpdateGroupsCog     *updateGroupsCog,

                           gmx::ArrayRef<const gmx::RVec>  x,

                           nbnxn_atomdata_t               *nbat)

    cellOffset_ = cellOffset;

    /* First compute all grid/column indices and store them in nbs->cell */
    nbs->cell.resize(atomEnd);

    const int nthread = gmx_omp_nthreads_get(emntPairsearch);

#pragma omp parallel for num_threads(nthread) schedule(static)
    for (int thread = 0; thread < nthread; thread++)

            calc_column_indices(dimensions_,
                                atomStart, atomEnd, x,
                                ddZone, move, thread, nthread,
                                nbs->cell, nbs->work[thread].cxy_na);

        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
    const int numAtomsPerCell = geometry_.numAtomsPerCell;

    /* Make the cell index as a function of x and y */

    for (int i = 0; i < numColumns() + 1; i++)

        /* We set ncz_max at the beginning of the loop instead of at the end
         * to also skip the last entry (i == numColumns()), which holds the
         * moved particles that do not need to be ordered on the grid.
         */

        int cxy_na_i = nbs->work[0].cxy_na[i];
        for (int thread = 1; thread < nthread; thread++)

            cxy_na_i += nbs->work[thread].cxy_na[i];

        ncz = (cxy_na_i + numAtomsPerCell - 1)/numAtomsPerCell;
        if (nbat->XFormat == nbatX8)

            /* Make the number of cells a multiple of 2, e.g. 5 -> 6 */
            ncz = (ncz + 1) & ~1;

        cxy_ind_[i+1] = cxy_ind_[i] + ncz;

        /* Clear cxy_na_, so we can reuse the array below */

    numCellsTotal_ = cxy_ind_[numColumns()] - cxy_ind_[0];
    resizeForNumberOfCells(atomIndexEnd(), numAtomsMoved, nbs, nbat);

        fprintf(debug, "ns na_sc %d na_c %d super-cells: %d x %d y %d z %.1f maxz %d\n",
                numAtomsPerCell, geometry_.numAtomsICluster, numCellsTotal_,
                dimensions_.numCells[XX], dimensions_.numCells[YY],
                numCellsTotal_/(static_cast<double>(numColumns())),

        for (int cy = 0; cy < dimensions_.numCells[YY]; cy++)

            for (int cx = 0; cx < dimensions_.numCells[XX]; cx++)

                fprintf(debug, " %2d", cxy_ind_[i + 1] - cxy_ind_[i]);

            fprintf(debug, "\n");

    /* Make sure the work array for sorting is large enough */
    const int worstCaseSortBufferSize = ncz_max*numAtomsPerCell*c_sortGridMaxSizeFactor;
    if (worstCaseSortBufferSize > gmx::index(nbs->work[0].sortBuffer.size()))

        for (nbnxn_search_work_t &work : nbs->work)

            /* Elements not in use should be -1 */
            work.sortBuffer.resize(worstCaseSortBufferSize, -1);

    /* Now we know the dimensions we can fill the grid.
     * This is the first, unsorted fill. We sort the columns after this.
     */
    for (int i = atomStart; i < atomEnd; i++)

        /* At this point nbs->cell contains the local grid x,y indices */
        int cxy = nbs->cell[i];
        nbs->a[firstAtomInColumn(cxy) + cxy_na_[cxy]++] = i;

        /* Set the cell indices for the moved particles */
        int n0 = numCellsTotal_*numAtomsPerCell;
        int n1 = numCellsTotal_*numAtomsPerCell + cxy_na_[numColumns()];

        for (int i = n0; i < n1; i++)

            nbs->cell[nbs->a[i]] = i;
    /* Sort the super-cell columns along z into the sub-cells. */
#pragma omp parallel for num_threads(nthread) schedule(static)
    for (int thread = 0; thread < nthread; thread++)

        int columnStart = ((thread + 0)*numColumns())/nthread;
        int columnEnd   = ((thread + 1)*numColumns())/nthread;
        if (geometry_.isSimple)

            sortColumnsCpuGeometry(nbs, ddZone, atomStart, atomEnd, atinfo, x, nbat,
                                   columnStart, columnEnd,
                                   nbs->work[thread].sortBuffer);

            sortColumnsGpuGeometry(nbs, ddZone, atomStart, atomEnd, atinfo, x, nbat,
                                   columnStart, columnEnd,
                                   nbs->work[thread].sortBuffer);

        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;

    if (geometry_.isSimple && nbat->XFormat == nbatX8)

        combine_bounding_box_pairs(*this, bb_, bbj_);

    if (!geometry_.isSimple)

        numClustersTotal_ = 0;
        for (int i = 0; i < numCellsTotal_; i++)

            numClustersTotal_ += numClusters_[i];

        if (geometry_.isSimple)

            print_bbsizes_simple(debug, *this);

            fprintf(debug, "ns non-zero sub-cells: %d average atoms %.2f\n",

                    (atomEnd - atomStart)/static_cast<double>(numClustersTotal_));

            print_bbsizes_supersub(debug, *this);
} // namespace Nbnxm

// TODO: Move the functions below to nbnxn.cpp

/* Sets up a grid and puts the atoms on the grid.
 * This function only operates on one domain of the domain decomposition.
 * Note that without domain decomposition there is only one domain.
 */
void nbnxn_put_on_grid(nonbonded_verlet_t             *nbv,

                       const rvec                      lowerCorner,
                       const rvec                      upperCorner,
                       const gmx::UpdateGroupsCog     *updateGroupsCog,

                       gmx::ArrayRef<const gmx::RVec>  x,

    nbnxn_search *nbs  = nbv->nbs.get();
    Nbnxm::Grid  &grid = nbs->grid[ddZone];

    nbs_cycle_start(&nbs->cc[enbsCCgrid]);

        const Nbnxm::Grid &previousGrid = nbs->grid[ddZone - 1];
        cellOffset = previousGrid.atomIndexEnd()/previousGrid.geometry().numAtomsPerCell;

    const int n = atomEnd - atomStart;

    real maxAtomGroupRadius;

        copy_mat(box, nbs->box);

        nbs->natoms_local    = atomEnd - numAtomsMoved;
        /* We assume that nbnxn_put_on_grid is called first
         * for the local atoms (ddZone=0).
         */
        nbs->natoms_nonlocal = atomEnd - numAtomsMoved;

        maxAtomGroupRadius = (updateGroupsCog ? updateGroupsCog->maxUpdateGroupRadius() : 0);

            fprintf(debug, "natoms_local = %5d atom_density = %5.1f\n",
                    nbs->natoms_local, atomDensity);

        const Nbnxm::Grid::Dimensions &dimsGrid0 = nbs->grid[0].dimensions();
        atomDensity        = dimsGrid0.atomDensity;
        maxAtomGroupRadius = dimsGrid0.maxAtomGroupRadius;

        nbs->natoms_nonlocal = std::max(nbs->natoms_nonlocal, atomEnd);

    /* We always use the home zone (grid[0]) for setting the cell size,
     * since determining densities for non-local zones is difficult.
     */
    grid.setDimensions(nbs,
                       ddZone, n - numAtomsMoved,
                       lowerCorner, upperCorner,

                       maxAtomGroupRadius);

    nbnxn_atomdata_t *nbat = nbv->nbat.get();

    grid.calcCellIndices(nbs, ddZone, cellOffset, updateGroupsCog, atomStart, atomEnd, atinfo, x, numAtomsMoved, move, nbat);

        nbat->natoms_local = nbat->numAtoms();

    if (ddZone == gmx::ssize(nbs->grid) - 1)

        /* We are done setting up all grids, we can resize the force buffers */
        nbat->resizeForceBuffers();

    nbs_cycle_stop(&nbs->cc[enbsCCgrid]);
/* Calls nbnxn_put_on_grid for all non-local domains */
void nbnxn_put_on_grid_nonlocal(nonbonded_verlet_t              *nbv,
                                const struct gmx_domdec_zones_t *zones,

                                gmx::ArrayRef<const gmx::RVec>   x)

    for (int zone = 1; zone < zones->n; zone++)

        for (int d = 0; d < DIM; d++)

            c0[d] = zones->size[zone].bb_x0[d];
            c1[d] = zones->size[zone].bb_x1[d];

        nbnxn_put_on_grid(nbv, nullptr,

                          zones->cg_range[zone],
                          zones->cg_range[zone+1],

void nbnxn_get_ncells(const nbnxn_search *nbs, int *ncx, int *ncy)
{
    *ncx = nbs->grid[0].dimensions().numCells[XX];
    *ncy = nbs->grid[0].dimensions().numCells[YY];
}

gmx::ArrayRef<const int> nbnxn_get_atomorder(const nbnxn_search *nbs)
{
    /* Return the atom order for the home cell (index 0) */
    const Nbnxm::Grid &grid = nbs->grid[0];

    const int          numIndices = grid.atomIndexEnd() - grid.firstAtomInColumn(0);

    return gmx::constArrayRefFromArray(nbs->a.data(), numIndices);
}

void nbnxn_set_atomorder(nbnxn_search *nbs)
{
    /* Set the atom order for the home cell (index 0) */
    const Nbnxm::Grid &grid = nbs->grid[0];

    for (int cxy = 0; cxy < grid.numColumns(); cxy++)

        const int numAtoms  = grid.numAtomsInColumn(cxy);
        int       cellIndex = grid.firstCellInColumn(cxy)*grid.geometry().numAtomsPerCell;
        for (int i = 0; i < numAtoms; i++)

            nbs->a[cellIndex]    = atomIndex;
            nbs->cell[atomIndex] = cellIndex;

gmx::ArrayRef<const int> nbnxn_get_gridindices(const nbnxn_search *nbs)