2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
5 * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
40 * Implements the Grid class.
42 * \author Berk Hess <hess@kth.se>
43 * \ingroup module_nbnxm
55 #include "gromacs/math/utilities.h"
56 #include "gromacs/math/vec.h"
57 #include "gromacs/mdlib/gmx_omp_nthreads.h"
58 #include "gromacs/mdlib/updategroupscog.h"
59 #include "gromacs/mdtypes/atominfo.h"
60 #include "gromacs/nbnxm/atomdata.h"
61 #include "gromacs/simd/simd.h"
62 #include "gromacs/simd/vector_operations.h"
64 #include "boundingboxes.h"
65 #include "gridsetdata.h"
66 #include "nbnxm_geometry.h"
67 #include "pairlistparams.h"
72 Grid::Geometry::Geometry(const PairlistType pairlistType) :
73 isSimple(pairlistType != PairlistType::HierarchicalNxN),
74 numAtomsICluster(IClusterSizePerListType[pairlistType]),
75 numAtomsJCluster(JClusterSizePerListType[pairlistType]),
76 numAtomsPerCell((isSimple ? 1 : c_gpuNumClusterPerCell) * numAtomsICluster),
77 numAtomsICluster2Log(get_2log(numAtomsICluster))
81 Grid::Grid(const PairlistType pairlistType, const bool& haveFep) :
82 geometry_(pairlistType), haveFep_(haveFep)
86 /*! \brief Returns the atom density (> 0) of a rectangular grid */
87 static real gridAtomDensity(int numAtoms, const rvec lowerCorner, const rvec upperCorner)
93 /* To avoid zero density we use a minimum of 1 atom */
97 rvec_sub(upperCorner, lowerCorner, size);
99 return static_cast<real>(numAtoms) / (size[XX] * size[YY] * size[ZZ]);
102 // Get approximate dimensions of each cell. Returns the length along X and Y.
103 static std::array<real, DIM - 1> getTargetCellLength(const Grid::Geometry& geometry, const real atomDensity)
105 if (geometry.isSimple)
107 /* To minimize the zero interactions, we should make
108 * the largest of the i/j cell cubic.
110 int numAtomsInCell = std::max(geometry.numAtomsICluster, geometry.numAtomsJCluster);
112 /* Approximately cubic cells */
113 real tlen = std::cbrt(numAtomsInCell / atomDensity);
114 return { tlen, tlen };
118 /* Approximately cubic sub cells */
119 real tlen = std::cbrt(geometry.numAtomsICluster / atomDensity);
120 return { tlen * c_gpuNumClusterPerCellX, tlen * c_gpuNumClusterPerCellY };
124 static int getMaxNumCells(const Grid::Geometry& geometry, const int numAtoms, const int numColumns)
126 if (geometry.numAtomsJCluster <= geometry.numAtomsICluster)
128 return numAtoms / geometry.numAtomsPerCell + numColumns;
132 return numAtoms / geometry.numAtomsPerCell
133 + numColumns * geometry.numAtomsJCluster / geometry.numAtomsICluster;
137 void Grid::setDimensions(const int ddZone,
139 gmx::RVec lowerCorner,
140 gmx::RVec upperCorner,
142 const real maxAtomGroupRadius,
144 gmx::PinningPolicy pinningPolicy)
146 /* We allow passing lowerCorner=upperCorner, in which case we need to
147 * create a finite sized bounding box to avoid division by zero.
148 * We use a minimum size such that the volume fits in float with some
149 * margin for computing and using the atom number density.
151 constexpr real c_minimumGridSize = 1e-10;
152 for (int d = 0; d < DIM; d++)
154 GMX_ASSERT(upperCorner[d] >= lowerCorner[d],
155 "Upper corner should be larger than the lower corner");
156 if (upperCorner[d] - lowerCorner[d] < c_minimumGridSize)
158 /* Ensure we apply a correction to the bounding box */
160 std::max(std::abs(lowerCorner[d]) * GMX_REAL_EPS, 0.5_real * c_minimumGridSize);
161 lowerCorner[d] -= correction;
162 upperCorner[d] += correction;
166 /* For the home zone we compute the density when not set (=-1) or when =0 */
167 if (ddZone == 0 && atomDensity <= 0)
169 atomDensity = gridAtomDensity(numAtoms, lowerCorner, upperCorner);
172 dimensions_.atomDensity = atomDensity;
173 dimensions_.maxAtomGroupRadius = maxAtomGroupRadius;
176 rvec_sub(upperCorner, lowerCorner, size);
178 if (numAtoms > geometry_.numAtomsPerCell)
180 GMX_ASSERT(atomDensity > 0, "With one or more atoms, the density should be positive");
182 /* target cell length */
183 const std::array<real, DIM - 1> tlen = getTargetCellLength(geometry_, atomDensity);
185 /* We round ncx and ncy down, because we get less cell pairs
186 * in the pairlist when the fixed cell dimensions (x,y) are
187 * larger than the variable one (z) than the other way around.
189 dimensions_.numCells[XX] = std::max(1, static_cast<int>(size[XX] / tlen[XX]));
190 dimensions_.numCells[YY] = std::max(1, static_cast<int>(size[YY] / tlen[YY]));
194 dimensions_.numCells[XX] = 1;
195 dimensions_.numCells[YY] = 1;
198 for (int d = 0; d < DIM - 1; d++)
200 dimensions_.cellSize[d] = size[d] / dimensions_.numCells[d];
201 dimensions_.invCellSize[d] = 1 / dimensions_.cellSize[d];
206 /* This is a non-home zone, add an extra row of cells
207 * for particles communicated for bonded interactions.
208 * These can be beyond the cut-off. It doesn't matter where
209 * they end up on the grid, but for performance it's better
210 * if they don't end up in cells that can be within cut-off range.
212 dimensions_.numCells[XX]++;
213 dimensions_.numCells[YY]++;
216 /* We need one additional cell entry for particles moved by DD */
217 cxy_na_.resize(numColumns() + 1);
218 cxy_ind_.resize(numColumns() + 2);
219 changePinningPolicy(&cxy_na_, pinningPolicy);
220 changePinningPolicy(&cxy_ind_, pinningPolicy);
222 /* Worst case scenario of 1 atom in each last cell */
223 const int maxNumCells = getMaxNumCells(geometry_, numAtoms, numColumns());
225 if (!geometry_.isSimple)
227 numClusters_.resize(maxNumCells);
229 bbcz_.resize(maxNumCells);
231 /* This resize also zeros the contents, this avoid possible
232 * floating exceptions in SIMD with the unused bb elements.
234 if (geometry_.isSimple)
236 bb_.resize(maxNumCells);
241 pbb_.resize(packedBoundingBoxesIndex(maxNumCells * c_gpuNumClusterPerCell));
243 bb_.resize(maxNumCells * c_gpuNumClusterPerCell);
247 if (geometry_.numAtomsJCluster == geometry_.numAtomsICluster)
253 GMX_ASSERT(geometry_.isSimple, "Only CPU lists should have different i/j cluster sizes");
255 bbjStorage_.resize(maxNumCells * geometry_.numAtomsICluster / geometry_.numAtomsJCluster);
259 flags_.resize(maxNumCells);
262 fep_.resize(maxNumCells * geometry_.numAtomsPerCell / geometry_.numAtomsICluster);
265 copy_rvec(lowerCorner, dimensions_.lowerCorner);
266 copy_rvec(upperCorner, dimensions_.upperCorner);
267 copy_rvec(size, dimensions_.gridSize);
270 /* We need to sort paricles in grid columns on z-coordinate.
271 * As particle are very often distributed homogeneously, we use a sorting
272 * algorithm similar to pigeonhole sort. We multiply the z-coordinate
273 * by a factor, cast to an int and try to store in that hole. If the hole
274 * is full, we move this or another particle. A second pass is needed to make
275 * contiguous elements. SORT_GRID_OVERSIZE is the ratio of holes to particles.
276 * 4 is the optimal value for homogeneous particle distribution and allows
277 * for an O(#particles) sort up till distributions were all particles are
278 * concentrated in 1/4 of the space. No NlogN fallback is implemented,
279 * as it can be expensive to detect imhomogeneous particle distributions.
281 /*! \brief Ratio of grid cells to atoms */
282 static constexpr int c_sortGridRatio = 4;
283 /*! \brief Maximum ratio of holes used, in the worst case all particles
284 * end up in the last hole and we need num. atoms extra holes at the end.
286 static constexpr int c_sortGridMaxSizeFactor = c_sortGridRatio + 1;
288 /*! \brief Sorts particle index a on coordinates x along dim.
290 * Backwards tells if we want decreasing iso increasing coordinates.
291 * h0 is the minimum of the coordinate range.
292 * invh is the 1/length of the sorting range.
293 * n_per_h (>=n) is the expected average number of particles per 1/invh
294 * sort is the sorting work array.
295 * sort should have a size of at least n_per_h*c_sortGridRatio + n,
296 * or easier, allocate at least n*c_sortGridMaxSizeFactor elements.
298 static void sort_atoms(int dim,
300 int gmx_unused dd_zone,
301 bool gmx_unused relevantAtomsAreWithinGridBounds,
304 gmx::ArrayRef<const gmx::RVec> x,
308 gmx::ArrayRef<int> sort)
316 GMX_ASSERT(n <= n_per_h, "We require n <= n_per_h");
318 /* Transform the inverse range height into the inverse hole height */
319 invh *= n_per_h * c_sortGridRatio;
321 /* Set nsort to the maximum possible number of holes used.
322 * In worst case all n elements end up in the last bin.
324 int nsort = n_per_h * c_sortGridRatio + n;
326 /* Determine the index range used, so we can limit it for the second pass */
327 int zi_min = INT_MAX;
330 /* Sort the particles using a simple index sort */
331 for (int i = 0; i < n; i++)
333 /* The cast takes care of float-point rounding effects below zero.
334 * This code assumes particles are less than 1/SORT_GRID_OVERSIZE
335 * times the box height out of the box.
337 int zi = static_cast<int>((x[a[i]][dim] - h0) * invh);
340 /* As we can have rounding effect, we use > iso >= here */
341 if (relevantAtomsAreWithinGridBounds && (zi < 0 || (dd_zone == 0 && zi > n_per_h * c_sortGridRatio)))
344 "(int)((x[%d][%c]=%f - %f)*%f) = %d, not in 0 - %d*%d\n",
360 /* In a non-local domain, particles communicated for bonded interactions
361 * can be far beyond the grid size, which is set by the non-bonded
362 * cut-off distance. We sort such particles into the last cell.
364 if (zi > n_per_h * c_sortGridRatio)
366 zi = n_per_h * c_sortGridRatio;
369 /* Ideally this particle should go in sort cell zi,
370 * but that might already be in use,
371 * in that case find the first empty cell higher up
376 zi_min = std::min(zi_min, zi);
377 zi_max = std::max(zi_max, zi);
381 /* We have multiple atoms in the same sorting slot.
382 * Sort on real z for minimal bounding box size.
383 * There is an extra check for identical z to ensure
384 * well-defined output order, independent of input order
385 * to ensure binary reproducibility after restarts.
388 && (x[a[i]][dim] > x[sort[zi]][dim]
389 || (x[a[i]][dim] == x[sort[zi]][dim] && a[i] > sort[zi])))
396 /* Shift all elements by one slot until we find an empty slot */
399 while (sort[zim] >= 0)
407 zi_max = std::max(zi_max, zim);
410 zi_max = std::max(zi_max, zi);
417 for (int zi = 0; zi < nsort; zi++)
428 for (int zi = zi_max; zi >= zi_min; zi--)
439 gmx_incons("Lost particles while sorting");
444 //! Returns double up to one least significant float bit smaller than x
445 static double R2F_D(const float x)
447 return static_cast<float>(x >= 0 ? (1 - GMX_FLOAT_EPS) * x : (1 + GMX_FLOAT_EPS) * x);
449 //! Returns double up to one least significant float bit larger than x
450 static double R2F_U(const float x)
452 return static_cast<float>(x >= 0 ? (1 + GMX_FLOAT_EPS) * x : (1 - GMX_FLOAT_EPS) * x);
456 static float R2F_D(const float x)
461 static float R2F_U(const float x)
467 //! Computes the bounding box for na coordinates in order x,y,z, bb order xyz0
468 static void calc_bounding_box(int na, int stride, const real* x, BoundingBox* bb)
478 for (int j = 1; j < na; j++)
480 xl = std::min(xl, x[i + XX]);
481 xh = std::max(xh, x[i + XX]);
482 yl = std::min(yl, x[i + YY]);
483 yh = std::max(yh, x[i + YY]);
484 zl = std::min(zl, x[i + ZZ]);
485 zh = std::max(zh, x[i + ZZ]);
488 /* Note: possible double to float conversion here */
489 bb->lower.x = R2F_D(xl);
490 bb->lower.y = R2F_D(yl);
491 bb->lower.z = R2F_D(zl);
492 bb->upper.x = R2F_U(xh);
493 bb->upper.y = R2F_U(yh);
494 bb->upper.z = R2F_U(zh);
497 /*! \brief Computes the bounding box for na packed coordinates, bb order xyz0 */
498 static void calc_bounding_box_x_x4(int na, const real* x, BoundingBox* bb)
500 real xl = x[XX * c_packX4];
501 real xh = x[XX * c_packX4];
502 real yl = x[YY * c_packX4];
503 real yh = x[YY * c_packX4];
504 real zl = x[ZZ * c_packX4];
505 real zh = x[ZZ * c_packX4];
506 for (int j = 1; j < na; j++)
508 xl = std::min(xl, x[j + XX * c_packX4]);
509 xh = std::max(xh, x[j + XX * c_packX4]);
510 yl = std::min(yl, x[j + YY * c_packX4]);
511 yh = std::max(yh, x[j + YY * c_packX4]);
512 zl = std::min(zl, x[j + ZZ * c_packX4]);
513 zh = std::max(zh, x[j + ZZ * c_packX4]);
515 /* Note: possible double to float conversion here */
516 bb->lower.x = R2F_D(xl);
517 bb->lower.y = R2F_D(yl);
518 bb->lower.z = R2F_D(zl);
519 bb->upper.x = R2F_U(xh);
520 bb->upper.y = R2F_U(yh);
521 bb->upper.z = R2F_U(zh);
524 /*! \brief Computes the bounding box for na coordinates, bb order xyz0 */
525 static void calc_bounding_box_x_x8(int na, const real* x, BoundingBox* bb)
527 real xl = x[XX * c_packX8];
528 real xh = x[XX * c_packX8];
529 real yl = x[YY * c_packX8];
530 real yh = x[YY * c_packX8];
531 real zl = x[ZZ * c_packX8];
532 real zh = x[ZZ * c_packX8];
533 for (int j = 1; j < na; j++)
535 xl = std::min(xl, x[j + XX * c_packX8]);
536 xh = std::max(xh, x[j + XX * c_packX8]);
537 yl = std::min(yl, x[j + YY * c_packX8]);
538 yh = std::max(yh, x[j + YY * c_packX8]);
539 zl = std::min(zl, x[j + ZZ * c_packX8]);
540 zh = std::max(zh, x[j + ZZ * c_packX8]);
542 /* Note: possible double to float conversion here */
543 bb->lower.x = R2F_D(xl);
544 bb->lower.y = R2F_D(yl);
545 bb->lower.z = R2F_D(zl);
546 bb->upper.x = R2F_U(xh);
547 bb->upper.y = R2F_U(yh);
548 bb->upper.z = R2F_U(zh);
551 /*! \brief Computes the bounding box for na packed coordinates, bb order xyz0 */
552 gmx_unused static void calc_bounding_box_x_x4_halves(int na, const real* x, BoundingBox* bb, BoundingBox* bbj)
554 // TODO: During SIMDv2 transition only some archs use namespace (remove when done)
557 calc_bounding_box_x_x4(std::min(na, 2), x, bbj);
561 calc_bounding_box_x_x4(std::min(na - 2, 2), x + (c_packX4 >> 1), bbj + 1);
565 /* Set the "empty" bounding box to the same as the first one,
566 * so we don't need to treat special cases in the rest of the code.
568 #if NBNXN_SEARCH_BB_SIMD4
569 store4(bbj[1].lower.ptr(), load4(bbj[0].lower.ptr()));
570 store4(bbj[1].upper.ptr(), load4(bbj[0].upper.ptr()));
576 #if NBNXN_SEARCH_BB_SIMD4
577 store4(bb->lower.ptr(), min(load4(bbj[0].lower.ptr()), load4(bbj[1].lower.ptr())));
578 store4(bb->upper.ptr(), max(load4(bbj[0].upper.ptr()), load4(bbj[1].upper.ptr())));
581 bb->lower = BoundingBox::Corner::min(bbj[0].lower, bbj[1].lower);
582 bb->upper = BoundingBox::Corner::max(bbj[0].upper, bbj[1].upper);
589 /*! \brief Computes the bounding box for na coordinates in order xyz, bb order xxxxyyyyzzzz */
590 static void calc_bounding_box_xxxx(int na, int stride, const real* x, float* bb)
600 for (int j = 1; j < na; j++)
602 xl = std::min(xl, x[i + XX]);
603 xh = std::max(xh, x[i + XX]);
604 yl = std::min(yl, x[i + YY]);
605 yh = std::max(yh, x[i + YY]);
606 zl = std::min(zl, x[i + ZZ]);
607 zh = std::max(zh, x[i + ZZ]);
610 /* Note: possible double to float conversion here */
611 bb[0 * c_packedBoundingBoxesDimSize] = R2F_D(xl);
612 bb[1 * c_packedBoundingBoxesDimSize] = R2F_D(yl);
613 bb[2 * c_packedBoundingBoxesDimSize] = R2F_D(zl);
614 bb[3 * c_packedBoundingBoxesDimSize] = R2F_U(xh);
615 bb[4 * c_packedBoundingBoxesDimSize] = R2F_U(yh);
616 bb[5 * c_packedBoundingBoxesDimSize] = R2F_U(zh);
619 #endif /* NBNXN_BBXXXX */
621 #if NBNXN_SEARCH_SIMD4_FLOAT_X_BB
623 /*! \brief Computes the bounding box for na coordinates in order xyz?, bb order xyz0 */
624 static void calc_bounding_box_simd4(int na, const float* x, BoundingBox* bb)
626 // TODO: During SIMDv2 transition only some archs use namespace (remove when done)
629 static_assert(sizeof(BoundingBox::Corner) == GMX_SIMD4_WIDTH * sizeof(float),
630 "The Corner struct should hold exactly 4 floats");
632 Simd4Float bb_0_S = load4(x);
633 Simd4Float bb_1_S = bb_0_S;
635 for (int i = 1; i < na; i++)
637 Simd4Float x_S = load4(x + i * GMX_SIMD4_WIDTH);
638 bb_0_S = min(bb_0_S, x_S);
639 bb_1_S = max(bb_1_S, x_S);
642 store4(bb->lower.ptr(), bb_0_S);
643 store4(bb->upper.ptr(), bb_1_S);
648 /*! \brief Computes the bounding box for na coordinates in order xyz?, bb order xxxxyyyyzzzz */
649 static void calc_bounding_box_xxxx_simd4(int na, const float* x, BoundingBox* bb_work_aligned, real* bb)
651 calc_bounding_box_simd4(na, x, bb_work_aligned);
653 bb[0 * c_packedBoundingBoxesDimSize] = bb_work_aligned->lower.x;
654 bb[1 * c_packedBoundingBoxesDimSize] = bb_work_aligned->lower.y;
655 bb[2 * c_packedBoundingBoxesDimSize] = bb_work_aligned->lower.z;
656 bb[3 * c_packedBoundingBoxesDimSize] = bb_work_aligned->upper.x;
657 bb[4 * c_packedBoundingBoxesDimSize] = bb_work_aligned->upper.y;
658 bb[5 * c_packedBoundingBoxesDimSize] = bb_work_aligned->upper.z;
661 # endif /* NBNXN_BBXXXX */
663 #endif /* NBNXN_SEARCH_SIMD4_FLOAT_X_BB */
666 /*! \brief Combines pairs of consecutive bounding boxes */
667 static void combine_bounding_box_pairs(const Grid& grid,
668 gmx::ArrayRef<const BoundingBox> bb,
669 gmx::ArrayRef<BoundingBox> bbj)
671 // TODO: During SIMDv2 transition only some archs use namespace (remove when done)
674 for (int i = 0; i < grid.numColumns(); i++)
676 /* Starting bb in a column is expected to be 2-aligned */
677 const int sc2 = grid.firstCellInColumn(i) >> 1;
678 /* For odd numbers skip the last bb here */
679 const int nc2 = (grid.numAtomsInColumn(i) + 3) >> (2 + 1);
680 for (int c2 = sc2; c2 < sc2 + nc2; c2++)
682 #if NBNXN_SEARCH_BB_SIMD4
683 Simd4Float min_S, max_S;
685 min_S = min(load4(bb[c2 * 2 + 0].lower.ptr()), load4(bb[c2 * 2 + 1].lower.ptr()));
686 max_S = max(load4(bb[c2 * 2 + 0].upper.ptr()), load4(bb[c2 * 2 + 1].upper.ptr()));
687 store4(bbj[c2].lower.ptr(), min_S);
688 store4(bbj[c2].upper.ptr(), max_S);
690 bbj[c2].lower = BoundingBox::Corner::min(bb[c2 * 2 + 0].lower, bb[c2 * 2 + 1].lower);
691 bbj[c2].upper = BoundingBox::Corner::max(bb[c2 * 2 + 0].upper, bb[c2 * 2 + 1].upper);
694 if (((grid.numAtomsInColumn(i) + 3) >> 2) & 1)
696 /* The bb count in this column is odd: duplicate the last bb */
698 bbj[c2].lower = bb[c2 * 2].lower;
699 bbj[c2].upper = bb[c2 * 2].upper;
705 /*! \brief Prints the average bb size, used for debug output */
706 static void print_bbsizes_simple(FILE* fp, const Grid& grid)
709 for (int c = 0; c < grid.numCells(); c++)
711 const BoundingBox& bb = grid.iBoundingBoxes()[c];
712 ba[XX] += bb.upper.x - bb.lower.x;
713 ba[YY] += bb.upper.y - bb.lower.y;
714 ba[ZZ] += bb.upper.z - bb.lower.z;
716 if (grid.numCells() > 0)
718 dsvmul(1.0 / grid.numCells(), ba, ba);
721 const Grid::Dimensions& dims = grid.dimensions();
723 (dims.atomDensity > 0 ? grid.geometry().numAtomsICluster
724 / (dims.atomDensity * dims.cellSize[XX] * dims.cellSize[YY])
728 "ns bb: grid %4.2f %4.2f %4.2f abs %4.2f %4.2f %4.2f rel %4.2f %4.2f %4.2f\n",
735 ba[XX] * dims.invCellSize[XX],
736 ba[YY] * dims.invCellSize[YY],
737 dims.atomDensity > 0 ? ba[ZZ] / avgCellSizeZ : 0.0);
740 /*! \brief Prints the average bb size, used for debug output */
741 static void print_bbsizes_supersub(FILE* fp, const Grid& grid)
747 for (int c = 0; c < grid.numCells(); c++)
750 for (int s = 0; s < grid.numClustersPerCell()[c]; s += c_packedBoundingBoxesDimSize)
752 int cs_w = (c * c_gpuNumClusterPerCell + s) / c_packedBoundingBoxesDimSize;
753 auto boundingBoxes = grid.packedBoundingBoxes().subArray(
754 cs_w * c_packedBoundingBoxesSize, c_packedBoundingBoxesSize);
755 for (int i = 0; i < c_packedBoundingBoxesDimSize; i++)
757 for (int d = 0; d < DIM; d++)
759 ba[d] += boundingBoxes[(DIM + d) * c_packedBoundingBoxesDimSize + i]
760 - boundingBoxes[(0 + d) * c_packedBoundingBoxesDimSize + i];
765 for (int s = 0; s < grid.numClustersPerCell()[c]; s++)
767 const BoundingBox& bb = grid.iBoundingBoxes()[c * c_gpuNumClusterPerCell + s];
768 ba[XX] += bb.upper.x - bb.lower.x;
769 ba[YY] += bb.upper.y - bb.lower.y;
770 ba[ZZ] += bb.upper.z - bb.lower.z;
773 ns += grid.numClustersPerCell()[c];
775 dsvmul(1.0 / ns, ba, ba);
777 const Grid::Dimensions& dims = grid.dimensions();
778 const real avgClusterSizeZ =
779 (dims.atomDensity > 0 ? grid.geometry().numAtomsPerCell
780 / (dims.atomDensity * dims.cellSize[XX]
781 * dims.cellSize[YY] * c_gpuNumClusterPerCellZ)
785 "ns bb: grid %4.2f %4.2f %4.2f abs %4.2f %4.2f %4.2f rel %4.2f %4.2f %4.2f\n",
786 dims.cellSize[XX] / c_gpuNumClusterPerCellX,
787 dims.cellSize[YY] / c_gpuNumClusterPerCellY,
792 ba[XX] * c_gpuNumClusterPerCellX * dims.invCellSize[XX],
793 ba[YY] * c_gpuNumClusterPerCellY * dims.invCellSize[YY],
794 dims.atomDensity > 0 ? ba[ZZ] / avgClusterSizeZ : 0.0);
797 /*!\brief Set non-bonded interaction flags for the current cluster.
799 * Sorts atoms on LJ coefficients: !=0 first, ==0 at the end.
801 static void sort_cluster_on_flag(int numAtomsInCluster,
804 gmx::ArrayRef<const int> atomInfo,
805 gmx::ArrayRef<int> order,
808 constexpr int c_maxNumAtomsInCluster = 8;
809 int sort1[c_maxNumAtomsInCluster];
810 int sort2[c_maxNumAtomsInCluster];
812 GMX_ASSERT(numAtomsInCluster <= c_maxNumAtomsInCluster,
813 "Need to increase c_maxNumAtomsInCluster to support larger clusters");
818 for (int s = atomStart; s < atomEnd; s += numAtomsInCluster)
820 /* Make lists for this (sub-)cell on atoms with and without LJ */
825 for (int a = s; a < std::min(s + numAtomsInCluster, atomEnd); a++)
827 haveQ = haveQ || bool(atomInfo[order[a]] & gmx::sc_atomInfo_HasCharge);
829 if (atomInfo[order[a]] & (gmx::sc_atomInfo_HasVdw))
831 sort1[n1++] = order[a];
836 sort2[n2++] = order[a];
840 /* If we don't have atoms with LJ, there's nothing to sort */
843 *flags |= NBNXN_CI_DO_LJ(subc);
845 if (2 * n1 <= numAtomsInCluster)
847 /* Only sort when strictly necessary. Ordering particles
848 * Ordering particles can lead to less accurate summation
849 * due to rounding, both for LJ and Coulomb interactions.
851 if (2 * (a_lj_max - s) >= numAtomsInCluster)
853 for (int i = 0; i < n1; i++)
855 order[atomStart + i] = sort1[i];
857 for (int j = 0; j < n2; j++)
859 order[atomStart + n1 + j] = sort2[j];
863 *flags |= NBNXN_CI_HALF_LJ(subc);
868 *flags |= NBNXN_CI_DO_COUL(subc);
874 /*! \brief Fill a pair search cell with atoms.
876 * Potentially sorts atoms and sets the interaction flags.
878 void Grid::fillCell(GridSetData* gridSetData,
879 nbnxn_atomdata_t* nbat,
882 gmx::ArrayRef<const int> atomInfo,
883 gmx::ArrayRef<const gmx::RVec> x,
884 BoundingBox gmx_unused* bb_work_aligned)
886 const int numAtoms = atomEnd - atomStart;
888 const gmx::ArrayRef<int>& cells = gridSetData->cells;
889 const gmx::ArrayRef<int>& atomIndices = gridSetData->atomIndices;
891 if (geometry_.isSimple)
893 /* Note that non-local grids are already sorted.
894 * Then sort_cluster_on_flag will only set the flags and the sorting
895 * will not affect the atom order.
897 sort_cluster_on_flag(geometry_.numAtomsICluster,
902 flags_.data() + atomToCluster(atomStart) - cellOffset_);
907 /* Set the fep flag for perturbed atoms in this (sub-)cell */
909 /* The grid-local cluster/(sub-)cell index */
910 int cell = atomToCluster(atomStart)
911 - cellOffset_ * (geometry_.isSimple ? 1 : c_gpuNumClusterPerCell);
913 for (int at = atomStart; at < atomEnd; at++)
915 if (atomIndices[at] >= 0 && (atomInfo[atomIndices[at]] & gmx::sc_atomInfo_FreeEnergyPerturbation))
917 fep_[cell] |= (1 << (at - atomStart));
922 /* Now we have sorted the atoms, set the cell indices */
923 for (int at = atomStart; at < atomEnd; at++)
925 cells[atomIndices[at]] = at;
928 copy_rvec_to_nbat_real(atomIndices.data() + atomStart,
930 geometry_.numAtomsICluster,
931 as_rvec_array(x.data()),
936 if (nbat->XFormat == nbatX4)
938 /* Store the bounding boxes as xyz.xyz. */
939 size_t offset = atomToCluster(atomStart - cellOffset_ * geometry_.numAtomsICluster);
940 BoundingBox* bb_ptr = bb_.data() + offset;
942 #if GMX_SIMD && GMX_SIMD_REAL_WIDTH == 2
943 if (2 * geometry_.numAtomsJCluster == geometry_.numAtomsICluster)
945 calc_bounding_box_x_x4_halves(numAtoms,
946 nbat->x().data() + atom_to_x_index<c_packX4>(atomStart),
948 bbj_.data() + offset * 2);
953 calc_bounding_box_x_x4(numAtoms, nbat->x().data() + atom_to_x_index<c_packX4>(atomStart), bb_ptr);
956 else if (nbat->XFormat == nbatX8)
958 /* Store the bounding boxes as xyz.xyz. */
959 size_t offset = atomToCluster(atomStart - cellOffset_ * geometry_.numAtomsICluster);
960 BoundingBox* bb_ptr = bb_.data() + offset;
962 calc_bounding_box_x_x8(numAtoms, nbat->x().data() + atom_to_x_index<c_packX8>(atomStart), bb_ptr);
965 else if (!geometry_.isSimple)
967 /* Store the bounding boxes in a format convenient
968 * for SIMD4 calculations: xxxxyyyyzzzz...
970 const int clusterIndex = ((atomStart - cellOffset_ * geometry_.numAtomsPerCell)
971 >> geometry_.numAtomsICluster2Log);
972 float* pbb_ptr = pbb_.data() + packedBoundingBoxesIndex(clusterIndex)
973 + (clusterIndex & (c_packedBoundingBoxesDimSize - 1));
975 # if NBNXN_SEARCH_SIMD4_FLOAT_X_BB
976 if (nbat->XFormat == nbatXYZQ)
978 GMX_ASSERT(bb_work_aligned != nullptr, "Must have valid aligned work structure");
979 calc_bounding_box_xxxx_simd4(
980 numAtoms, nbat->x().data() + atomStart * nbat->xstride, bb_work_aligned, pbb_ptr);
985 calc_bounding_box_xxxx(
986 numAtoms, nbat->xstride, nbat->x().data() + atomStart * nbat->xstride, pbb_ptr);
991 "cell %4d bb %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f\n",
992 atomToCluster(atomStart),
993 pbb_ptr[0 * c_packedBoundingBoxesDimSize],
994 pbb_ptr[3 * c_packedBoundingBoxesDimSize],
995 pbb_ptr[1 * c_packedBoundingBoxesDimSize],
996 pbb_ptr[4 * c_packedBoundingBoxesDimSize],
997 pbb_ptr[2 * c_packedBoundingBoxesDimSize],
998 pbb_ptr[5 * c_packedBoundingBoxesDimSize]);
1004 /* Store the bounding boxes as xyz.xyz. */
1005 BoundingBox* bb_ptr =
1006 bb_.data() + atomToCluster(atomStart - cellOffset_ * geometry_.numAtomsPerCell);
1008 calc_bounding_box(numAtoms, nbat->xstride, nbat->x().data() + atomStart * nbat->xstride, bb_ptr);
1012 int bbo = atomToCluster(atomStart - cellOffset_ * geometry_.numAtomsPerCell);
1014 "cell %4d bb %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f\n",
1015 atomToCluster(atomStart),
1026 void Grid::sortColumnsCpuGeometry(GridSetData* gridSetData,
1028 gmx::ArrayRef<const int> atomInfo,
1029 gmx::ArrayRef<const gmx::RVec> x,
1030 nbnxn_atomdata_t* nbat,
1031 const gmx::Range<int> columnRange,
1032 gmx::ArrayRef<int> sort_work)
1037 "cell_offset %d sorting columns %d - %d\n",
1039 *columnRange.begin(),
1040 *columnRange.end());
1043 const bool relevantAtomsAreWithinGridBounds = (dimensions_.maxAtomGroupRadius == 0);
1045 const int numAtomsPerCell = geometry_.numAtomsPerCell;
1047 /* Sort the atoms within each x,y column in 3 dimensions */
1048 for (int cxy : columnRange)
1050 const int numAtoms = numAtomsInColumn(cxy);
1051 const int numCellsZ = cxy_ind_[cxy + 1] - cxy_ind_[cxy];
1052 const int atomOffset = firstAtomInColumn(cxy);
1054 /* Sort the atoms within each x,y column on z coordinate */
1058 relevantAtomsAreWithinGridBounds,
1059 gridSetData->atomIndices.data() + atomOffset,
1062 dimensions_.lowerCorner[ZZ],
1063 1.0 / dimensions_.gridSize[ZZ],
1064 numCellsZ * numAtomsPerCell,
1067 /* Fill the ncz cells in this column */
1068 const int firstCell = firstCellInColumn(cxy);
1069 int cellFilled = firstCell;
1070 for (int cellZ = 0; cellZ < numCellsZ; cellZ++)
1072 const int cell = firstCell + cellZ;
1074 const int atomOffsetCell = atomOffset + cellZ * numAtomsPerCell;
1075 const int numAtomsLeftInColumn = std::max(numAtoms - (atomOffsetCell - atomOffset), 0);
1076 const int numAtomsCell = std::min(numAtomsPerCell, numAtomsLeftInColumn);
1078 fillCell(gridSetData, nbat, atomOffsetCell, atomOffsetCell + numAtomsCell, atomInfo, x, nullptr);
1080 /* This copy to bbcz is not really necessary.
1081 * But it allows to use the same grid search code
1082 * for the simple and supersub cell setups.
1084 if (numAtomsCell > 0)
1088 bbcz_[cell].lower = bb_[cellFilled].lower.z;
1089 bbcz_[cell].upper = bb_[cellFilled].upper.z;
1092 /* Set the unused atom indices to -1 */
1093 for (int ind = numAtoms; ind < numCellsZ * numAtomsPerCell; ind++)
1095 gridSetData->atomIndices[atomOffset + ind] = -1;
1100 /* Spatially sort the atoms within one grid column */
1101 void Grid::sortColumnsGpuGeometry(GridSetData* gridSetData,
1103 gmx::ArrayRef<const int> atomInfo,
1104 gmx::ArrayRef<const gmx::RVec> x,
1105 nbnxn_atomdata_t* nbat,
1106 const gmx::Range<int> columnRange,
1107 gmx::ArrayRef<int> sort_work)
1109 BoundingBox bb_work_array[2];
1110 auto* bb_work_aligned = reinterpret_cast<BoundingBox*>(
1111 (reinterpret_cast<std::size_t>(bb_work_array + 1)) & (~(static_cast<std::size_t>(15))));
1116 "cell_offset %d sorting columns %d - %d\n",
1118 *columnRange.begin(),
1119 *columnRange.end());
1122 const bool relevantAtomsAreWithinGridBounds = (dimensions_.maxAtomGroupRadius == 0);
1124 const int numAtomsPerCell = geometry_.numAtomsPerCell;
1126 const int subdiv_x = geometry_.numAtomsICluster;
1127 const int subdiv_y = c_gpuNumClusterPerCellX * subdiv_x;
1128 const int subdiv_z = c_gpuNumClusterPerCellY * subdiv_y;
1130 /* Extract the atom index array that will be filled here */
1131 const gmx::ArrayRef<int>& atomIndices = gridSetData->atomIndices;
1133 /* Sort the atoms within each x,y column in 3 dimensions.
1134 * Loop over all columns on the x/y grid.
1136 for (int cxy : columnRange)
1138 const int gridX = cxy / dimensions_.numCells[YY];
1139 const int gridY = cxy - gridX * dimensions_.numCells[YY];
1141 const int numAtomsInColumn = cxy_na_[cxy];
1142 const int numCellsInColumn = cxy_ind_[cxy + 1] - cxy_ind_[cxy];
1143 const int atomOffset = firstAtomInColumn(cxy);
1145 /* Sort the atoms within each x,y column on z coordinate */
1149 relevantAtomsAreWithinGridBounds,
1150 atomIndices.data() + atomOffset,
1153 dimensions_.lowerCorner[ZZ],
1154 1.0 / dimensions_.gridSize[ZZ],
1155 numCellsInColumn * numAtomsPerCell,
1158 /* This loop goes over the cells and clusters along z at once */
1159 for (int sub_z = 0; sub_z < numCellsInColumn * c_gpuNumClusterPerCellZ; sub_z++)
1161 const int atomOffsetZ = atomOffset + sub_z * subdiv_z;
1162 const int numAtomsZ = std::min(subdiv_z, numAtomsInColumn - (atomOffsetZ - atomOffset));
1164 /* We have already sorted on z */
1166 if (sub_z % c_gpuNumClusterPerCellZ == 0)
1168 cz = sub_z / c_gpuNumClusterPerCellZ;
1169 const int cell = cxy_ind_[cxy] + cz;
1171 /* The number of atoms in this cell/super-cluster */
1172 const int numAtoms =
1173 std::min(numAtomsPerCell, numAtomsInColumn - (atomOffsetZ - atomOffset));
1175 numClusters_[cell] = std::min(
1176 c_gpuNumClusterPerCell,
1177 (numAtoms + geometry_.numAtomsICluster - 1) / geometry_.numAtomsICluster);
1179 /* Store the z-boundaries of the bounding box of the cell */
1180 bbcz_[cell].lower = x[atomIndices[atomOffsetZ]][ZZ];
1181 bbcz_[cell].upper = x[atomIndices[atomOffsetZ + numAtoms - 1]][ZZ];
1184 if (c_gpuNumClusterPerCellY > 1)
1186 /* Sort the atoms along y */
1190 relevantAtomsAreWithinGridBounds,
1191 atomIndices.data() + atomOffsetZ,
1194 dimensions_.lowerCorner[YY] + gridY * dimensions_.cellSize[YY],
1195 dimensions_.invCellSize[YY],
1200 for (int sub_y = 0; sub_y < c_gpuNumClusterPerCellY; sub_y++)
1202 const int atomOffsetY = atomOffsetZ + sub_y * subdiv_y;
1203 const int numAtomsY = std::min(subdiv_y, numAtomsInColumn - (atomOffsetY - atomOffset));
1205 if (c_gpuNumClusterPerCellX > 1)
1207 /* Sort the atoms along x */
1209 ((cz * c_gpuNumClusterPerCellY + sub_y) & 1) != 0,
1211 relevantAtomsAreWithinGridBounds,
1212 atomIndices.data() + atomOffsetY,
1215 dimensions_.lowerCorner[XX] + gridX * dimensions_.cellSize[XX],
1216 dimensions_.invCellSize[XX],
1221 for (int sub_x = 0; sub_x < c_gpuNumClusterPerCellX; sub_x++)
1223 const int atomOffsetX = atomOffsetY + sub_x * subdiv_x;
1224 const int numAtomsX =
1225 std::min(subdiv_x, numAtomsInColumn - (atomOffsetX - atomOffset));
1227 fillCell(gridSetData, nbat, atomOffsetX, atomOffsetX + numAtomsX, atomInfo, x, bb_work_aligned);
1232 /* Set the unused atom indices to -1 */
1233 for (int ind = numAtomsInColumn; ind < numCellsInColumn * numAtomsPerCell; ind++)
1235 atomIndices[atomOffset + ind] = -1;
1240 /*! \brief Sets the cell index in the cell array for atom \p atomIndex and increments the atom count for the grid column */
1241 static void setCellAndAtomCount(gmx::ArrayRef<int> cell, int cellIndex, gmx::ArrayRef<int> cxy_na, int atomIndex)
1243 cell[atomIndex] = cellIndex;
1244 cxy_na[cellIndex] += 1;
1247 void Grid::calcColumnIndices(const Grid::Dimensions& gridDims,
1248 const gmx::UpdateGroupsCog* updateGroupsCog,
1249 const gmx::Range<int> atomRange,
1250 gmx::ArrayRef<const gmx::RVec> x,
1255 gmx::ArrayRef<int> cell,
1256 gmx::ArrayRef<int> cxy_na)
1258 const int numColumns = gridDims.numCells[XX] * gridDims.numCells[YY];
1260 /* We add one extra cell for particles which moved during DD */
1261 for (int i = 0; i < numColumns; i++)
1266 int taskAtomStart = *atomRange.begin() + static_cast<int>((thread + 0) * atomRange.size()) / nthread;
1267 int taskAtomEnd = *atomRange.begin() + static_cast<int>((thread + 1) * atomRange.size()) / nthread;
1272 for (int i = taskAtomStart; i < taskAtomEnd; i++)
1274 if (move == nullptr || move[i] >= 0)
1277 const gmx::RVec& coord = (updateGroupsCog ? updateGroupsCog->cogForAtom(i) : x[i]);
1279 /* We need to be careful with rounding,
1280 * particles might be a few bits outside the local zone.
1281 * The int cast takes care of the lower bound,
1282 * we will explicitly take care of the upper bound.
1284 int cx = static_cast<int>((coord[XX] - gridDims.lowerCorner[XX])
1285 * gridDims.invCellSize[XX]);
1286 int cy = static_cast<int>((coord[YY] - gridDims.lowerCorner[YY])
1287 * gridDims.invCellSize[YY]);
1290 if (cx < 0 || cx > gridDims.numCells[XX] || cy < 0 || cy > gridDims.numCells[YY])
1293 "grid cell cx %d cy %d out of range (max %d %d)\n"
1294 "atom %f %f %f, grid->c0 %f %f",
1297 gridDims.numCells[XX],
1298 gridDims.numCells[YY],
1302 gridDims.lowerCorner[XX],
1303 gridDims.lowerCorner[YY]);
1306 /* Take care of potential rounding issues */
1307 cx = std::min(cx, gridDims.numCells[XX] - 1);
1308 cy = std::min(cy, gridDims.numCells[YY] - 1);
1310 /* For the moment cell will contain only the, grid local,
1311 * x and y indices, not z.
1313 setCellAndAtomCount(cell, cx * gridDims.numCells[YY] + cy, cxy_na, i);
1317 /* Put this moved particle after the end of the grid,
1318 * so we can process it later without using conditionals.
1320 setCellAndAtomCount(cell, numColumns, cxy_na, i);
1327 for (int i = taskAtomStart; i < taskAtomEnd; i++)
1329 int cx = static_cast<int>((x[i][XX] - gridDims.lowerCorner[XX]) * gridDims.invCellSize[XX]);
1330 int cy = static_cast<int>((x[i][YY] - gridDims.lowerCorner[YY]) * gridDims.invCellSize[YY]);
1332 /* For non-home zones there could be particles outside
1333 * the non-bonded cut-off range, which have been communicated
1334 * for bonded interactions only. For the result it doesn't
1335 * matter where these end up on the grid. For performance
1336 * we put them in an extra row at the border.
1338 cx = std::max(cx, 0);
1339 cx = std::min(cx, gridDims.numCells[XX] - 1);
1340 cy = std::max(cy, 0);
1341 cy = std::min(cy, gridDims.numCells[YY] - 1);
1343 /* For the moment cell will contain only the, grid local,
1344 * x and y indices, not z.
1346 setCellAndAtomCount(cell, cx * gridDims.numCells[YY] + cy, cxy_na, i);
1351 /*! \brief Resizes grid and atom data which depend on the number of cells */
1352 static void resizeForNumberOfCells(const int numNbnxnAtoms,
1353 const int numAtomsMoved,
1354 GridSetData* gridSetData,
1355 nbnxn_atomdata_t* nbat)
1357 /* Note: gridSetData->cellIndices was already resized before */
1359 /* To avoid conditionals we store the moved particles at the end of a,
1360 * make sure we have enough space.
1362 gridSetData->atomIndices.resize(numNbnxnAtoms + numAtomsMoved);
1364 /* Make space in nbat for storing the atom coordinates */
1365 nbat->resizeCoordinateBuffer(numNbnxnAtoms);
1368 void Grid::setCellIndices(int ddZone,
1370 GridSetData* gridSetData,
1371 gmx::ArrayRef<GridWork> gridWork,
1372 const gmx::Range<int> atomRange,
1373 gmx::ArrayRef<const int> atomInfo,
1374 gmx::ArrayRef<const gmx::RVec> x,
1375 const int numAtomsMoved,
1376 nbnxn_atomdata_t* nbat)
1378 cellOffset_ = cellOffset;
1380 srcAtomBegin_ = *atomRange.begin();
1381 srcAtomEnd_ = *atomRange.end();
1383 const int nthread = gmx_omp_nthreads_get(ModuleMultiThread::Pairsearch);
1385 const int numAtomsPerCell = geometry_.numAtomsPerCell;
1387 /* Make the cell index as a function of x and y */
1391 for (int i = 0; i < numColumns() + 1; i++)
1393 /* We set ncz_max at the beginning of the loop iso at the end
1394 * to skip i=grid->ncx*grid->numCells[YY] which are moved particles
1395 * that do not need to be ordered on the grid.
1401 int cxy_na_i = gridWork[0].numAtomsPerColumn[i];
1402 for (int thread = 1; thread < nthread; thread++)
1404 cxy_na_i += gridWork[thread].numAtomsPerColumn[i];
1406 ncz = (cxy_na_i + numAtomsPerCell - 1) / numAtomsPerCell;
1407 if (nbat->XFormat == nbatX8)
1409 /* Make the number of cell a multiple of 2 */
1410 ncz = (ncz + 1) & ~1;
1412 cxy_ind_[i + 1] = cxy_ind_[i] + ncz;
1413 /* Clear cxy_na_, so we can reuse the array below */
1416 numCellsTotal_ = cxy_ind_[numColumns()] - cxy_ind_[0];
1417 numCellsColumnMax_ = ncz_max;
1419 /* Resize grid and atom data which depend on the number of cells */
1420 resizeForNumberOfCells(atomIndexEnd(), numAtomsMoved, gridSetData, nbat);
1425 "ns na_sc %d na_c %d super-cells: %d x %d y %d z %.1f maxz %d\n",
1427 geometry_.numAtomsICluster,
1429 dimensions_.numCells[XX],
1430 dimensions_.numCells[YY],
1431 numCellsTotal_ / (static_cast<double>(numColumns())),
1436 for (int cy = 0; cy < dimensions_.numCells[YY]; cy++)
1438 for (int cx = 0; cx < dimensions_.numCells[XX]; cx++)
1440 fprintf(debug, " %2d", cxy_ind_[i + 1] - cxy_ind_[i]);
1443 fprintf(debug, "\n");
1448 /* Make sure the work array for sorting is large enough */
1449 const int worstCaseSortBufferSize = ncz_max * numAtomsPerCell * c_sortGridMaxSizeFactor;
1450 if (worstCaseSortBufferSize > gmx::index(gridWork[0].sortBuffer.size()))
1452 for (GridWork& work : gridWork)
1454 /* Elements not in use should be -1 */
1455 work.sortBuffer.resize(worstCaseSortBufferSize, -1);
1459 /* Now we know the dimensions we can fill the grid.
1460 * This is the first, unsorted fill. We sort the columns after this.
1462 gmx::ArrayRef<int> cells = gridSetData->cells;
1463 gmx::ArrayRef<int> atomIndices = gridSetData->atomIndices;
1464 for (int i : atomRange)
1466 /* At this point nbs->cell contains the local grid x,y indices */
1467 const int cxy = cells[i];
1468 atomIndices[firstAtomInColumn(cxy) + cxy_na_[cxy]++] = i;
1473 /* Set the cell indices for the moved particles */
1474 int n0 = numCellsTotal_ * numAtomsPerCell;
1475 int n1 = numCellsTotal_ * numAtomsPerCell + cxy_na_[numColumns()];
1476 for (int i = n0; i < n1; i++)
1478 cells[atomIndices[i]] = i;
1482 /* Sort the super-cell columns along z into the sub-cells. */
1483 #pragma omp parallel for num_threads(nthread) schedule(static)
1484 for (int thread = 0; thread < nthread; thread++)
1488 gmx::Range<int> columnRange(((thread + 0) * numColumns()) / nthread,
1489 ((thread + 1) * numColumns()) / nthread);
1490 if (geometry_.isSimple)
1492 sortColumnsCpuGeometry(
1493 gridSetData, ddZone, atomInfo, x, nbat, columnRange, gridWork[thread].sortBuffer);
1497 sortColumnsGpuGeometry(
1498 gridSetData, ddZone, atomInfo, x, nbat, columnRange, gridWork[thread].sortBuffer);
1501 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
1504 if (geometry_.isSimple && nbat->XFormat == nbatX8)
1506 combine_bounding_box_pairs(*this, bb_, bbj_);
1509 if (!geometry_.isSimple)
1511 numClustersTotal_ = 0;
1512 for (int i = 0; i < numCellsTotal_; i++)
1514 numClustersTotal_ += numClusters_[i];
1520 if (geometry_.isSimple)
1522 print_bbsizes_simple(debug, *this);
1527 "ns non-zero sub-cells: %d average atoms %.2f\n",
1529 atomRange.size() / static_cast<double>(numClustersTotal_));
1531 print_bbsizes_supersub(debug, *this);
1536 } // namespace Nbnxm