2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
38 #include "nbnxn_grid.h"
45 #include "gromacs/domdec/domdec_struct.h"
46 #include "gromacs/math/utilities.h"
47 #include "gromacs/math/vec.h"
48 #include "gromacs/mdlib/gmx_omp_nthreads.h"
49 #include "gromacs/mdlib/nb_verlet.h"
50 #include "gromacs/mdlib/nbnxn_atomdata.h"
51 #include "gromacs/mdlib/nbnxn_consts.h"
52 #include "gromacs/mdlib/nbnxn_internal.h"
53 #include "gromacs/mdlib/nbnxn_search.h"
54 #include "gromacs/mdlib/nbnxn_util.h"
55 #include "gromacs/mdlib/updategroupscog.h"
56 #include "gromacs/mdtypes/forcerec.h" // only for GET_CGINFO_*
57 #include "gromacs/simd/simd.h"
58 #include "gromacs/simd/vector_operations.h"
59 #include "gromacs/utility/exceptions.h"
60 #include "gromacs/utility/smalloc.h"
62 struct gmx_domdec_zones_t;
/* Returns the atom number density (atoms per unit volume) of the box spanned
 * by lowerCorner and upperCorner.
 * NOTE(review): interior lines are missing from this excerpt (e.g. the
 * declaration of `size` and the numAtoms==0 guard the comment below refers
 * to); code lines are kept byte-identical.
 */
64 static real grid_atom_density(int numAtoms,
65 const rvec lowerCorner,
66 const rvec upperCorner)
72 /* To avoid zero density we use a minimum of 1 atom */
76 rvec_sub(upperCorner, lowerCorner, size);
78 return numAtoms/(size[XX]*size[YY]*size[ZZ]);
/* Sets the x/y dimensions, cell sizes and corner coordinates of the search
 * grid, and resizes all per-cell and per-column arrays for a worst-case
 * number of cells.
 * NOTE(review): interior lines (braces, some parameters and #if/#else
 * branches) are missing from this excerpt; code lines are kept byte-identical.
 */
81 static void set_grid_size_xy(const nbnxn_search *nbs,
85 const rvec lowerCorner,
86 const rvec upperCorner,
90 real tlen, tlen_x, tlen_y;
92 rvec_sub(upperCorner, lowerCorner, size);
94 if (numAtoms > grid->na_sc)
96 GMX_ASSERT(atomDensity > 0, "With one or more atoms, the density should be positive");
98 /* target cell length */
101 /* To minimize the zero interactions, we should make
102 * the largest of the i/j cell cubic.
104 int numAtomsInCell = std::max(grid->na_c, grid->na_cj);
106 /* Approximately cubic cells */
107 tlen = std::cbrt(numAtomsInCell/atomDensity);
113 /* Approximately cubic sub cells */
114 tlen = std::cbrt(grid->na_c/atomDensity);
115 tlen_x = tlen*c_gpuNumClusterPerCellX;
116 tlen_y = tlen*c_gpuNumClusterPerCellY;
118 /* We round ncx and ncy down, because we get fewer cell pairs
119 * in the pair list when the fixed cell dimensions (x,y) are
120 * larger than the variable one (z) than the other way around.
122 grid->numCells[XX] = std::max(1, static_cast<int>(size[XX]/tlen_x));
123 grid->numCells[YY] = std::max(1, static_cast<int>(size[YY]/tlen_y));
127 grid->numCells[XX] = 1;
128 grid->numCells[YY] = 1;
131 for (int d = 0; d < DIM - 1; d++)
133 grid->cellSize[d] = size[d]/grid->numCells[d];
134 grid->invCellSize[d] = 1/grid->cellSize[d];
139 /* This is a non-home zone, add an extra row of cells
140 * for particles communicated for bonded interactions.
141 * These can be beyond the cut-off. It doesn't matter where
142 * they end up on the grid, but for performance it's better
143 * if they don't end up in cells that can be within cut-off range.
145 grid->numCells[XX]++;
146 grid->numCells[YY]++;
149 /* We need one additional cell entry for particles moved by DD */
150 int numCellsXY = grid->numCells[XX]*grid->numCells[YY];
151 grid->cxy_na.resize(numCellsXY + 1)";
213 /* We need to sort paricles in grid columns on z-coordinate.
214 * As particle are very often distributed homogeneously, we a sorting
215 * algorithm similar to pigeonhole sort. We multiply the z-coordinate
216 * by a factor, cast to an int and try to store in that hole. If the hole
217 * is full, we move this or another particle. A second pass is needed to make
218 * contiguous elements. SORT_GRID_OVERSIZE is the ratio of holes to particles.
219 * 4 is the optimal value for homogeneous particle distribution and allows
220 * for an O(#particles) sort up till distributions were all particles are
221 * concentrated in 1/4 of the space. No NlogN fallback is implemented,
222 * as it can be expensive to detect imhomogeneous particle distributions.
223 * SGSF is the maximum ratio of holes used, in the worst case all particles
224 * end up in the last hole and we need #particles extra holes at the end.
226 #define SORT_GRID_OVERSIZE 4
227 #define SGSF (SORT_GRID_OVERSIZE + 1)
229 /* Sort particle index a on coordinates x along dim.
230 * Backwards tells if we want decreasing iso increasing coordinates.
231 * h0 is the minimum of the coordinate range.
232 * invh is the 1/length of the sorting range.
233 * n_per_h (>=n) is the expected average number of particles per 1/invh
234 * sort is the sorting work array.
235 * sort should have a size of at least n_per_h*SORT_GRID_OVERSIZE + n,
236 * or easier, allocate at least n*SGSF elements.
238 static void sort_atoms(int dim, gmx_bool Backwards,
239 int gmx_unused dd_zone,
240 bool gmx_unused relevantAtomsAreWithinGridBounds,
242 gmx::ArrayRef<const gmx::RVec> x,
243 real h0, real invh, int n_per_h,
244 gmx::ArrayRef<int> sort)
252 GMX_ASSERT(n <= n_per_h, "We require n <= n_per_h");
254 /* Transform the inverse range height into the inverse hole height */
255 invh *= n_per_h*SORT_GRID_OVERSIZE;
257 /* Set nsort to the maximum possible number of holes used.
258 * In worst case all n elements end up in the last bin.
260 int nsort = n_per_h*SORT_GRID_OVERSIZE + n;
262 /* Determine the index range used, so we can limit it for the second pass */
263 int zi_min = INT_MAX;
266 /* Sort the particles using a simple index sort */
267 for (int i = 0; i < n; i++)
269 /* The cast takes care of float-point rounding effects below zero.
270 * This code assumes particles are less than 1/SORT_GRID_OVERSIZE
271 * times the box height out of the box.
273 int zi = static_cast<int>((x[a[i]][dim] - h0)*invh);
276 /* As we can have rounding effect, we use > iso >= here */
277 if (relevantAtomsAreWithinGridBounds &&
278 (zi < 0 || (dd_zone == 0 && zi > n_per_h*SORT_GRID_OVERSIZE)))
280 gmx_fatal(FARGS, "(int)((x[%d][%c]=%f - %f)*%f) = %d, not in 0 - %d*%d\n",
281 a[i], 'x'+dim, x[a[i]][dim], h0, invh, zi,
282 n_per_h, SORT_GRID_OVERSIZE);
290 /* In a non-local domain, particles communcated for bonded interactions
291 * can be far beyond the grid size, which is set by the non-bonded
292 * cut-off distance. We sort such particles into the last cell.
294 if (zi > n_per_h*SORT_GRID_OVERSIZE)
296 zi = n_per_h*SORT_GRID_OVERSIZE;
299 /* Ideally this particle should go in sort cell zi,
300 * but that might already be in use,
301 * in that case find the first empty cell higher up
306 zi_min = std::min(zi_min, zi);
307 zi_max = std::max(zi_max, zi);
311 /* We have multiple atoms in the same sorting slot.
312 * Sort on real z for minimal bounding box size.
313 * There is an extra check for identical z to ensure
314 * well-defined output order, independent of input order
315 * to ensure binary reproducibility after restarts.
317 while (sort[zi] >= 0 && ( x[a[i]][dim] > x[sort[zi]][dim] ||
318 (x[a[i]][dim] == x[sort[zi]][dim] &&
326 /* Shift all elements by one slot until we find an empty slot */
329 while (sort[zim] >= 0)
337 zi_max = std::max(zi_max, zim);
340 zi_max = std::max(zi_max, zi);
347 for (int zi = 0; zi < nsort; zi++)
358 for (int zi = zi_max; zi >= zi_min; zi--)
369 gmx_incons("Lost particles while sorting");
374 #define R2F_D(x) ((float)((x) >= 0 ? ((1-GMX_FLOAT_EPS)*(x)) : ((1+GMX_FLOAT_EPS)*(x))))
375 #define R2F_U(x) ((float)((x) >= 0 ? ((1+GMX_FLOAT_EPS)*(x)) : ((1-GMX_FLOAT_EPS)*(x))))
381 /* Coordinate order x,y,z, bb order xyz0 */
382 static void calc_bounding_box(int na, int stride, const real *x, nbnxn_bb_t *bb)
385 real xl, xh, yl, yh, zl, zh;
395 for (int j = 1; j < na; j++)
397 xl = std::min(xl, x[i+XX]);
398 xh = std::max(xh, x[i+XX]);
399 yl = std::min(yl, x[i+YY]);
400 yh = std::max(yh, x[i+YY]);
401 zl = std::min(zl, x[i+ZZ]);
402 zh = std::max(zh, x[i+ZZ]);
405 /* Note: possible double to float conversion here */
406 bb->lower[BB_X] = R2F_D(xl);
407 bb->lower[BB_Y] = R2F_D(yl);
408 bb->lower[BB_Z] = R2F_D(zl);
409 bb->upper[BB_X] = R2F_U(xh);
410 bb->upper[BB_Y] = R2F_U(yh);
411 bb->upper[BB_Z] = R2F_U(zh);
414 /* Packed coordinates, bb order xyz0 */
415 static void calc_bounding_box_x_x4(int na, const real *x, nbnxn_bb_t *bb)
417 real xl, xh, yl, yh, zl, zh;
425 for (int j = 1; j < na; j++)
427 xl = std::min(xl, x[j+XX*c_packX4]);
428 xh = std::max(xh, x[j+XX*c_packX4]);
429 yl = std::min(yl, x[j+YY*c_packX4]);
430 yh = std::max(yh, x[j+YY*c_packX4]);
431 zl = std::min(zl, x[j+ZZ*c_packX4]);
432 zh = std::max(zh, x[j+ZZ*c_packX4]);
434 /* Note: possible double to float conversion here */
435 bb->lower[BB_X] = R2F_D(xl);
436 bb->lower[BB_Y] = R2F_D(yl);
437 bb->lower[BB_Z] = R2F_D(zl);
438 bb->upper[BB_X] = R2F_U(xh);
439 bb->upper[BB_Y] = R2F_U(yh);
440 bb->upper[BB_Z] = R2F_U(zh);
443 /* Packed coordinates, bb order xyz0 */
444 static void calc_bounding_box_x_x8(int na, const real *x, nbnxn_bb_t *bb)
446 real xl, xh, yl, yh, zl, zh;
454 for (int j = 1; j < na; j++)
456 xl = std::min(xl, x[j+XX*c_packX8]);
457 xh = std::max(xh, x[j+XX*c_packX8]);
458 yl = std::min(yl, x[j+YY*c_packX8]);
459 yh = std::max(yh, x[j+YY*c_packX8]);
460 zl = std::min(zl, x[j+ZZ*c_packX8]);
461 zh = std::max(zh, x[j+ZZ*c_packX8]);
463 /* Note: possible double to float conversion here */
464 bb->lower[BB_X] = R2F_D(xl);
465 bb->lower[BB_Y] = R2F_D(yl);
466 bb->lower[BB_Z] = R2F_D(zl);
467 bb->upper[BB_X] = R2F_U(xh);
468 bb->upper[BB_Y] = R2F_U(yh);
469 bb->upper[BB_Z] = R2F_U(zh);
472 /* Packed coordinates, bb order xyz0 */
473 gmx_unused static void calc_bounding_box_x_x4_halves(int na, const real *x,
474 nbnxn_bb_t *bb, nbnxn_bb_t *bbj)
476 // TODO: During SIMDv2 transition only some archs use namespace (remove when done)
479 calc_bounding_box_x_x4(std::min(na, 2), x, bbj);
483 calc_bounding_box_x_x4(std::min(na-2, 2), x+(c_packX4>>1), bbj+1);
487 /* Set the "empty" bounding box to the same as the first one,
488 * so we don't need to treat special cases in the rest of the code.
490 #if NBNXN_SEARCH_BB_SIMD4
491 store4(&bbj[1].lower[0], load4(&bbj[0].lower[0]));
492 store4(&bbj[1].upper[0], load4(&bbj[0].upper[0]));
498 #if NBNXN_SEARCH_BB_SIMD4
499 store4(&bb->lower[0], min(load4(&bbj[0].lower[0]), load4(&bbj[1].lower[0])));
500 store4(&bb->upper[0], max(load4(&bbj[0].upper[0]), load4(&bbj[1].upper[0])));
505 for (i = 0; i < NNBSBB_C; i++)
507 bb->lower[i] = std::min(bbj[0].lower[i], bbj[1].lower[i]);
508 bb->upper[i] = std::max(bbj[0].upper[i], bbj[1].upper[i]);
514 #if NBNXN_SEARCH_BB_SIMD4
516 /* Coordinate order xyz, bb order xxxxyyyyzzzz */
517 static void calc_bounding_box_xxxx(int na, int stride, const real *x, float *bb)
520 real xl, xh, yl, yh, zl, zh;
530 for (int j = 1; j < na; j++)
532 xl = std::min(xl, x[i+XX]);
533 xh = std::max(xh, x[i+XX]);
534 yl = std::min(yl, x[i+YY]);
535 yh = std::max(yh, x[i+YY]);
536 zl = std::min(zl, x[i+ZZ]);
537 zh = std::max(zh, x[i+ZZ]);
540 /* Note: possible double to float conversion here */
541 bb[0*STRIDE_PBB] = R2F_D(xl);
542 bb[1*STRIDE_PBB] = R2F_D(yl);
543 bb[2*STRIDE_PBB] = R2F_D(zl);
544 bb[3*STRIDE_PBB] = R2F_U(xh);
545 bb[4*STRIDE_PBB] = R2F_U(yh);
546 bb[5*STRIDE_PBB] = R2F_U(zh);
549 #endif /* NBNXN_SEARCH_BB_SIMD4 */
551 #if NBNXN_SEARCH_SIMD4_FLOAT_X_BB
553 /* Coordinate order xyz?, bb order xyz0 */
554 static void calc_bounding_box_simd4(int na, const float *x, nbnxn_bb_t *bb)
556 // TODO: During SIMDv2 transition only some archs use namespace (remove when done)
559 Simd4Float bb_0_S, bb_1_S;
565 for (int i = 1; i < na; i++)
567 x_S = load4(x+i*NNBSBB_C);
568 bb_0_S = min(bb_0_S, x_S);
569 bb_1_S = max(bb_1_S, x_S);
572 store4(&bb->lower[0], bb_0_S);
573 store4(&bb->upper[0], bb_1_S);
576 /* Coordinate order xyz?, bb order xxxxyyyyzzzz */
577 static void calc_bounding_box_xxxx_simd4(int na, const float *x,
578 nbnxn_bb_t *bb_work_aligned,
581 calc_bounding_box_simd4(na, x, bb_work_aligned);
583 bb[0*STRIDE_PBB] = bb_work_aligned->lower[BB_X];
584 bb[1*STRIDE_PBB] = bb_work_aligned->lower[BB_Y];
585 bb[2*STRIDE_PBB] = bb_work_aligned->lower[BB_Z];
586 bb[3*STRIDE_PBB] = bb_work_aligned->upper[BB_X];
587 bb[4*STRIDE_PBB] = bb_work_aligned->upper[BB_Y];
588 bb[5*STRIDE_PBB] = bb_work_aligned->upper[BB_Z];
591 #endif /* NBNXN_SEARCH_SIMD4_FLOAT_X_BB */
594 /* Combines pairs of consecutive bounding boxes */
595 static void combine_bounding_box_pairs(nbnxn_grid_t *grid,
596 gmx::ArrayRef<const nbnxn_bb_t> bb)
598 // TODO: During SIMDv2 transition only some archs use namespace (remove when done)
601 for (int i = 0; i < grid->numCells[XX]*grid->numCells[YY]; i++)
603 /* Starting bb in a column is expected to be 2-aligned */
604 int sc2 = grid->cxy_ind[i]>>1;
605 /* For odd numbers skip the last bb here */
606 int nc2 = (grid->cxy_na[i]+3)>>(2+1);
608 for (c2 = sc2; c2 < sc2+nc2; c2++)
610 #if NBNXN_SEARCH_BB_SIMD4
611 Simd4Float min_S, max_S;
613 min_S = min(load4(&bb[c2*2+0].lower[0]),
614 load4(&bb[c2*2+1].lower[0]));
615 max_S = max(load4(&bb[c2*2+0].upper[0]),
616 load4(&bb[c2*2+1].upper[0]));
617 store4(&grid->bbj[c2].lower[0], min_S);
618 store4(&grid->bbj[c2].upper[0], max_S);
620 for (int j = 0; j < NNBSBB_C; j++)
622 grid->bbj[c2].lower[j] = std::min(bb[c2*2+0].lower[j],
623 bb[c2*2+1].lower[j]);
624 grid->bbj[c2].upper[j] = std::max(bb[c2*2+0].upper[j],
625 bb[c2*2+1].upper[j]);
629 if (((grid->cxy_na[i]+3)>>2) & 1)
631 /* The bb count in this column is odd: duplicate the last bb */
632 for (int j = 0; j < NNBSBB_C; j++)
634 grid->bbj[c2].lower[j] = bb[c2*2].lower[j];
635 grid->bbj[c2].upper[j] = bb[c2*2].upper[j];
642 /* Prints the average bb size, used for debug output */
643 static void print_bbsizes_simple(FILE *fp,
644 const nbnxn_grid_t *grid)
649 for (int c = 0; c < grid->nc; c++)
651 for (int d = 0; d < DIM; d++)
653 ba[d] += grid->bb[c].upper[d] - grid->bb[c].lower[d];
656 dsvmul(1.0/grid->nc, ba, ba);
658 real avgCellSizeZ = (grid->atom_density > 0 ? grid->na_c/(grid->atom_density*grid->cellSize[XX]*grid->cellSize[YY]) : 0.0);
660 fprintf(fp, "ns bb: grid %4.2f %4.2f %4.2f abs %4.2f %4.2f %4.2f rel %4.2f %4.2f %4.2f\n",
664 ba[XX], ba[YY], ba[ZZ],
665 ba[XX]*grid->invCellSize[XX],
666 ba[YY]*grid->invCellSize[YY],
667 grid->atom_density > 0 ? ba[ZZ]/avgCellSizeZ : 0.0);
670 /* Prints the average bb size, used for debug output */
671 static void print_bbsizes_supersub(FILE *fp,
672 const nbnxn_grid_t *grid)
679 for (int c = 0; c < grid->nc; c++)
682 for (int s = 0; s < grid->nsubc[c]; s += STRIDE_PBB)
684 int cs_w = (c*c_gpuNumClusterPerCell + s)/STRIDE_PBB;
685 for (int i = 0; i < STRIDE_PBB; i++)
687 for (int d = 0; d < DIM; d++)
690 grid->pbb[cs_w*NNBSBB_XXXX+(DIM+d)*STRIDE_PBB+i] -
691 grid->pbb[cs_w*NNBSBB_XXXX+ d *STRIDE_PBB+i];
696 for (int s = 0; s < grid->nsubc[c]; s++)
698 int cs = c*c_gpuNumClusterPerCell + s;
699 for (int d = 0; d < DIM; d++)
701 ba[d] += grid->bb[cs].upper[d] - grid->bb[cs].lower[d];
705 ns += grid->nsubc[c];
707 dsvmul(1.0/ns, ba, ba);
709 real avgClusterSizeZ = (grid->atom_density > 0 ? grid->na_sc/(grid->atom_density*grid->cellSize[XX]*grid->cellSize[YY]*c_gpuNumClusterPerCellZ) : 0.0);
711 fprintf(fp, "ns bb: grid %4.2f %4.2f %4.2f abs %4.2f %4.2f %4.2f rel %4.2f %4.2f %4.2f\n",
712 grid->cellSize[XX]/c_gpuNumClusterPerCellX,
713 grid->cellSize[YY]/c_gpuNumClusterPerCellY,
715 ba[XX], ba[YY], ba[ZZ],
716 ba[XX]*c_gpuNumClusterPerCellX*grid->invCellSize[XX],
717 ba[YY]*c_gpuNumClusterPerCellY*grid->invCellSize[YY],
718 grid->atom_density > 0 ? ba[ZZ]/avgClusterSizeZ : 0.0);
721 /* Set non-bonded interaction flags for the current cluster.
722 * Sorts atoms on LJ coefficients: !=0 first, ==0 at the end.
724 static void sort_cluster_on_flag(int numAtomsInCluster,
728 gmx::ArrayRef<int> order,
731 constexpr int c_maxNumAtomsInCluster = 8;
732 int sort1[c_maxNumAtomsInCluster];
733 int sort2[c_maxNumAtomsInCluster];
735 GMX_ASSERT(numAtomsInCluster <= c_maxNumAtomsInCluster, "Need to increase c_maxNumAtomsInCluster to support larger clusters");
740 for (int s = atomStart; s < atomEnd; s += numAtomsInCluster)
742 /* Make lists for this (sub-)cell on atoms with and without LJ */
745 gmx_bool haveQ = FALSE;
747 for (int a = s; a < std::min(s + numAtomsInCluster, atomEnd); a++)
749 haveQ = haveQ || GET_CGINFO_HAS_Q(atinfo[order[a]]);
751 if (GET_CGINFO_HAS_VDW(atinfo[order[a]]))
753 sort1[n1++] = order[a];
758 sort2[n2++] = order[a];
762 /* If we don't have atoms with LJ, there's nothing to sort */
765 *flags |= NBNXN_CI_DO_LJ(subc);
767 if (2*n1 <= numAtomsInCluster)
769 /* Only sort when strictly necessary. Ordering particles
770 * Ordering particles can lead to less accurate summation
771 * due to rounding, both for LJ and Coulomb interactions.
773 if (2*(a_lj_max - s) >= numAtomsInCluster)
775 for (int i = 0; i < n1; i++)
777 order[atomStart + i] = sort1[i];
779 for (int j = 0; j < n2; j++)
781 order[atomStart + n1 + j] = sort2[j];
785 *flags |= NBNXN_CI_HALF_LJ(subc);
790 *flags |= NBNXN_CI_DO_COUL(subc);
796 /* Fill a pair search cell with atoms.
797 * Potentially sorts atoms and sets the interaction flags.
799 static void fill_cell(nbnxn_search *nbs,
801 nbnxn_atomdata_t *nbat,
805 gmx::ArrayRef<const gmx::RVec> x,
806 nbnxn_bb_t gmx_unused *bb_work_aligned)
808 const int numAtoms = atomEnd - atomStart;
812 /* Note that non-local grids are already sorted.
813 * Then sort_cluster_on_flag will only set the flags and the sorting
814 * will not affect the atom order.
816 sort_cluster_on_flag(grid->na_c, atomStart, atomEnd, atinfo, nbs->a,
817 grid->flags.data() + (atomStart >> grid->na_c_2log) - grid->cell0);
822 /* Set the fep flag for perturbed atoms in this (sub-)cell */
824 /* The grid-local cluster/(sub-)cell index */
825 int cell = (atomStart >> grid->na_c_2log) - grid->cell0*(grid->bSimple ? 1 : c_gpuNumClusterPerCell);
827 for (int at = atomStart; at < atomEnd; at++)
829 if (nbs->a[at] >= 0 && GET_CGINFO_FEP(atinfo[nbs->a[at]]))
831 grid->fep[cell] |= (1 << (at - atomStart));
836 /* Now we have sorted the atoms, set the cell indices */
837 for (int at = atomStart; at < atomEnd; at++)
839 nbs->cell[nbs->a[at]] = at;
842 copy_rvec_to_nbat_real(nbs->a.data() + atomStart, numAtoms, grid->na_c,
843 as_rvec_array(x.data()),
844 nbat->XFormat, nbat->x().data(), atomStart);
846 if (nbat->XFormat == nbatX4)
848 /* Store the bounding boxes as xyz.xyz. */
849 size_t offset = (atomStart - grid->cell0*grid->na_sc) >> grid->na_c_2log;
850 nbnxn_bb_t *bb_ptr = grid->bb.data() + offset;
852 #if GMX_SIMD && GMX_SIMD_REAL_WIDTH == 2
853 if (2*grid->na_cj == grid->na_c)
855 calc_bounding_box_x_x4_halves(numAtoms, nbat->x().data() + atom_to_x_index<c_packX4>(atomStart), bb_ptr,
856 grid->bbj.data() + offset*2);
861 calc_bounding_box_x_x4(numAtoms, nbat->x().data() + atom_to_x_index<c_packX4>(atomStart), bb_ptr);
864 else if (nbat->XFormat == nbatX8)
866 /* Store the bounding boxes as xyz.xyz. */
867 size_t offset = (atomStart - grid->cell0*grid->na_sc) >> grid->na_c_2log;
868 nbnxn_bb_t *bb_ptr = grid->bb.data() + offset;
870 calc_bounding_box_x_x8(numAtoms, nbat->x().data() + atom_to_x_index<c_packX8>(atomStart), bb_ptr);
873 else if (!grid->bSimple)
875 /* Store the bounding boxes in a format convenient
876 * for SIMD4 calculations: xxxxyyyyzzzz...
880 ((atomStart - grid->cell0*grid->na_sc) >> (grid->na_c_2log + STRIDE_PBB_2LOG))*NNBSBB_XXXX +
881 (((atomStart - grid->cell0*grid->na_sc) >> grid->na_c_2log) & (STRIDE_PBB - 1));
883 #if NBNXN_SEARCH_SIMD4_FLOAT_X_BB
884 if (nbat->XFormat == nbatXYZQ)
886 calc_bounding_box_xxxx_simd4(numAtoms, nbat->x().data() + atomStart*nbat->xstride,
887 bb_work_aligned, pbb_ptr);
892 calc_bounding_box_xxxx(numAtoms, nbat->xstride, nbat->x().data() + atomStart*nbat->xstride,
897 fprintf(debug, "cell %4d bb %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f\n",
898 atomStart >> grid->na_c_2log,
899 pbb_ptr[0*STRIDE_PBB], pbb_ptr[3*STRIDE_PBB],
900 pbb_ptr[1*STRIDE_PBB], pbb_ptr[4*STRIDE_PBB],
901 pbb_ptr[2*STRIDE_PBB], pbb_ptr[5*STRIDE_PBB]);
907 /* Store the bounding boxes as xyz.xyz. */
908 nbnxn_bb_t *bb_ptr = grid->bb.data() + ((atomStart - grid->cell0*grid->na_sc) >> grid->na_c_2log);
910 calc_bounding_box(numAtoms, nbat->xstride, nbat->x().data() + atomStart*nbat->xstride,
915 int bbo = (atomStart - grid->cell0*grid->na_sc)/grid->na_c;
916 fprintf(debug, "cell %4d bb %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f\n",
917 atomStart >> grid->na_c_2log,
918 grid->bb[bbo].lower[BB_X],
919 grid->bb[bbo].lower[BB_Y],
920 grid->bb[bbo].lower[BB_Z],
921 grid->bb[bbo].upper[BB_X],
922 grid->bb[bbo].upper[BB_Y],
923 grid->bb[bbo].upper[BB_Z]);
928 /* Spatially sort the atoms within one grid column */
929 static void sort_columns_simple(nbnxn_search *nbs,
932 int atomStart, int atomEnd,
934 gmx::ArrayRef<const gmx::RVec> x,
935 nbnxn_atomdata_t *nbat,
936 int cxy_start, int cxy_end,
937 gmx::ArrayRef<int> sort_work)
941 fprintf(debug, "cell0 %d sorting columns %d - %d, atoms %d - %d\n",
942 grid->cell0, cxy_start, cxy_end, atomStart, atomEnd);
945 const bool relevantAtomsAreWithinGridBounds = (grid->maxAtomGroupRadius == 0);
947 /* Sort the atoms within each x,y column in 3 dimensions */
948 for (int cxy = cxy_start; cxy < cxy_end; cxy++)
950 int na = grid->cxy_na[cxy];
951 int ncz = grid->cxy_ind[cxy+1] - grid->cxy_ind[cxy];
952 int ash = (grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc;
954 /* Sort the atoms within each x,y column on z coordinate */
955 sort_atoms(ZZ, FALSE, dd_zone,
956 relevantAtomsAreWithinGridBounds,
957 nbs->a.data() + ash, na, x,
959 1.0/grid->size[ZZ], ncz*grid->na_sc,
962 /* Fill the ncz cells in this column */
963 int cfilled = grid->cxy_ind[cxy];
964 for (int cz = 0; cz < ncz; cz++)
966 int c = grid->cxy_ind[cxy] + cz;
968 int ash_c = ash + cz*grid->na_sc;
969 int na_c = std::min(grid->na_sc, na-(ash_c-ash));
971 fill_cell(nbs, grid, nbat,
972 ash_c, ash_c+na_c, atinfo, x,
975 /* This copy to bbcz is not really necessary.
976 * But it allows to use the same grid search code
977 * for the simple and supersub cell setups.
983 grid->bbcz[c*NNBSBB_D ] = grid->bb[cfilled].lower[BB_Z];
984 grid->bbcz[c*NNBSBB_D+1] = grid->bb[cfilled].upper[BB_Z];
987 /* Set the unused atom indices to -1 */
988 for (int ind = na; ind < ncz*grid->na_sc; ind++)
990 nbs->a[ash+ind] = -1;
995 /* Spatially sort the atoms within one grid column */
996 static void sort_columns_supersub(nbnxn_search *nbs,
999 int atomStart, int atomEnd,
1001 gmx::ArrayRef<const gmx::RVec> x,
1002 nbnxn_atomdata_t *nbat,
1003 int cxy_start, int cxy_end,
1004 gmx::ArrayRef<int> sort_work)
1006 nbnxn_bb_t bb_work_array[2];
1007 nbnxn_bb_t *bb_work_aligned = reinterpret_cast<nbnxn_bb_t *>((reinterpret_cast<std::size_t>(bb_work_array + 1)) & (~(static_cast<std::size_t>(15))));
1011 fprintf(debug, "cell0 %d sorting columns %d - %d, atoms %d - %d\n",
1012 grid->cell0, cxy_start, cxy_end, atomStart, atomEnd);
1015 const bool relevantAtomsAreWithinGridBounds = (grid->maxAtomGroupRadius == 0);
1017 int subdiv_x = grid->na_c;
1018 int subdiv_y = c_gpuNumClusterPerCellX*subdiv_x;
1019 int subdiv_z = c_gpuNumClusterPerCellY*subdiv_y;
1021 /* Sort the atoms within each x,y column in 3 dimensions.
1022 * Loop over all columns on the x/y grid.
1024 for (int cxy = cxy_start; cxy < cxy_end; cxy++)
1026 int gridX = cxy/grid->numCells[YY];
1027 int gridY = cxy - gridX*grid->numCells[YY];
1029 int numAtomsInColumn = grid->cxy_na[cxy];
1030 int numCellsInColumn = grid->cxy_ind[cxy + 1] - grid->cxy_ind[cxy];
1031 int ash = (grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc;
1033 /* Sort the atoms within each x,y column on z coordinate */
1034 sort_atoms(ZZ, FALSE, dd_zone,
1035 relevantAtomsAreWithinGridBounds,
1036 nbs->a.data() + ash, numAtomsInColumn, x,
1038 1.0/grid->size[ZZ], numCellsInColumn*grid->na_sc,
1041 /* This loop goes over the cells and clusters along z at once */
1042 for (int sub_z = 0; sub_z < numCellsInColumn*c_gpuNumClusterPerCellZ; sub_z++)
1044 int ash_z = ash + sub_z*subdiv_z;
1045 int na_z = std::min(subdiv_z, numAtomsInColumn - (ash_z - ash));
1047 /* We have already sorted on z */
1049 if (sub_z % c_gpuNumClusterPerCellZ == 0)
1051 cz = sub_z/c_gpuNumClusterPerCellZ;
1052 int cell = grid->cxy_ind[cxy] + cz;
1054 /* The number of atoms in this cell/super-cluster */
1055 int numAtoms = std::min(grid->na_sc, numAtomsInColumn - (ash_z - ash));
1057 grid->nsubc[cell] = std::min(c_gpuNumClusterPerCell,
1058 (numAtoms + grid->na_c - 1)/grid->na_c);
1060 /* Store the z-boundaries of the bounding box of the cell */
1061 grid->bbcz[cell*NNBSBB_D ] = x[nbs->a[ash_z]][ZZ];
1062 grid->bbcz[cell*NNBSBB_D+1] = x[nbs->a[ash_z + numAtoms - 1]][ZZ];
1065 if (c_gpuNumClusterPerCellY > 1)
1067 /* Sort the atoms along y */
1068 sort_atoms(YY, (sub_z & 1) != 0, dd_zone,
1069 relevantAtomsAreWithinGridBounds,
1070 nbs->a.data() + ash_z, na_z, x,
1071 grid->c0[YY] + gridY*grid->cellSize[YY],
1072 grid->invCellSize[YY], subdiv_z,
1076 for (int sub_y = 0; sub_y < c_gpuNumClusterPerCellY; sub_y++)
1078 int ash_y = ash_z + sub_y*subdiv_y;
1079 int na_y = std::min(subdiv_y, numAtomsInColumn - (ash_y - ash));
1081 if (c_gpuNumClusterPerCellX > 1)
1083 /* Sort the atoms along x */
1084 sort_atoms(XX, ((cz*c_gpuNumClusterPerCellY + sub_y) & 1) != 0, dd_zone,
1085 relevantAtomsAreWithinGridBounds,
1086 nbs->a.data() + ash_y, na_y, x,
1087 grid->c0[XX] + gridX*grid->cellSize[XX],
1088 grid->invCellSize[XX], subdiv_y,
1092 for (int sub_x = 0; sub_x < c_gpuNumClusterPerCellX; sub_x++)
1094 int ash_x = ash_y + sub_x*subdiv_x;
1095 int na_x = std::min(subdiv_x, numAtomsInColumn - (ash_x - ash));
1097 fill_cell(nbs, grid, nbat,
1098 ash_x, ash_x + na_x, atinfo, x,
1104 /* Set the unused atom indices to -1 */
1105 for (int ind = numAtomsInColumn; ind < numCellsInColumn*grid->na_sc; ind++)
1107 nbs->a[ash + ind] = -1;
1112 /* Sets the cell index in the cell array for atom \p atomIndex and increments the atom count for the grid column */
1113 static void setCellAndAtomCount(gmx::ArrayRef<int> cell,
1115 gmx::ArrayRef<int> cxy_na,
1118 cell[atomIndex] = cellIndex;
1119 cxy_na[cellIndex] += 1;
1122 /* Determine in which grid column atoms should go */
1123 static void calc_column_indices(nbnxn_grid_t *grid,
1124 const gmx::UpdateGroupsCog *updateGroupsCog,
1125 int atomStart, int atomEnd,
1126 gmx::ArrayRef<const gmx::RVec> x,
1127 int dd_zone, const int *move,
1128 int thread, int nthread,
1129 gmx::ArrayRef<int> cell,
1130 gmx::ArrayRef<int> cxy_na)
1132 /* We add one extra cell for particles which moved during DD */
1133 for (int i = 0; i < grid->numCells[XX]*grid->numCells[YY] + 1; i++)
1138 int taskAtomStart = atomStart + static_cast<int>((thread + 0)*(atomEnd - atomStart))/nthread;
1139 int taskAtomEnd = atomStart + static_cast<int>((thread + 1)*(atomEnd - atomStart))/nthread;
1143 for (int i = taskAtomStart; i < taskAtomEnd; i++)
1145 if (move == nullptr || move[i] >= 0)
1148 const gmx::RVec &coord = (updateGroupsCog ? updateGroupsCog->cogForAtom(i) : x[i]);
1150 /* We need to be careful with rounding,
1151 * particles might be a few bits outside the local zone.
1152 * The int cast takes care of the lower bound,
1153 * we will explicitly take care of the upper bound.
1155 int cx = static_cast<int>((coord[XX] - grid->c0[XX])*grid->invCellSize[XX]);
1156 int cy = static_cast<int>((coord[YY] - grid->c0[YY])*grid->invCellSize[YY]);
1159 if (cx < 0 || cx > grid->numCells[XX] ||
1160 cy < 0 || cy > grid->numCells[YY])
1163 "grid cell cx %d cy %d out of range (max %d %d)\n"
1164 "atom %f %f %f, grid->c0 %f %f",
1165 cx, cy, grid->numCells[XX], grid->numCells[YY],
1166 x[i][XX], x[i][YY], x[i][ZZ], grid->c0[XX], grid->c0[YY]);
1169 /* Take care of potential rouding issues */
1170 cx = std::min(cx, grid->numCells[XX] - 1);
1171 cy = std::min(cy, grid->numCells[YY] - 1);
1173 /* For the moment cell will contain only the, grid local,
1174 * x and y indices, not z.
1176 setCellAndAtomCount(cell, cx*grid->numCells[YY] + cy, cxy_na, i);
1180 /* Put this moved particle after the end of the grid,
1181 * so we can process it later without using conditionals.
1183 setCellAndAtomCount(cell, grid->numCells[XX]*grid->numCells[YY], cxy_na, i);
1190 for (int i = taskAtomStart; i < taskAtomEnd; i++)
1192 int cx = static_cast<int>((x[i][XX] - grid->c0[XX])*grid->invCellSize[XX]);
1193 int cy = static_cast<int>((x[i][YY] - grid->c0[YY])*grid->invCellSize[YY]);
1195 /* For non-home zones there could be particles outside
1196 * the non-bonded cut-off range, which have been communicated
1197 * for bonded interactions only. For the result it doesn't
1198 * matter where these end up on the grid. For performance
1199 * we put them in an extra row at the border.
1201 cx = std::max(cx, 0);
1202 cx = std::min(cx, grid->numCells[XX] - 1);
1203 cy = std::max(cy, 0);
1204 cy = std::min(cy, grid->numCells[YY] - 1);
1206 /* For the moment cell will contain only the, grid local,
1207 * x and y indices, not z.
1209 setCellAndAtomCount(cell, cx*grid->numCells[YY] + cy, cxy_na, i);
1214 /* Resizes grid and atom data which depend on the number of cells */
1215 static void resizeForNumberOfCells(const nbnxn_grid_t &grid,
1218 nbnxn_atomdata_t *nbat)
1220 int numNbnxnAtoms = (grid.cell0 + grid.nc)*grid.na_sc;
1222 /* Note: nbs->cell was already resized before */
1224 /* To avoid conditionals we store the moved particles at the end of a,
1225 * make sure we have enough space.
1227 nbs->a.resize(numNbnxnAtoms + numAtomsMoved);
1229 /* Make space in nbat for storing the atom coordinates */
1230 nbat->resizeCoordinateBuffer(numNbnxnAtoms);
1233 /* Determine in which grid cells the atoms should go */
/* NOTE(review): several parameters used below (ddZone, atomStart, atomEnd,
 * atinfo, numAtomsMoved, move) come from signature lines not visible here. */
1235 calc_cell_indices(nbnxn_search *nbs,
1238 const gmx::UpdateGroupsCog *updateGroupsCog,
1242 gmx::ArrayRef<const gmx::RVec> x,
1245 nbnxn_atomdata_t *nbat)
1247 /* First compute all grid/column indices and store them in nbs->cell */
1248 nbs->cell.resize(atomEnd);
1250 const int nthread = gmx_omp_nthreads_get(emntPairsearch);
/* Each thread computes column indices for its share of the atom range;
 * the per-thread column occupation counts go into work[thread].cxy_na. */
1252 #pragma omp parallel for num_threads(nthread) schedule(static)
1253 for (int thread = 0; thread < nthread; thread++)
1257 calc_column_indices(grid, updateGroupsCog,
1258 atomStart, atomEnd, x,
1259 ddZone, move, thread, nthread,
1260 nbs->cell, nbs->work[thread].cxy_na);
1262 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1265 /* Make the cell index as a function of x and y */
/* Exclusive prefix sum over the per-column z-cell counts: cxy_ind[i] becomes
 * the first cell index of column i. The loop runs one entry past the last
 * real column so the moved particles stored behind the grid are counted too. */
1268 grid->cxy_ind[0] = 0;
1269 for (int i = 0; i < grid->numCells[XX]*grid->numCells[YY] + 1; i++)
1271 /* We set ncz_max at the beginning of the loop iso at the end
1272 * to skip i=grid->ncx*grid->numCells[YY] which are moved particles
1273 * that do not need to be ordered on the grid.
/* Sum the per-thread atom counts for this column */
1279 int cxy_na_i = nbs->work[0].cxy_na[i];
1280 for (int thread = 1; thread < nthread; thread++)
1282 cxy_na_i += nbs->work[thread].cxy_na[i];
/* Number of z-cells needed for cxy_na_i atoms, rounded up */
1284 ncz = (cxy_na_i + grid->na_sc - 1)/grid->na_sc;
1285 if (nbat->XFormat == nbatX8)
1287 /* Make the number of cells a multiple of 2 */
1288 ncz = (ncz + 1) & ~1;
1290 grid->cxy_ind[i+1] = grid->cxy_ind[i] + ncz;
1291 /* Clear cxy_na, so we can reuse the array below */
1292 grid->cxy_na[i] = 0;
/* Total number of (super-)cells on this grid */
1294 grid->nc = grid->cxy_ind[grid->numCells[XX]*grid->numCells[YY]] - grid->cxy_ind[0];
1296 resizeForNumberOfCells(*grid, numAtomsMoved, nbs, nbat);
1300 fprintf(debug, "ns na_sc %d na_c %d super-cells: %d x %d y %d z %.1f maxz %d\n",
1301 grid->na_sc, grid->na_c, grid->nc,
1302 grid->numCells[XX], grid->numCells[YY], grid->nc/(static_cast<double>(grid->numCells[XX]*grid->numCells[YY])),
/* Debug dump: number of z-cells per grid column */
1307 for (int cy = 0; cy < grid->numCells[YY]; cy++)
1309 for (int cx = 0; cx < grid->numCells[XX]; cx++)
1311 fprintf(debug, " %2d", grid->cxy_ind[i+1]-grid->cxy_ind[i]);
1314 fprintf(debug, "\n")
1319 /* Make sure the work array for sorting is large enough */
1320 if (ncz_max*grid->na_sc*SGSF > gmx::index(nbs->work[0].sortBuffer.size()))
1322 for (nbnxn_search_work_t &work : nbs->work)
1324 /* Elements not in use should be -1 */
1325 work.sortBuffer.resize(ncz_max*grid->na_sc*SGSF, -1);
1329 /* Now we know the dimensions we can fill the grid.
1330 * This is the first, unsorted fill. We sort the columns after this.
1332 for (int i = atomStart; i < atomEnd; i++)
1334 /* At this point nbs->cell contains the local grid x,y indices */
1335 int cxy = nbs->cell[i];
/* Append atom i to its column; cxy_na[cxy] doubles as the fill counter here */
1336 nbs->a[(grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc + grid->cxy_na[cxy]++] = i;
1341 /* Set the cell indices for the moved particles */
1342 int n0 = grid->nc*grid->na_sc;
1343 int n1 = grid->nc*grid->na_sc+grid->cxy_na[grid->numCells[XX]*grid->numCells[YY]];
1346 for (int i = n0; i < n1; i++)
1348 nbs->cell[nbs->a[i]] = i;
1353 /* Sort the super-cell columns along z into the sub-cells. */
1354 #pragma omp parallel for num_threads(nthread) schedule(static)
1355 for (int thread = 0; thread < nthread; thread++)
/* Static partitioning of the grid columns over the threads */
1359 int columnStart = ((thread + 0)*grid->numCells[XX]*grid->numCells[YY])/nthread;
1360 int columnEnd = ((thread + 1)*grid->numCells[XX]*grid->numCells[YY])/nthread;
/* NOTE(review): the conditional choosing between the two sort routines is
 * elided in this view; presumably it branches on grid->bSimple — confirm. */
1363 sort_columns_simple(nbs, ddZone, grid, atomStart, atomEnd, atinfo, x, nbat,
1364 columnStart, columnEnd,
1365 nbs->work[thread].sortBuffer);
1369 sort_columns_supersub(nbs, ddZone, grid, atomStart, atomEnd, atinfo, x, nbat,
1370 columnStart, columnEnd,
1371 nbs->work[thread].sortBuffer);
1374 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1377 if (grid->bSimple && nbat->XFormat == nbatX8)
1379 combine_bounding_box_pairs(grid, grid->bb);
/* For non-simple (super/sub-cell) grids, count the total number of sub-cells */
1384 grid->nsubc_tot = 0;
1385 for (int i = 0; i < grid->nc; i++)
1387 grid->nsubc_tot += grid->nsubc[i];
1395 print_bbsizes_simple(debug, grid);
1399 fprintf(debug, "ns non-zero sub-cells: %d average atoms %.2f\n",
1400 grid->nsubc_tot, (atomEnd - atomStart)/static_cast<double>(grid->nsubc_tot));
1402 print_bbsizes_supersub(debug, grid);
1407 /* Sets up a grid and puts the atoms on the grid.
1408 * This function only operates on one domain of the domain decomposition.
1409 * Note that without domain decomposition there is only one domain.
1411 void nbnxn_put_on_grid(nbnxn_search_t nbs,
1415 const rvec lowerCorner,
1416 const rvec upperCorner,
1417 const gmx::UpdateGroupsCog *updateGroupsCog,
1422 gmx::ArrayRef<const gmx::RVec> x,
1426 nbnxn_atomdata_t *nbat)
1428 nbnxn_grid_t *grid = &nbs->grid[ddZone];
1430 nbs_cycle_start(&nbs->cc[enbsCCgrid]);
/* Cluster geometry depends on the kernel type: i- and j-cluster sizes and
 * the number of atoms per (super-)cell. */
1432 grid->bSimple = nbnxn_kernel_pairlist_simple(nb_kernel_type);
1434 grid->na_c = nbnxn_kernel_to_cluster_i_size(nb_kernel_type);
1435 grid->na_cj = nbnxn_kernel_to_cluster_j_size(nb_kernel_type);
1436 grid->na_sc = (grid->bSimple ? 1 : c_gpuNumClusterPerCell)*grid->na_c;
1437 grid->na_c_2log = get_2log(grid->na_c);
/* Non-local zones: the first cell continues after the previous zone's cells,
 * converted to units of this grid's na_sc (assignment LHS elided here). */
1446 (nbs->grid[ddZone - 1].cell0 + nbs->grid[ddZone - 1].nc)*
1447 nbs->grid[ddZone- 1].na_sc/grid->na_sc;
1450 const int n = atomEnd - atomStart;
1455 copy_mat(box, nbs->box);
1457 /* Avoid zero density */
1458 if (atomDensity > 0)
1460 grid->atom_density = atomDensity;
/* No density given: estimate it from the atoms in the bounding corners */
1464 grid->atom_density = grid_atom_density(n - numAtomsMoved, lowerCorner, upperCorner);
/* NOTE(review): the surrounding ddZone == 0 conditional is elided; moved
 * particles are excluded from the local atom count. */
1469 nbs->natoms_local = atomEnd - numAtomsMoved;
1470 /* We assume that nbnxn_put_on_grid is called first
1471 * for the local atoms (ddZone=0).
1473 nbs->natoms_nonlocal = atomEnd - numAtomsMoved;
1475 /* When not using atom groups, all atoms should be within the grid bounds */
1476 grid->maxAtomGroupRadius = (updateGroupsCog ? updateGroupsCog->maxUpdateGroupRadius() : 0);
1477 /* For the non-local grids the situation is the same as for the local */
1478 for (size_t g = 1; g < nbs->grid.size(); g++)
1480 nbs->grid[g].maxAtomGroupRadius = grid->maxAtomGroupRadius;
1485 fprintf(debug, "natoms_local = %5d atom_density = %5.1f\n",
1486 nbs->natoms_local, grid->atom_density);
/* Non-local zone: only the running non-local atom count needs updating */
1491 nbs->natoms_nonlocal = std::max(nbs->natoms_nonlocal, atomEnd);
1494 /* We always use the home zone (grid[0]) for setting the cell size,
1495 * since determining densities for non-local zones is difficult.
1497 set_grid_size_xy(nbs, grid,
1498 ddZone, n - numAtomsMoved,
1499 lowerCorner, upperCorner,
1500 nbs->grid[0].atom_density);
/* Assign atoms to cells, sort the columns and fill nbs->a/nbs->cell */
1502 calc_cell_indices(nbs, ddZone, grid, updateGroupsCog, atomStart, atomEnd, atinfo, x, numAtomsMoved, move, nbat);
1506 nbat->natoms_local = nbat->numAtoms();
1508 if (ddZone == gmx::ssize(nbs->grid) - 1)
1510 /* We are done setting up all grids, we can resize the force buffers */
1511 nbat->resizeForceBuffers();
1514 nbs_cycle_stop(&nbs->cc[enbsCCgrid]);
1517 /* Calls nbnxn_put_on_grid for all non-local domains */
1518 void nbnxn_put_on_grid_nonlocal(nbnxn_search_t nbs,
1519 const struct gmx_domdec_zones_t *zones,
1521 gmx::ArrayRef<const gmx::RVec> x,
1523 nbnxn_atomdata_t *nbat)
/* Zone 0 is the home zone, gridded elsewhere; handle the non-local zones */
1525 for (int zone = 1; zone < zones->n; zone++)
/* Copy the bounding corners of this DD zone */
1528 for (int d = 0; d < DIM; d++)
1530 c0[d] = zones->size[zone].bb_x0[d];
1531 c1[d] = zones->size[zone].bb_x1[d];
/* Put this zone's atom range (cg_range) on its own grid.
 * NOTE(review): the remaining call arguments are not visible in this view. */
1534 nbnxn_put_on_grid(nbs, nbs->ePBC, nullptr,
1537 zones->cg_range[zone],
1538 zones->cg_range[zone+1],
1548 void nbnxn_get_ncells(nbnxn_search_t nbs, int *ncx, int *ncy)
1550 *ncx = nbs->grid[0].numCells[XX];
1551 *ncy = nbs->grid[0].numCells[YY];
1554 gmx::ArrayRef<const int> nbnxn_get_atomorder(const nbnxn_search *nbs)
1556 /* Return the atom order for the home cell (index 0) */
1557 const nbnxn_grid_t &grid = nbs->grid[0];
1559 int numIndices = grid.cxy_ind[grid.numCells[XX]*grid.numCells[YY]]*grid.na_sc;
1561 return gmx::constArrayRefFromArray(nbs->a.data(), numIndices);
1564 void nbnxn_set_atomorder(nbnxn_search_t nbs)
1566 /* Set the atom order for the home cell (index 0) */
1567 nbnxn_grid_t *grid = &nbs->grid[0];
/* Walk all grid columns in x-major order */
1570 for (int cx = 0; cx < grid->numCells[XX]; cx++)
1572 for (int cy = 0; cy < grid->numCells[YY]; cy++)
/* Flat column index and the first atom slot of this column */
1574 int cxy = cx*grid->numCells[YY] + cy;
1575 int j = grid->cxy_ind[cxy]*grid->na_sc;
1576 for (int cz = 0; cz < grid->cxy_na[cxy]; cz++)
/* NOTE(review): the loop body that writes the identity order into
 * nbs->a/nbs->cell is not visible in this view — do not infer details. */
1587 gmx::ArrayRef<const int> nbnxn_get_gridindices(const nbnxn_search *nbs)