SYCL: Avoid using no_init read accessor in rocFFT
[alexxy/gromacs.git] / src / gromacs / domdec / cellsizes.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /* \internal \file
36  *
37  * \brief Implements DD cell-size related functions.
38  *
39  * \author Berk Hess <hess@kth.se>
40  * \author Roland Schulz <roland.schulz@intel.com>
41  * \ingroup module_domdec
42  */
43
44 #include "gmxpre.h"
45
46 #include "cellsizes.h"
47
48 #include "config.h"
49
50 #include "gromacs/gmxlib/network.h"
51 #include "gromacs/math/vec.h"
52 #include "gromacs/mdtypes/commrec.h"
53 #include "gromacs/utility/cstringutil.h"
54 #include "gromacs/utility/fatalerror.h"
55
56 #include "atomdistribution.h"
57 #include "domdec_internal.h"
58 #include "utility.h"
59
60 static void set_pme_maxshift(gmx_domdec_t*      dd,
61                              gmx_ddpme_t*       ddpme,
62                              gmx_bool           bUniform,
63                              const gmx_ddbox_t* ddbox,
64                              const real*        cellFrac)
65 {
66     int sh = 0;
67
68     gmx_domdec_comm_t* comm = dd->comm;
69     const int          nc   = dd->numCells[ddpme->dim];
70     const int          ns   = ddpme->nslab;
71
72     if (!ddpme->dim_match)
73     {
74         /* PP decomposition is not along dim: the worst situation */
75         sh = ns / 2;
76     }
77     else if (ns <= 3 || (bUniform && ns == nc))
78     {
79         /* The optimal situation */
80         sh = 1;
81     }
82     else
83     {
84         /* We need to check for all pme nodes which nodes they
85          * could possibly need to communicate with.
86          */
87         const int* xmin = ddpme->pp_min;
88         const int* xmax = ddpme->pp_max;
89         /* Allow for atoms to be maximally 2/3 times the cut-off
90          * out of their DD cell. This is a reasonable balance between
91          * between performance and support for most charge-group/cut-off
92          * combinations.
93          */
94         real range = 2.0 / 3.0 * comm->systemInfo.cutoff / ddbox->box_size[ddpme->dim];
95         /* Avoid extra communication when we are exactly at a boundary */
96         range *= 0.999;
97
98         sh = 1;
99         for (int s = 0; s < ns; s++)
100         {
101             /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
102             real pme_boundary = static_cast<real>(s) / ns;
103             while (sh + 1 < ns
104                    && ((s - (sh + 1) >= 0 && cellFrac[xmax[s - (sh + 1)] + 1] + range > pme_boundary)
105                        || (s - (sh + 1) < 0 && cellFrac[xmax[s - (sh + 1) + ns] + 1] - 1 + range > pme_boundary)))
106             {
107                 sh++;
108             }
109             pme_boundary = static_cast<real>(s + 1) / ns;
110             while (sh + 1 < ns
111                    && ((s + (sh + 1) < ns && cellFrac[xmin[s + (sh + 1)]] - range < pme_boundary)
112                        || (s + (sh + 1) >= ns && cellFrac[xmin[s + (sh + 1) - ns]] + 1 - range < pme_boundary)))
113             {
114                 sh++;
115             }
116         }
117     }
118
119     ddpme->maxshift = sh;
120
121     if (debug)
122     {
123         fprintf(debug, "PME slab communication range for dim %d is %d\n", ddpme->dim, ddpme->maxshift);
124     }
125 }
126
127 static void check_box_size(const gmx_domdec_t* dd, const gmx_ddbox_t* ddbox)
128 {
129     for (int d = 0; d < dd->ndim; d++)
130     {
131         const int dim = dd->dim[d];
132         if (dim < ddbox->nboundeddim
133             && ddbox->box_size[dim] * ddbox->skew_fac[dim]
134                        < dd->numCells[dim] * dd->comm->cellsize_limit * DD_CELL_MARGIN)
135         {
136             gmx_fatal(
137                     FARGS,
138                     "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller "
139                     "than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
140                     dim2char(dim),
141                     ddbox->box_size[dim],
142                     ddbox->skew_fac[dim],
143                     dd->numCells[dim],
144                     dd->comm->cellsize_limit);
145         }
146     }
147 }
148
149 real grid_jump_limit(const gmx_domdec_comm_t* comm, real cutoff, int dim_ind)
150 {
151     /* The distance between the boundaries of cells at distance
152      * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
153      * and by the fact that cells should not be shifted by more than
154      * half their size, such that cg's only shift by one cell
155      * at redecomposition.
156      */
157     real grid_jump_limit = comm->cellsize_limit;
158     if (!comm->bVacDLBNoLimit)
159     {
160         if (comm->bPMELoadBalDLBLimits)
161         {
162             cutoff = std::max(cutoff, comm->PMELoadBal_max_cutoff);
163         }
164         grid_jump_limit = std::max(grid_jump_limit, cutoff / comm->cd[dim_ind].numPulses());
165     }
166
167     return grid_jump_limit;
168 }
169
170 /* This function should be used for moving the domain boudaries during DLB,
171  * for obtaining the minimum cell size. It checks the initially set limit
172  * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
173  * and, possibly, a longer cut-off limit set for PME load balancing.
174  */
175 static real cellsize_min_dlb(gmx_domdec_comm_t* comm, int dim_ind, int dim)
176 {
177     real cellsize_min = comm->cellsize_min[dim];
178
179     if (!comm->bVacDLBNoLimit)
180     {
181         /* The cut-off might have changed, e.g. by PME load balacning,
182          * from the value used to set comm->cellsize_min, so check it.
183          */
184         cellsize_min = std::max(cellsize_min, comm->systemInfo.cutoff / comm->cd[dim_ind].np_dlb);
185
186         if (comm->bPMELoadBalDLBLimits)
187         {
188             /* Check for the cut-off limit set by the PME load balancing */
189             cellsize_min = std::max(cellsize_min, comm->PMELoadBal_max_cutoff / comm->cd[dim_ind].np_dlb);
190         }
191     }
192
193     return cellsize_min;
194 }
195
196 /* Set the domain boundaries. Use for static (or no) load balancing,
197  * and also for the starting state for dynamic load balancing.
198  * setmode determine if and where the boundaries are stored, use enum above.
199  * Returns the number communication pulses in npulse.
200  */
201 gmx::ArrayRef<const std::vector<real>>
202 set_dd_cell_sizes_slb(gmx_domdec_t* dd, const gmx_ddbox_t* ddbox, int setmode, ivec npulse)
203 {
204     gmx_domdec_comm_t* comm = dd->comm;
205
206     gmx::ArrayRef<std::vector<real>> cell_x_master;
207     if (setmode == setcellsizeslbMASTER)
208     {
209         cell_x_master = dd->ma->cellSizesBuffer;
210     }
211
212     rvec cellsize_min;
213     for (int d = 0; d < DIM; d++)
214     {
215         cellsize_min[d] = ddbox->box_size[d] * ddbox->skew_fac[d];
216         npulse[d]       = 1;
217         if (dd->numCells[d] == 1 || comm->slb_frac[d] == nullptr)
218         {
219             /* Uniform grid */
220             real cell_dx = ddbox->box_size[d] / dd->numCells[d];
221             switch (setmode)
222             {
223                 case setcellsizeslbMASTER:
224                     for (int j = 0; j < dd->numCells[d] + 1; j++)
225                     {
226                         cell_x_master[d][j] = ddbox->box0[d] + j * cell_dx;
227                     }
228                     break;
229                 case setcellsizeslbLOCAL:
230                     comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d]) * cell_dx;
231                     comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d] + 1) * cell_dx;
232                     break;
233                 default: break;
234             }
235             real cellsize = cell_dx * ddbox->skew_fac[d];
236             while (cellsize * npulse[d] < comm->systemInfo.cutoff)
237             {
238                 npulse[d]++;
239             }
240             cellsize_min[d] = cellsize;
241         }
242         else
243         {
244             /* Statically load balanced grid */
245             /* Also when we are not doing a master distribution we determine
246              * all cell borders in a loop to obtain identical values
247              * to the master distribution case and to determine npulse.
248              */
249             gmx::ArrayRef<real> cell_x;
250             std::vector<real>   cell_x_buffer;
251             if (setmode == setcellsizeslbMASTER)
252             {
253                 cell_x = cell_x_master[d];
254             }
255             else
256             {
257                 cell_x_buffer.resize(dd->numCells[d] + 1);
258                 cell_x = cell_x_buffer;
259             }
260             cell_x[0] = ddbox->box0[d];
261             for (int j = 0; j < dd->numCells[d]; j++)
262             {
263                 real cell_dx  = ddbox->box_size[d] * comm->slb_frac[d][j];
264                 cell_x[j + 1] = cell_x[j] + cell_dx;
265                 real cellsize = cell_dx * ddbox->skew_fac[d];
266                 while (cellsize * npulse[d] < comm->systemInfo.cutoff && npulse[d] < dd->numCells[d] - 1)
267                 {
268                     npulse[d]++;
269                 }
270                 cellsize_min[d] = std::min(cellsize_min[d], cellsize);
271             }
272             if (setmode == setcellsizeslbLOCAL)
273             {
274                 comm->cell_x0[d] = cell_x[dd->ci[d]];
275                 comm->cell_x1[d] = cell_x[dd->ci[d] + 1];
276             }
277         }
278         /* The following limitation is to avoid that a cell would receive
279          * some of its own home charge groups back over the periodic boundary.
280          * Double charge groups cause trouble with the global indices.
281          */
282         if (d < ddbox->npbcdim && dd->numCells[d] > 1 && npulse[d] >= dd->numCells[d])
283         {
284             char error_string[STRLEN];
285
286             sprintf(error_string,
287                     "The box size in direction %c (%f) times the triclinic skew factor (%f) is too "
288                     "small for a cut-off of %f with %d domain decomposition cells, use 1 or more "
289                     "than %d %s or increase the box size in this direction",
290                     dim2char(d),
291                     ddbox->box_size[d],
292                     ddbox->skew_fac[d],
293                     comm->systemInfo.cutoff,
294                     dd->numCells[d],
295                     dd->numCells[d],
296                     dd->nnodes > dd->numCells[d] ? "cells" : "ranks");
297
298             if (setmode == setcellsizeslbLOCAL)
299             {
300                 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd), "%s", error_string);
301             }
302             else
303             {
304                 gmx_fatal(FARGS, "%s", error_string);
305             }
306         }
307     }
308
309     if (!isDlbOn(comm))
310     {
311         copy_rvec(cellsize_min, comm->cellsize_min);
312     }
313
314     DDRankSetup& ddRankSetup = comm->ddRankSetup;
315     for (int d = 0; d < ddRankSetup.npmedecompdim; d++)
316     {
317         set_pme_maxshift(dd,
318                          &ddRankSetup.ddpme[d],
319                          comm->slb_frac[dd->dim[d]] == nullptr,
320                          ddbox,
321                          ddRankSetup.ddpme[d].slb_dim_f);
322     }
323
324     return cell_x_master;
325 }
326
327
328 static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t*      dd,
329                                                   int                d,
330                                                   int                dim,
331                                                   RowMaster*         rowMaster,
332                                                   const gmx_ddbox_t* ddbox,
333                                                   gmx_bool           bUniform,
334                                                   int64_t            step,
335                                                   real               cellsize_limit_f,
336                                                   int                range[])
337 {
338     gmx_bool bLastHi  = FALSE;
339     int      nrange[] = { range[0], range[1] };
340
341     const real region_size = rowMaster->cellFrac[range[1]] - rowMaster->cellFrac[range[0]];
342
343     GMX_ASSERT(region_size >= (range[1] - range[0]) * cellsize_limit_f,
344                "The region should fit all cells at minimum size");
345
346     gmx_domdec_comm_t* comm = dd->comm;
347
348     const int ncd = dd->numCells[dim];
349
350     const bool dimHasPbc = (dim < ddbox->npbcdim);
351
352     gmx::ArrayRef<real> cell_size = rowMaster->buf_ncd;
353
354     if (debug)
355     {
356         fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
357     }
358
359     /* First we need to check if the scaling does not make cells
360      * smaller than the smallest allowed size.
361      * We need to do this iteratively, since if a cell is too small,
362      * it needs to be enlarged, which makes all the other cells smaller,
363      * which could in turn make another cell smaller than allowed.
364      */
365     for (int i = range[0]; i < range[1]; i++)
366     {
367         rowMaster->isCellMin[i] = false;
368     }
369     int nmin     = 0;
370     int nmin_old = 0;
371     do
372     {
373         nmin_old = nmin;
374         /* We need the total for normalization */
375         real fac = 0;
376         for (int i = range[0]; i < range[1]; i++)
377         {
378             if (!rowMaster->isCellMin[i])
379             {
380                 fac += cell_size[i];
381             }
382         }
383         /* This condition is ensured by the assertion at the end of the loop */
384         GMX_ASSERT(fac > 0, "We cannot have 0 size to work with");
385         fac = (region_size - nmin * cellsize_limit_f)
386               / fac; /* substracting cells already set to cellsize_limit_f */
387         /* Determine the cell boundaries */
388         for (int i = range[0]; i < range[1]; i++)
389         {
390             if (!rowMaster->isCellMin[i])
391             {
392                 cell_size[i] *= fac;
393                 const real cellsize_limit_f_i =
394                         (!dimHasPbc && (i == 0 || i == dd->numCells[dim] - 1)) ? 0 : cellsize_limit_f;
395                 if (cell_size[i] < cellsize_limit_f_i)
396                 {
397                     rowMaster->isCellMin[i] = true;
398                     cell_size[i]            = cellsize_limit_f_i;
399                     nmin++;
400                 }
401             }
402             rowMaster->cellFrac[i + 1] = rowMaster->cellFrac[i] + cell_size[i];
403         }
404
405         /* This is ensured by the assertion at the beginning of this function */
406         GMX_ASSERT(nmin < range[1] - range[0], "We can not have all cells limited");
407     } while (nmin > nmin_old);
408
409     const int i  = range[1] - 1;
410     cell_size[i] = rowMaster->cellFrac[i + 1] - rowMaster->cellFrac[i];
411     /* For this check we should not use DD_CELL_MARGIN,
412      * but a slightly smaller factor,
413      * since rounding could get use below the limit.
414      */
415     if (dimHasPbc && cell_size[i] < cellsize_limit_f * DD_CELL_MARGIN2 / DD_CELL_MARGIN)
416     {
417         char buf[22];
418         gmx_fatal(FARGS,
419                   "step %s: the dynamic load balancing could not balance dimension %c: box size "
420                   "%f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
421                   gmx_step_str(step, buf),
422                   dim2char(dim),
423                   ddbox->box_size[dim],
424                   ddbox->skew_fac[dim],
425                   ncd,
426                   comm->cellsize_min[dim]);
427     }
428
429     rowMaster->dlbIsLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
430
431     if (!bUniform)
432     {
433         /* Check if the boundary did not displace more than halfway
434          * each of the cells it bounds, as this could cause problems,
435          * especially when the differences between cell sizes are large.
436          * If changes are applied, they will not make cells smaller
437          * than the cut-off, as we check all the boundaries which
438          * might be affected by a change and if the old state was ok,
439          * the cells will at most be shrunk back to their old size.
440          */
441         for (int i = range[0] + 1; i < range[1]; i++)
442         {
443             real halfway = 0.5 * (rowMaster->oldCellFrac[i] + rowMaster->oldCellFrac[i - 1]);
444             if (rowMaster->cellFrac[i] < halfway)
445             {
446                 rowMaster->cellFrac[i] = halfway;
447                 /* Check if the change also causes shifts of the next boundaries */
448                 for (int j = i + 1; j < range[1]; j++)
449                 {
450                     if (rowMaster->cellFrac[j] < rowMaster->cellFrac[j - 1] + cellsize_limit_f)
451                     {
452                         rowMaster->cellFrac[j] = rowMaster->cellFrac[j - 1] + cellsize_limit_f;
453                     }
454                 }
455             }
456             halfway = 0.5 * (rowMaster->oldCellFrac[i] + rowMaster->oldCellFrac[i + 1]);
457             if (rowMaster->cellFrac[i] > halfway)
458             {
459                 rowMaster->cellFrac[i] = halfway;
460                 /* Check if the change also causes shifts of the next boundaries */
461                 for (int j = i - 1; j >= range[0] + 1; j--)
462                 {
463                     if (rowMaster->cellFrac[j] > rowMaster->cellFrac[j + 1] - cellsize_limit_f)
464                     {
465                         rowMaster->cellFrac[j] = rowMaster->cellFrac[j + 1] - cellsize_limit_f;
466                     }
467                 }
468             }
469         }
470     }
471
472     /* nrange is defined as [lower, upper) range for new call to enforce_limits */
473     /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest
474      * following) (b) then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta).
475      * oldb and nexta can be the boundaries. for a and b nrange is used */
476     if (d > 0)
477     {
478         /* Take care of the staggering of the cell boundaries */
479         if (bUniform)
480         {
481             for (int i = range[0]; i < range[1]; i++)
482             {
483                 rowMaster->bounds[i].cellFracLowerMax = rowMaster->cellFrac[i];
484                 rowMaster->bounds[i].cellFracUpperMin = rowMaster->cellFrac[i + 1];
485             }
486         }
487         else
488         {
489             for (int i = range[0] + 1; i < range[1]; i++)
490             {
491                 const RowMaster::Bounds& bounds = rowMaster->bounds[i];
492
493                 bool bLimLo = (rowMaster->cellFrac[i] < bounds.boundMin);
494                 bool bLimHi = (rowMaster->cellFrac[i] > bounds.boundMax);
495                 if (bLimLo && bLimHi)
496                 {
497                     /* Both limits violated, try the best we can */
498                     /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
499                     rowMaster->cellFrac[i] = 0.5 * (bounds.boundMin + bounds.boundMax);
500                     nrange[0]              = range[0];
501                     nrange[1]              = i;
502                     dd_cell_sizes_dlb_root_enforce_limits(
503                             dd, d, dim, rowMaster, ddbox, bUniform, step, cellsize_limit_f, nrange);
504
505                     nrange[0] = i;
506                     nrange[1] = range[1];
507                     dd_cell_sizes_dlb_root_enforce_limits(
508                             dd, d, dim, rowMaster, ddbox, bUniform, step, cellsize_limit_f, nrange);
509
510                     return;
511                 }
512                 else if (bLimLo)
513                 {
514                     /* rowMaster->cellFrac[i] = rowMaster->boundMin[i]; */
515                     nrange[1] = i; /* only store violation location. There could be a LimLo violation following with an higher index */
516                     bLastHi   = FALSE;
517                 }
518                 else if (bLimHi && !bLastHi)
519                 {
520                     bLastHi = TRUE;
521                     if (nrange[1] < range[1]) /* found a LimLo before */
522                     {
523                         rowMaster->cellFrac[nrange[1]] = rowMaster->bounds[nrange[1]].boundMin;
524                         dd_cell_sizes_dlb_root_enforce_limits(
525                                 dd, d, dim, rowMaster, ddbox, bUniform, step, cellsize_limit_f, nrange);
526                         nrange[0] = nrange[1];
527                     }
528                     rowMaster->cellFrac[i] = rowMaster->bounds[i].boundMax;
529                     nrange[1]              = i;
530                     dd_cell_sizes_dlb_root_enforce_limits(
531                             dd, d, dim, rowMaster, ddbox, bUniform, step, cellsize_limit_f, nrange);
532                     nrange[0] = i;
533                     nrange[1] = range[1];
534                 }
535             }
536             if (nrange[1] < range[1]) /* found last a LimLo */
537             {
538                 rowMaster->cellFrac[nrange[1]] = rowMaster->bounds[nrange[1]].boundMin;
539                 dd_cell_sizes_dlb_root_enforce_limits(
540                         dd, d, dim, rowMaster, ddbox, bUniform, step, cellsize_limit_f, nrange);
541                 nrange[0] = nrange[1];
542                 nrange[1] = range[1];
543                 dd_cell_sizes_dlb_root_enforce_limits(
544                         dd, d, dim, rowMaster, ddbox, bUniform, step, cellsize_limit_f, nrange);
545             }
546             else if (nrange[0] > range[0]) /* found at least one LimHi */
547             {
548                 dd_cell_sizes_dlb_root_enforce_limits(
549                         dd, d, dim, rowMaster, ddbox, bUniform, step, cellsize_limit_f, nrange);
550             }
551         }
552     }
553 }
554
555
556 static void set_dd_cell_sizes_dlb_root(gmx_domdec_t*      dd,
557                                        int                d,
558                                        int                dim,
559                                        RowMaster*         rowMaster,
560                                        const gmx_ddbox_t* ddbox,
561                                        gmx_bool           bDynamicBox,
562                                        gmx_bool           bUniform,
563                                        int64_t            step)
564 {
565     gmx_domdec_comm_t* comm    = dd->comm;
566     constexpr real     c_relax = 0.5;
567     int                range[] = { 0, 0 };
568
569     /* Convert the maximum change from the input percentage to a fraction */
570     const real change_limit = comm->ddSettings.dlb_scale_lim * 0.01;
571
572     const int ncd = dd->numCells[dim];
573
574     const bool bPBC = (dim < ddbox->npbcdim);
575
576     gmx::ArrayRef<real> cell_size = rowMaster->buf_ncd;
577
578     /* Store the original boundaries */
579     for (int i = 0; i < ncd + 1; i++)
580     {
581         rowMaster->oldCellFrac[i] = rowMaster->cellFrac[i];
582     }
583     if (bUniform)
584     {
585         for (int i = 0; i < ncd; i++)
586         {
587             cell_size[i] = 1.0 / ncd;
588         }
589     }
590     else if (dd_load_count(comm) > 0)
591     {
592         real load_aver  = comm->load[d].sum_m / ncd;
593         real change_max = 0;
594         for (int i = 0; i < ncd; i++)
595         {
596             /* Determine the relative imbalance of cell i */
597             const real load_i    = comm->load[d].load[i * comm->load[d].nload + 2];
598             const real imbalance = (load_i - load_aver) / (load_aver > 0 ? load_aver : 1);
599             /* Determine the change of the cell size using underrelaxation */
600             const real change = -c_relax * imbalance;
601             change_max        = std::max(change_max, std::max(change, -change));
602         }
603         /* Limit the amount of scaling.
604          * We need to use the same rescaling for all cells in one row,
605          * otherwise the load balancing might not converge.
606          */
607         real sc = c_relax;
608         if (change_max > change_limit)
609         {
610             sc *= change_limit / change_max;
611         }
612         for (int i = 0; i < ncd; i++)
613         {
614             /* Determine the relative imbalance of cell i */
615             const real load_i    = comm->load[d].load[i * comm->load[d].nload + 2];
616             const real imbalance = (load_i - load_aver) / (load_aver > 0 ? load_aver : 1);
617             /* Determine the change of the cell size using underrelaxation */
618             const real change = -sc * imbalance;
619             cell_size[i] = (rowMaster->cellFrac[i + 1] - rowMaster->cellFrac[i]) * (1 + change);
620         }
621     }
622
623     real cellsize_limit_f = cellsize_min_dlb(comm, d, dim) / ddbox->box_size[dim];
624     cellsize_limit_f *= DD_CELL_MARGIN;
625     real dist_min_f_hard = grid_jump_limit(comm, comm->systemInfo.cutoff, d) / ddbox->box_size[dim];
626     real dist_min_f      = dist_min_f_hard * DD_CELL_MARGIN;
627     if (ddbox->tric_dir[dim])
628     {
629         cellsize_limit_f /= ddbox->skew_fac[dim];
630         dist_min_f /= ddbox->skew_fac[dim];
631     }
632     if (bDynamicBox && d > 0)
633     {
634         dist_min_f *= DD_PRES_SCALE_MARGIN;
635     }
636     if (d > 0 && !bUniform)
637     {
638         /* Make sure that the grid is not shifted too much */
639         for (int i = 1; i < ncd; i++)
640         {
641             const RowMaster::Bounds& boundsNeighbor = rowMaster->bounds[i - 1];
642             RowMaster::Bounds&       bounds         = rowMaster->bounds[i];
643
644             if (bounds.cellFracUpperMin - boundsNeighbor.cellFracLowerMax < 2 * dist_min_f_hard)
645             {
646                 gmx_incons("Inconsistent DD boundary staggering limits!");
647             }
648             bounds.boundMin = boundsNeighbor.cellFracLowerMax + dist_min_f;
649             real space = rowMaster->cellFrac[i] - (boundsNeighbor.cellFracLowerMax + dist_min_f);
650             if (space > 0)
651             {
652                 bounds.boundMin += 0.5 * space;
653             }
654             bounds.boundMax = bounds.cellFracUpperMin - dist_min_f;
655             space           = rowMaster->cellFrac[i] - (bounds.cellFracUpperMin - dist_min_f);
656             if (space < 0)
657             {
658                 rowMaster->bounds[i].boundMax += 0.5 * space;
659             }
660             if (debug)
661             {
662                 fprintf(debug,
663                         "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
664                         d,
665                         i,
666                         boundsNeighbor.cellFracLowerMax + dist_min_f,
667                         bounds.boundMin,
668                         rowMaster->cellFrac[i],
669                         bounds.boundMax,
670                         bounds.cellFracUpperMin - dist_min_f);
671             }
672         }
673     }
674     range[1]                 = ncd;
675     rowMaster->cellFrac[0]   = 0;
676     rowMaster->cellFrac[ncd] = 1;
677     dd_cell_sizes_dlb_root_enforce_limits(
678             dd, d, dim, rowMaster, ddbox, bUniform, step, cellsize_limit_f, range);
679
680
681     /* After the checks above, the cells should obey the cut-off
682      * restrictions, but it does not hurt to check.
683      */
684     for (int i = 0; i < ncd; i++)
685     {
686         if (debug)
687         {
688             fprintf(debug,
689                     "Relative bounds dim %d  cell %d: %f %f\n",
690                     dim,
691                     i,
692                     rowMaster->cellFrac[i],
693                     rowMaster->cellFrac[i + 1]);
694         }
695
696         if ((bPBC || (i != 0 && i != dd->numCells[dim] - 1))
697             && rowMaster->cellFrac[i + 1] - rowMaster->cellFrac[i] < cellsize_limit_f / DD_CELL_MARGIN)
698         {
699             char buf[22];
700             fprintf(stderr,
701                     "\nWARNING step %s: direction %c, cell %d too small: %f\n",
702                     gmx_step_str(step, buf),
703                     dim2char(dim),
704                     i,
705                     (rowMaster->cellFrac[i + 1] - rowMaster->cellFrac[i]) * ddbox->box_size[dim]
706                             * ddbox->skew_fac[dim]);
707         }
708     }
709
710     int pos = ncd + 1;
711     /* Store the cell boundaries of the lower dimensions at the end */
712     for (int d1 = 0; d1 < d; d1++)
713     {
714         rowMaster->cellFrac[pos++] = comm->cellsizesWithDlb[d1].fracLower;
715         rowMaster->cellFrac[pos++] = comm->cellsizesWithDlb[d1].fracUpper;
716     }
717
718     DDRankSetup& ddRankSetup = comm->ddRankSetup;
719     if (d < ddRankSetup.npmedecompdim)
720     {
721         /* The master determines the maximum shift for
722          * the coordinate communication between separate PME nodes.
723          */
724         set_pme_maxshift(dd, &ddRankSetup.ddpme[d], bUniform, ddbox, rowMaster->cellFrac.data());
725     }
726     rowMaster->cellFrac[pos++] = ddRankSetup.ddpme[0].maxshift;
727     if (d >= 1)
728     {
729         rowMaster->cellFrac[pos++] = ddRankSetup.ddpme[1].maxshift;
730     }
731 }
732
733 static void relative_to_absolute_cell_bounds(gmx_domdec_t* dd, const gmx_ddbox_t* ddbox, int dimind)
734 {
735     gmx_domdec_comm_t*        comm      = dd->comm;
736     const DDCellsizesWithDlb& cellsizes = comm->cellsizesWithDlb[dimind];
737
738     /* Set the cell dimensions */
739     int dim            = dd->dim[dimind];
740     comm->cell_x0[dim] = cellsizes.fracLower * ddbox->box_size[dim];
741     comm->cell_x1[dim] = cellsizes.fracUpper * ddbox->box_size[dim];
742     if (dim >= ddbox->nboundeddim)
743     {
744         comm->cell_x0[dim] += ddbox->box0[dim];
745         comm->cell_x1[dim] += ddbox->box0[dim];
746     }
747 }
748
749 static void distribute_dd_cell_sizes_dlb(gmx_domdec_t*       dd,
750                                          int                 d,
751                                          int                 dim,
752                                          gmx::ArrayRef<real> cellFracRow,
753                                          const gmx_ddbox_t*  ddbox)
754 {
755     gmx_domdec_comm_t& comm = *dd->comm;
756
757 #if GMX_MPI
758     /* Each node would only need to know two fractions,
759      * but it is probably cheaper to broadcast the whole array.
760      */
761     MPI_Bcast(cellFracRow.data(),
762               ddCellFractionBufferSize(dd, d) * sizeof(real),
763               MPI_BYTE,
764               0,
765               comm.mpi_comm_load[d]);
766 #endif
767     /* Copy the fractions for this dimension from the buffer */
768     comm.cellsizesWithDlb[d].fracLower = cellFracRow[dd->ci[dim]];
769     comm.cellsizesWithDlb[d].fracUpper = cellFracRow[dd->ci[dim] + 1];
770     /* The whole array was communicated, so set the buffer position */
771     int pos = dd->numCells[dim] + 1;
772     for (int d1 = 0; d1 <= d; d1++)
773     {
774         if (d1 < d)
775         {
776             /* Copy the cell fractions of the lower dimensions */
777             comm.cellsizesWithDlb[d1].fracLower = cellFracRow[pos++];
778             comm.cellsizesWithDlb[d1].fracUpper = cellFracRow[pos++];
779         }
780         relative_to_absolute_cell_bounds(dd, ddbox, d1);
781     }
782     /* Convert the communicated shift from float to int */
783     comm.ddRankSetup.ddpme[0].maxshift = gmx::roundToInt(cellFracRow[pos++]);
784     if (d >= 1)
785     {
786         comm.ddRankSetup.ddpme[1].maxshift = gmx::roundToInt(cellFracRow[pos++]);
787     }
788 }
789
790 static void set_dd_cell_sizes_dlb_change(gmx_domdec_t*      dd,
791                                          const gmx_ddbox_t* ddbox,
792                                          gmx_bool           bDynamicBox,
793                                          gmx_bool           bUniform,
794                                          int64_t            step)
795 {
796     for (int d = 0; d < dd->ndim; d++)
797     {
798         const int dim         = dd->dim[d];
799         bool      isRowMember = true;
800         bool      isRowRoot   = true;
801         for (int d1 = d; d1 < dd->ndim; d1++)
802         {
803             if (dd->ci[dd->dim[d1]] > 0)
804             {
805                 if (d1 != d)
806                 {
807                     isRowMember = false;
808                 }
809                 isRowRoot = false;
810             }
811         }
812         if (isRowMember)
813         {
814             DDCellsizesWithDlb& cellsizes = dd->comm->cellsizesWithDlb[d];
815             gmx::ArrayRef<real> cellFracRow;
816
817             if (isRowRoot)
818             {
819                 RowMaster* rowMaster = cellsizes.rowMaster.get();
820                 set_dd_cell_sizes_dlb_root(dd, d, dim, rowMaster, ddbox, bDynamicBox, bUniform, step);
821                 cellFracRow = rowMaster->cellFrac;
822             }
823             else
824             {
825                 cellFracRow = cellsizes.fracRow;
826             }
827             distribute_dd_cell_sizes_dlb(dd, d, dim, cellFracRow, ddbox);
828         }
829     }
830 }
831
832 static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t* dd, const gmx_ddbox_t* ddbox)
833 {
834     /* This function assumes the box is static and should therefore
835      * not be called when the box has changed since the last
836      * call to dd_partition_system.
837      */
838     for (int d = 0; d < dd->ndim; d++)
839     {
840         relative_to_absolute_cell_bounds(dd, ddbox, d);
841     }
842 }
843
844
845 static void set_dd_cell_sizes_dlb(gmx_domdec_t*      dd,
846                                   const gmx_ddbox_t* ddbox,
847                                   gmx_bool           bDynamicBox,
848                                   gmx_bool           bUniform,
849                                   gmx_bool           bDoDLB,
850                                   int64_t            step,
851                                   gmx_wallcycle*     wcycle)
852 {
853     gmx_domdec_comm_t* comm = dd->comm;
854
855     if (bDoDLB)
856     {
857         wallcycle_start(wcycle, WallCycleCounter::DDCommBound);
858         set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
859         wallcycle_stop(wcycle, WallCycleCounter::DDCommBound);
860     }
861     else if (bDynamicBox)
862     {
863         set_dd_cell_sizes_dlb_nochange(dd, ddbox);
864     }
865
866     /* Set the dimensions for which no DD is used */
867     for (int dim = 0; dim < DIM; dim++)
868     {
869         if (dd->numCells[dim] == 1)
870         {
871             comm->cell_x0[dim] = 0;
872             comm->cell_x1[dim] = ddbox->box_size[dim];
873             if (dim >= ddbox->nboundeddim)
874             {
875                 comm->cell_x0[dim] += ddbox->box0[dim];
876                 comm->cell_x1[dim] += ddbox->box0[dim];
877             }
878         }
879     }
880 }
881
882 void set_dd_cell_sizes(gmx_domdec_t*      dd,
883                        const gmx_ddbox_t* ddbox,
884                        gmx_bool           bDynamicBox,
885                        gmx_bool           bUniform,
886                        gmx_bool           bDoDLB,
887                        int64_t            step,
888                        gmx_wallcycle*     wcycle)
889 {
890     gmx_domdec_comm_t* comm = dd->comm;
891
892     /* Copy the old cell boundaries for the cg displacement check */
893     copy_rvec(comm->cell_x0, comm->old_cell_x0);
894     copy_rvec(comm->cell_x1, comm->old_cell_x1);
895
896     if (isDlbOn(comm))
897     {
898         if (DDMASTER(dd))
899         {
900             check_box_size(dd, ddbox);
901         }
902         set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
903     }
904     else
905     {
906         ivec numPulses;
907         set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbLOCAL, numPulses);
908
909         /* Check if the change in cell size requires a different number
910          * of communication pulses and if so change the number.
911          */
912         for (int d = 0; d < dd->ndim; d++)
913         {
914             gmx_domdec_comm_dim_t& cd           = comm->cd[d];
915             int                    numPulsesDim = numPulses[dd->dim[d]];
916             if (cd.numPulses() != numPulsesDim)
917             {
918                 if (debug)
919                 {
920                     fprintf(debug,
921                             "Changing the number of halo communication pulses along dim %c from %d "
922                             "to %d\n",
923                             dim2char(dd->dim[d]),
924                             cd.numPulses(),
925                             numPulsesDim);
926                 }
927                 cd.ind.resize(numPulsesDim);
928             }
929         }
930     }
931
932     if (debug)
933     {
934         for (int d = 0; d < DIM; d++)
935         {
936             fprintf(debug,
937                     "cell_x[%d] %f - %f skew_fac %f\n",
938                     d,
939                     comm->cell_x0[d],
940                     comm->cell_x1[d],
941                     ddbox->skew_fac[d]);
942         }
943     }
944 }