1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2005,2006,2007,2008,2009 by the GROMACS development team.
5  * Copyright (c) 2010,2011,2012,2013,2014 by the GROMACS development team.
6  * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team.
7  * Copyright (c) 2020,2021, by the GROMACS development team, led by
8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9  * and including many others, as listed in the AUTHORS file in the
10  * top-level source directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38
39 #include "gmxpre.h"
40
41 #include "domdec.h"
42
43 #include "config.h"
44
45 #include <cassert>
46 #include <cinttypes>
47 #include <climits>
48 #include <cmath>
49 #include <cstdio>
50 #include <cstdlib>
51 #include <cstring>
52
53 #include <algorithm>
54 #include <memory>
55
56 #include "gromacs/domdec/builder.h"
57 #include "gromacs/domdec/collect.h"
58 #include "gromacs/domdec/dlb.h"
59 #include "gromacs/domdec/dlbtiming.h"
60 #include "gromacs/domdec/domdec_network.h"
61 #include "gromacs/domdec/ga2la.h"
62 #include "gromacs/domdec/gpuhaloexchange.h"
63 #include "gromacs/domdec/options.h"
64 #include "gromacs/domdec/partition.h"
65 #include "gromacs/gmxlib/network.h"
66 #include "gromacs/gmxlib/nrnb.h"
67 #include "gromacs/gpu_utils/device_stream_manager.h"
68 #include "gromacs/gpu_utils/gpu_utils.h"
69 #include "gromacs/hardware/hw_info.h"
70 #include "gromacs/math/vec.h"
71 #include "gromacs/math/vectypes.h"
72 #include "gromacs/mdlib/calc_verletbuf.h"
73 #include "gromacs/mdlib/constr.h"
74 #include "gromacs/mdlib/constraintrange.h"
75 #include "gromacs/mdlib/updategroups.h"
76 #include "gromacs/mdlib/vsite.h"
77 #include "gromacs/mdtypes/commrec.h"
78 #include "gromacs/mdtypes/forceoutput.h"
79 #include "gromacs/mdtypes/inputrec.h"
80 #include "gromacs/mdtypes/mdrunoptions.h"
81 #include "gromacs/mdtypes/state.h"
82 #include "gromacs/pbcutil/ishift.h"
83 #include "gromacs/pbcutil/pbc.h"
84 #include "gromacs/pulling/pull.h"
85 #include "gromacs/timing/wallcycle.h"
86 #include "gromacs/topology/block.h"
87 #include "gromacs/topology/idef.h"
88 #include "gromacs/topology/ifunc.h"
89 #include "gromacs/topology/mtop_lookup.h"
90 #include "gromacs/topology/mtop_util.h"
91 #include "gromacs/topology/topology.h"
92 #include "gromacs/utility/basedefinitions.h"
93 #include "gromacs/utility/basenetwork.h"
94 #include "gromacs/utility/cstringutil.h"
95 #include "gromacs/utility/enumerationhelpers.h"
96 #include "gromacs/utility/exceptions.h"
97 #include "gromacs/utility/fatalerror.h"
98 #include "gromacs/utility/gmxmpi.h"
99 #include "gromacs/utility/logger.h"
100 #include "gromacs/utility/real.h"
101 #include "gromacs/utility/smalloc.h"
102 #include "gromacs/utility/strconvert.h"
103 #include "gromacs/utility/stringstream.h"
104 #include "gromacs/utility/stringutil.h"
105 #include "gromacs/utility/textwriter.h"
106
107 #include "atomdistribution.h"
108 #include "box.h"
109 #include "cellsizes.h"
110 #include "distribute.h"
111 #include "domdec_constraints.h"
112 #include "domdec_internal.h"
113 #include "domdec_setup.h"
114 #include "domdec_vsite.h"
115 #include "redistribute.h"
116 #include "utility.h"
117
118 // TODO remove this when moving domdec into gmx namespace
119 using gmx::DdRankOrder;
120 using gmx::DlbOption;
121 using gmx::DomdecOptions;
122
123 static const char* enumValueToString(DlbState enumValue)
124 {
125     static constexpr gmx::EnumerationArray<DlbState, const char*> dlbStateNames = {
126         "off", "off", "auto", "locked", "on", "on"
127     };
128     return dlbStateNames[enumValue];
129 }
130
131 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
132 #define DD_CGIBS 2
133
134 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
135 #define DD_FLAG_NRCG 65535
136 #define DD_FLAG_FW(d) (1 << (16 + (d)*2))
137 #define DD_FLAG_BW(d) (1 << (16 + (d)*2 + 1))
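/* Illustration (added, not part of the original code): in each cggl_flag entry the
 * low 16 bits, masked by DD_FLAG_NRCG, presumably hold the size of the atom group,
 * while bit 16 + 2*d marks a move forward and bit 16 + 2*d + 1 a move backward
 * along decomposition dimension d; e.g. DD_FLAG_FW(1) == 1 << 18 and
 * DD_FLAG_BW(1) == 1 << 19.
 */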
138
139 /* The DD zone order */
140 static const ivec dd_zo[DD_MAXZONE] = { { 0, 0, 0 }, { 1, 0, 0 }, { 1, 1, 0 }, { 0, 1, 0 },
141                                         { 0, 1, 1 }, { 0, 0, 1 }, { 1, 0, 1 }, { 1, 1, 1 } };
142
143 /* The non-bonded zone-pair setup for domain decomposition
144  * The first number is the i-zone, the second number the first j-zone seen by
145  * this i-zone, the third number the last+1 j-zone seen by this i-zone.
146  * As is, this is for 3D decomposition, where there are 4 i-zones.
147  * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
148  * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
149  */
150 static const int ddNonbondedZonePairRanges[DD_MAXIZONE][3] = { { 0, 0, 8 },
151                                                                { 1, 3, 6 },
152                                                                { 2, 5, 6 },
153                                                                { 3, 5, 7 } };
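/* Worked example (added for clarity): with 2D decomposition there are 4 zones, so
 * the ranges above are clamped to a last+1 j-zone of 4: i-zone 0 then sees j-zones
 * 0-3 and i-zone 1 sees only j-zone 3. With 1D decomposition (2 zones) only
 * i-zone 0 remains and it sees j-zones 0-1.
 */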
154
155 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
156 {
157     xyz[XX] = ind / (nc[YY] * nc[ZZ]);
158     xyz[YY] = (ind / nc[ZZ]) % nc[YY];
159     xyz[ZZ] = ind % nc[ZZ];
160 }
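/* Example (added): this is the inverse of the row-major mapping
 * ind = x*nc[YY]*nc[ZZ] + y*nc[ZZ] + z (presumably what dd_index() computes);
 * with nc = {2, 3, 4} and ind = 13 it yields xyz = {1, 0, 1}.
 */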
161
162 static int ddcoord2ddnodeid(gmx_domdec_t* dd, ivec c)
163 {
164     int ddnodeid = -1;
165
166     const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
167     const int                 ddindex   = dd_index(dd->numCells, c);
168     if (cartSetup.bCartesianPP_PME)
169     {
170         ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
171     }
172     else if (cartSetup.bCartesianPP)
173     {
174 #if GMX_MPI
175         MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
176 #endif
177     }
178     else
179     {
180         ddnodeid = ddindex;
181     }
182
183     return ddnodeid;
184 }
185
186 int ddglatnr(const gmx_domdec_t* dd, int i)
187 {
188     int atnr = 0;
189
190     if (dd == nullptr)
191     {
192         atnr = i + 1;
193     }
194     else
195     {
196         if (i >= dd->comm->atomRanges.numAtomsTotal())
197         {
198             gmx_fatal(FARGS,
199                       "glatnr called with %d, which is larger than the local number of atoms (%d)",
200                       i,
201                       dd->comm->atomRanges.numAtomsTotal());
202         }
203         atnr = dd->globalAtomIndices[i] + 1;
204     }
205
206     return atnr;
207 }
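/* Note (added): the returned value is a 1-based global atom number; without DD the
 * local index maps directly to i + 1, with DD it is looked up in globalAtomIndices.
 */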
208
209 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingsPerMoleculeType(const gmx_domdec_t& dd)
210 {
211     GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
212     return dd.comm->systemInfo.updateGroupingsPerMoleculeType;
213 }
214
215 void dd_store_state(const gmx_domdec_t& dd, t_state* state)
216 {
217     if (state->ddp_count != dd.ddp_count)
218     {
219         gmx_incons("The MD state does not match the domain decomposition state");
220     }
221
222     state->cg_gl.resize(dd.ncg_home);
223     for (int i = 0; i < dd.ncg_home; i++)
224     {
225         state->cg_gl[i] = dd.globalAtomGroupIndices[i];
226     }
227
228     state->ddp_count_cg_gl = dd.ddp_count;
229 }
230
231 gmx_domdec_zones_t* domdec_zones(gmx_domdec_t* dd)
232 {
233     return &dd->comm->zones;
234 }
235
236 int dd_numAtomsZones(const gmx_domdec_t& dd)
237 {
238     return dd.comm->atomRanges.end(DDAtomRanges::Type::Zones);
239 }
240
241 int dd_numHomeAtoms(const gmx_domdec_t& dd)
242 {
243     return dd.comm->atomRanges.numHomeAtoms();
244 }
245
246 int dd_natoms_mdatoms(const gmx_domdec_t& dd)
247 {
248     /* We currently set mdatoms entries for all atoms:
249      * local + non-local + communicated for vsite + constraints
250      */
251
252     return dd.comm->atomRanges.numAtomsTotal();
253 }
254
255 int dd_natoms_vsite(const gmx_domdec_t& dd)
256 {
257     return dd.comm->atomRanges.end(DDAtomRanges::Type::Vsites);
258 }
259
260 void dd_get_constraint_range(const gmx_domdec_t& dd, int* at_start, int* at_end)
261 {
262     *at_start = dd.comm->atomRanges.start(DDAtomRanges::Type::Constraints);
263     *at_end   = dd.comm->atomRanges.end(DDAtomRanges::Type::Constraints);
264 }
265
266 void dd_move_x(gmx_domdec_t* dd, const matrix box, gmx::ArrayRef<gmx::RVec> x, gmx_wallcycle* wcycle)
267 {
268     wallcycle_start(wcycle, WallCycleCounter::MoveX);
269
270     rvec shift = { 0, 0, 0 };
271
272     gmx_domdec_comm_t* comm = dd->comm;
273
274     int nzone   = 1;
275     int nat_tot = comm->atomRanges.numHomeAtoms();
276     for (int d = 0; d < dd->ndim; d++)
277     {
278         const bool bPBC   = (dd->ci[dd->dim[d]] == 0);
279         const bool bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
280         if (bPBC)
281         {
282             copy_rvec(box[dd->dim[d]], shift);
283         }
284         gmx_domdec_comm_dim_t* cd = &comm->cd[d];
285         for (const gmx_domdec_ind_t& ind : cd->ind)
286         {
287             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
288             gmx::ArrayRef<gmx::RVec>& sendBuffer = sendBufferAccess.buffer;
289             int                       n          = 0;
290             if (!bPBC)
291             {
292                 for (int j : ind.index)
293                 {
294                     sendBuffer[n] = x[j];
295                     n++;
296                 }
297             }
298             else if (!bScrew)
299             {
300                 for (int j : ind.index)
301                 {
302                     /* We need to shift the coordinates */
303                     for (int d = 0; d < DIM; d++)
304                     {
305                         sendBuffer[n][d] = x[j][d] + shift[d];
306                     }
307                     n++;
308                 }
309             }
310             else
311             {
312                 for (int j : ind.index)
313                 {
314                     /* Shift x */
315                     sendBuffer[n][XX] = x[j][XX] + shift[XX];
316                     /* Rotate y and z.
317                      * This operation requires a special shift force
318                      * treatment, which is performed in calc_vir.
319                      */
320                     sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
321                     sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
322                     n++;
323                 }
324             }
325
326             DDBufferAccess<gmx::RVec> receiveBufferAccess(
327                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
328
329             gmx::ArrayRef<gmx::RVec> receiveBuffer;
330             if (cd->receiveInPlace)
331             {
332                 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
333             }
334             else
335             {
336                 receiveBuffer = receiveBufferAccess.buffer;
337             }
338             /* Send and receive the coordinates */
339             ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
340
341             if (!cd->receiveInPlace)
342             {
343                 int j = 0;
344                 for (int zone = 0; zone < nzone; zone++)
345                 {
346                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
347                     {
348                         x[i] = receiveBuffer[j++];
349                     }
350                 }
351             }
352             nat_tot += ind.nrecv[nzone + 1];
353         }
354         nzone += nzone;
355     }
356
357     wallcycle_stop(wcycle, WallCycleCounter::MoveX);
358 }
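/* Summary of the pattern above (added for clarity): for each decomposition
 * dimension the coordinates selected by ind.index (home atoms plus atoms already
 * received for previous dimensions/pulses) are sent backward with ddSendrecv(),
 * and the received halo coordinates are appended at x[nat_tot]; the number of
 * communicated zones doubles with each dimension. dd_move_f() below reverses this
 * flow to accumulate the halo forces back onto the home atoms.
 */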
359
360 void dd_move_f(gmx_domdec_t* dd, gmx::ForceWithShiftForces* forceWithShiftForces, gmx_wallcycle* wcycle)
361 {
362     wallcycle_start(wcycle, WallCycleCounter::MoveF);
363
364     gmx::ArrayRef<gmx::RVec> f      = forceWithShiftForces->force();
365     gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();
366
367     gmx_domdec_comm_t& comm    = *dd->comm;
368     int                nzone   = comm.zones.n / 2;
369     int                nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
370     for (int d = dd->ndim - 1; d >= 0; d--)
371     {
372         /* Only forces in domains near the PBC boundaries need to
373            consider PBC in the treatment of fshift */
374         const bool shiftForcesNeedPbc =
375                 (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
376         const bool applyScrewPbc =
377                 (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
378         /* Determine which shift vector we need */
379         ivec vis        = { 0, 0, 0 };
380         vis[dd->dim[d]] = 1;
381         const int is    = gmx::ivecToShiftIndex(vis);
382
383         /* Loop over the pulses */
384         const gmx_domdec_comm_dim_t& cd = comm.cd[d];
385         for (int p = cd.numPulses() - 1; p >= 0; p--)
386         {
387             const gmx_domdec_ind_t&   ind = cd.ind[p];
388             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
389             gmx::ArrayRef<gmx::RVec>& receiveBuffer = receiveBufferAccess.buffer;
390
391             nat_tot -= ind.nrecv[nzone + 1];
392
393             DDBufferAccess<gmx::RVec> sendBufferAccess(
394                     comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
395
396             gmx::ArrayRef<gmx::RVec> sendBuffer;
397             if (cd.receiveInPlace)
398             {
399                 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
400             }
401             else
402             {
403                 sendBuffer = sendBufferAccess.buffer;
404                 int j      = 0;
405                 for (int zone = 0; zone < nzone; zone++)
406                 {
407                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
408                     {
409                         sendBuffer[j++] = f[i];
410                     }
411                 }
412             }
413             /* Communicate the forces */
414             ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
415             /* Add the received forces */
416             int n = 0;
417             if (!shiftForcesNeedPbc)
418             {
419                 for (int j : ind.index)
420                 {
421                     for (int d = 0; d < DIM; d++)
422                     {
423                         f[j][d] += receiveBuffer[n][d];
424                     }
425                     n++;
426                 }
427             }
428             else if (!applyScrewPbc)
429             {
430                 for (int j : ind.index)
431                 {
432                     for (int d = 0; d < DIM; d++)
433                     {
434                         f[j][d] += receiveBuffer[n][d];
435                     }
436                     /* Add this force to the shift force */
437                     for (int d = 0; d < DIM; d++)
438                     {
439                         fshift[is][d] += receiveBuffer[n][d];
440                     }
441                     n++;
442                 }
443             }
444             else
445             {
446                 for (int j : ind.index)
447                 {
448                     /* Rotate the force */
449                     f[j][XX] += receiveBuffer[n][XX];
450                     f[j][YY] -= receiveBuffer[n][YY];
451                     f[j][ZZ] -= receiveBuffer[n][ZZ];
452                     if (shiftForcesNeedPbc)
453                     {
454                         /* Add this force to the shift force */
455                         for (int d = 0; d < DIM; d++)
456                         {
457                             fshift[is][d] += receiveBuffer[n][d];
458                         }
459                     }
460                     n++;
461                 }
462             }
463         }
464         nzone /= 2;
465     }
466     wallcycle_stop(wcycle, WallCycleCounter::MoveF);
467 }
468
469 /* Convenience function for extracting a real buffer from an rvec buffer
470  *
471  * To reduce the number of temporary communication buffers and avoid
472  * cache pollution, we reuse gmx::RVec buffers for storing reals.
473  * This function returns a real buffer reference with the same number
474  * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
475  */
476 static gmx::ArrayRef<real> realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
477 {
478     return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0], arrayRef.size());
479 }
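/* Example (added): a buffer of 100 gmx::RVec elements yields an ArrayRef<real> of
 * 100 reals, i.e. only the first third of the 300 underlying reals is exposed.
 */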
480
481 void dd_atom_spread_real(gmx_domdec_t* dd, real v[])
482 {
483     gmx_domdec_comm_t* comm = dd->comm;
484
485     int nzone   = 1;
486     int nat_tot = comm->atomRanges.numHomeAtoms();
487     for (int d = 0; d < dd->ndim; d++)
488     {
489         gmx_domdec_comm_dim_t* cd = &comm->cd[d];
490         for (const gmx_domdec_ind_t& ind : cd->ind)
491         {
492             /* Note: We provision for RVec instead of real, so a factor of 3
493              * more than needed. The buffer actually already has this size
494              * and we pass a plain pointer below, so this does not matter.
495              */
496             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
497             gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
498             int                 n          = 0;
499             for (int j : ind.index)
500             {
501                 sendBuffer[n++] = v[j];
502             }
503
504             DDBufferAccess<gmx::RVec> receiveBufferAccess(
505                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
506
507             gmx::ArrayRef<real> receiveBuffer;
508             if (cd->receiveInPlace)
509             {
510                 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
511             }
512             else
513             {
514                 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
515             }
516             /* Send and receive the data */
517             ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
518             if (!cd->receiveInPlace)
519             {
520                 int j = 0;
521                 for (int zone = 0; zone < nzone; zone++)
522                 {
523                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
524                     {
525                         v[i] = receiveBuffer[j++];
526                     }
527                 }
528             }
529             nat_tot += ind.nrecv[nzone + 1];
530         }
531         nzone += nzone;
532     }
533 }
534
535 void dd_atom_sum_real(gmx_domdec_t* dd, real v[])
536 {
537     gmx_domdec_comm_t* comm = dd->comm;
538
539     int nzone   = comm->zones.n / 2;
540     int nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
541     for (int d = dd->ndim - 1; d >= 0; d--)
542     {
543         gmx_domdec_comm_dim_t* cd = &comm->cd[d];
544         for (int p = cd->numPulses() - 1; p >= 0; p--)
545         {
546             const gmx_domdec_ind_t& ind = cd->ind[p];
547
548             /* Note: We provision for RVec instead of real, so a factor of 3
549              * more than needed. The buffer actually already has this size
550              * and we typecast, so this works as intended.
551              */
552             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
553             gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
554             nat_tot -= ind.nrecv[nzone + 1];
555
556             DDBufferAccess<gmx::RVec> sendBufferAccess(
557                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
558
559             gmx::ArrayRef<real> sendBuffer;
560             if (cd->receiveInPlace)
561             {
562                 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
563             }
564             else
565             {
566                 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
567                 int j      = 0;
568                 for (int zone = 0; zone < nzone; zone++)
569                 {
570                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
571                     {
572                         sendBuffer[j++] = v[i];
573                     }
574                 }
575             }
576             /* Communicate the data */
577             ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
578             /* Add the received values */
579             int n = 0;
580             for (int j : ind.index)
581             {
582                 v[j] += receiveBuffer[n];
583                 n++;
584             }
585         }
586         nzone /= 2;
587     }
588 }
589
590 real dd_cutoff_multibody(const gmx_domdec_t* dd)
591 {
592     const gmx_domdec_comm_t& comm       = *dd->comm;
593     const DDSystemInfo&      systemInfo = comm.systemInfo;
594
595     real r = -1;
596     if (systemInfo.haveInterDomainMultiBodyBondeds)
597     {
598         if (comm.cutoff_mbody > 0)
599         {
600             r = comm.cutoff_mbody;
601         }
602         else
603         {
604             /* cutoff_mbody=0 means we do not have DLB */
605             r = comm.cellsize_min[dd->dim[0]];
606             for (int di = 1; di < dd->ndim; di++)
607             {
608                 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
609             }
610             if (comm.systemInfo.filterBondedCommunication)
611             {
612                 r = std::max(r, comm.cutoff_mbody);
613             }
614             else
615             {
616                 r = std::min(r, systemInfo.cutoff);
617             }
618         }
619     }
620
621     return r;
622 }
623
624 real dd_cutoff_twobody(const gmx_domdec_t* dd)
625 {
626     const real r_mb = dd_cutoff_multibody(dd);
627
628     return std::max(dd->comm->systemInfo.cutoff, r_mb);
629 }
630
631
632 static void dd_cart_coord2pmecoord(const DDRankSetup&        ddRankSetup,
633                                    const CartesianRankSetup& cartSetup,
634                                    const ivec                coord,
635                                    ivec                      coord_pme)
636 {
637     const int nc   = ddRankSetup.numPPCells[cartSetup.cartpmedim];
638     const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
639     copy_ivec(coord, coord_pme);
640     coord_pme[cartSetup.cartpmedim] =
641             nc + (coord[cartSetup.cartpmedim] * (ntot - nc) + (ntot - nc) / 2) / nc;
642 }
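/* Worked example (added): with nc = 4 PP cells and ntot = 6 Cartesian cells along
 * cartpmedim (i.e. 2 PME-only slabs), coord_pme becomes 4 + (c*2 + 1)/4, so PP
 * coordinates 0,1 map to PME coordinate 4 and PP coordinates 2,3 map to 5.
 */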
643
644 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
645 static int ddindex2pmeindex(const DDRankSetup& ddRankSetup, const int ddCellIndex)
646 {
647     const int npp  = ddRankSetup.numPPRanks;
648     const int npme = ddRankSetup.numRanksDoingPme;
649
650     /* Here we assign a PME node to communicate with this DD node
651      * by assuming that the major index of both is x.
652      * We add npme/2 to obtain an even distribution.
653      */
654     return (ddCellIndex * npme + npme / 2) / npp;
655 }
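/* Worked example (added): with npp = 6 and npme = 2, (ddCellIndex*2 + 1)/6 maps
 * PP cell indices 0,1,2 to PME index 0 and cells 3,4,5 to PME index 1.
 */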
656
657 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup& ddRankSetup)
658 {
659     std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
660
661     int n = 0;
662     for (int i = 0; i < ddRankSetup.numPPRanks; i++)
663     {
664         const int p0 = ddindex2pmeindex(ddRankSetup, i);
665         const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
666         if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
667         {
668             if (debug)
669             {
670                 fprintf(debug, "pme_rank[%d] = %d\n", n, i + 1 + n);
671             }
672             pmeRanks[n] = i + 1 + n;
673             n++;
674         }
675     }
676
677     return pmeRanks;
678 }
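/* Worked example (added): continuing the npp = 6, npme = 2 case above, a PME rank
 * is inserted after each block of PP ranks sharing a PME index, giving simulation
 * ranks 0,1,2 (PP), 3 (PME), 4,5,6 (PP), 7 (PME), i.e. pmeRanks = {3, 7}.
 */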
679
680 static int gmx_ddcoord2pmeindex(const gmx_domdec_t& dd, int x, int y, int z)
681 {
682     ivec coords;
683
684     coords[XX]     = x;
685     coords[YY]     = y;
686     coords[ZZ]     = z;
687     const int slab = ddindex2pmeindex(dd.comm->ddRankSetup, dd_index(dd.numCells, coords));
688
689     return slab;
690 }
691
692 static int ddcoord2simnodeid(const t_commrec* cr, int x, int y, int z)
693 {
694     const CartesianRankSetup& cartSetup = cr->dd->comm->cartesianRankSetup;
695     ivec                      coords    = { x, y, z };
696     int                       nodeid    = -1;
697
698     if (cartSetup.bCartesianPP_PME)
699     {
700 #if GMX_MPI
701         MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
702 #endif
703     }
704     else
705     {
706         int ddindex = dd_index(cr->dd->numCells, coords);
707         if (cartSetup.bCartesianPP)
708         {
709             nodeid = cartSetup.ddindex2simnodeid[ddindex];
710         }
711         else
712         {
713             if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
714             {
715                 nodeid = ddindex + gmx_ddcoord2pmeindex(*cr->dd, x, y, z);
716             }
717             else
718             {
719                 nodeid = ddindex;
720             }
721         }
722     }
723
724     return nodeid;
725 }
726
727 static int dd_simnode2pmenode(const DDRankSetup&        ddRankSetup,
728                               const CartesianRankSetup& cartSetup,
729                               gmx::ArrayRef<const int>  pmeRanks,
730                               const t_commrec gmx_unused* cr,
731                               const int                   sim_nodeid)
732 {
733     int pmenode = -1;
734
735     /* This assumes a uniform x domain decomposition grid cell size */
736     if (cartSetup.bCartesianPP_PME)
737     {
738 #if GMX_MPI
739         ivec coord, coord_pme;
740         MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
741         if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
742         {
743             /* This is a PP rank */
744             dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
745             MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
746         }
747 #endif
748     }
749     else if (cartSetup.bCartesianPP)
750     {
751         if (sim_nodeid < ddRankSetup.numPPRanks)
752         {
753             pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
754         }
755     }
756     else
757     {
758         /* This assumes DD cells with identical x coordinates
759          * are numbered sequentially.
760          */
761         if (pmeRanks.empty())
762         {
763             if (sim_nodeid < ddRankSetup.numPPRanks)
764             {
765                 /* The DD index equals the nodeid */
766                 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
767             }
768         }
769         else
770         {
771             int i = 0;
772             while (sim_nodeid > pmeRanks[i])
773             {
774                 i++;
775             }
776             if (sim_nodeid < pmeRanks[i])
777             {
778                 pmenode = pmeRanks[i];
779             }
780         }
781     }
782
783     return pmenode;
784 }
785
786 NumPmeDomains getNumPmeDomains(const gmx_domdec_t* dd)
787 {
788     if (dd != nullptr)
789     {
790         return { dd->comm->ddRankSetup.npmenodes_x, dd->comm->ddRankSetup.npmenodes_y };
791     }
792     else
793     {
794         return { 1, 1 };
795     }
796 }
797
798 std::vector<int> get_pme_ddranks(const t_commrec* cr, const int pmenodeid)
799 {
800     const DDRankSetup&        ddRankSetup = cr->dd->comm->ddRankSetup;
801     const CartesianRankSetup& cartSetup   = cr->dd->comm->cartesianRankSetup;
802     GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks,
803                        "This function should only be called when PME-only ranks are in use");
804     std::vector<int> ddranks;
805     ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1) / ddRankSetup.numRanksDoingPme);
806
807     for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
808     {
809         for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
810         {
811             for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
812             {
813                 if (cartSetup.bCartesianPP_PME)
814                 {
815                     ivec coord = { x, y, z };
816                     ivec coord_pme;
817                     dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
818                     if (cr->dd->ci[XX] == coord_pme[XX] && cr->dd->ci[YY] == coord_pme[YY]
819                         && cr->dd->ci[ZZ] == coord_pme[ZZ])
820                     {
821                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
822                     }
823                 }
824                 else
825                 {
826                     /* The slab corresponds to the nodeid in the PME group */
827                     if (gmx_ddcoord2pmeindex(*cr->dd, x, y, z) == pmenodeid)
828                     {
829                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
830                     }
831                 }
832             }
833         }
834     }
835     return ddranks;
836 }
837
838 static gmx_bool receive_vir_ener(const gmx_domdec_t* dd, gmx::ArrayRef<const int> pmeRanks, const t_commrec* cr)
839 {
840     bool bReceive = true;
841
842     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
843     if (ddRankSetup.usePmeOnlyRanks)
844     {
845         const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
846         if (cartSetup.bCartesianPP_PME)
847         {
848 #if GMX_MPI
849             int  pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
850             ivec coords;
851             MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
852             coords[cartSetup.cartpmedim]++;
853             if (coords[cartSetup.cartpmedim] < dd->numCells[cartSetup.cartpmedim])
854             {
855                 int rank = 0;
856                 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
857                 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
858                 {
859                     /* This is not the last PP node for pmenode */
860                     bReceive = FALSE;
861                 }
862             }
863 #else
864             GMX_RELEASE_ASSERT(
865                     false,
866                     "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
867 #endif
868         }
869         else
870         {
871             int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
872             if (cr->sim_nodeid + 1 < cr->nnodes
873                 && dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
874             {
875                 /* This is not the last PP node for pmenode */
876                 bReceive = FALSE;
877             }
878         }
879     }
880
881     return bReceive;
882 }
883
884 static void set_slb_pme_dim_f(gmx_domdec_t* dd, int dim, real** dim_f)
885 {
886     gmx_domdec_comm_t* comm = dd->comm;
887
888     snew(*dim_f, dd->numCells[dim] + 1);
889     (*dim_f)[0] = 0;
890     for (int i = 1; i < dd->numCells[dim]; i++)
891     {
892         if (comm->slb_frac[dim])
893         {
894             (*dim_f)[i] = (*dim_f)[i - 1] + comm->slb_frac[dim][i - 1];
895         }
896         else
897         {
898             (*dim_f)[i] = static_cast<real>(i) / static_cast<real>(dd->numCells[dim]);
899         }
900     }
901     (*dim_f)[dd->numCells[dim]] = 1;
902 }
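/* Example (added): with 4 cells along dim and no static load-balancing fractions
 * (slb_frac[dim] == nullptr), dim_f becomes {0, 0.25, 0.5, 0.75, 1}.
 */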
903
904 static void init_ddpme(gmx_domdec_t* dd, gmx_ddpme_t* ddpme, int dimind)
905 {
906     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
907
908     if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
909     {
910         ddpme->dim = YY;
911     }
912     else
913     {
914         ddpme->dim = dimind;
915     }
916     ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
917
918     ddpme->nslab = (ddpme->dim == 0 ? ddRankSetup.npmenodes_x : ddRankSetup.npmenodes_y);
919
920     if (ddpme->nslab <= 1)
921     {
922         return;
923     }
924
925     const int nso = ddRankSetup.numRanksDoingPme / ddpme->nslab;
926     /* Determine for each PME slab the PP location range for dimension dim */
927     snew(ddpme->pp_min, ddpme->nslab);
928     snew(ddpme->pp_max, ddpme->nslab);
929     for (int slab = 0; slab < ddpme->nslab; slab++)
930     {
931         ddpme->pp_min[slab] = dd->numCells[dd->dim[dimind]] - 1;
932         ddpme->pp_max[slab] = 0;
933     }
934     for (int i = 0; i < dd->nnodes; i++)
935     {
936         ivec xyz;
937         ddindex2xyz(dd->numCells, i, xyz);
938         /* For y only use our y/z slab.
939          * This assumes that the PME x grid size matches the DD grid size.
940          */
941         if (dimind == 0 || xyz[XX] == dd->ci[XX])
942         {
943             const int pmeindex  = ddindex2pmeindex(ddRankSetup, i);
944             const int slab      = (dimind == 0) ? (pmeindex / nso) : (pmeindex % ddpme->nslab);
945             ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
946             ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
947         }
948     }
949
950     set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
951 }
952
953 int dd_pme_maxshift_x(const gmx_domdec_t& dd)
954 {
955     const DDRankSetup& ddRankSetup = dd.comm->ddRankSetup;
956
957     if (ddRankSetup.ddpme[0].dim == XX)
958     {
959         return ddRankSetup.ddpme[0].maxshift;
960     }
961     else
962     {
963         return 0;
964     }
965 }
966
967 int dd_pme_maxshift_y(const gmx_domdec_t& dd)
968 {
969     const DDRankSetup& ddRankSetup = dd.comm->ddRankSetup;
970
971     if (ddRankSetup.ddpme[0].dim == YY)
972     {
973         return ddRankSetup.ddpme[0].maxshift;
974     }
975     else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
976     {
977         return ddRankSetup.ddpme[1].maxshift;
978     }
979     else
980     {
981         return 0;
982     }
983 }
984
985 bool ddMayHaveSplitConstraints(const gmx_domdec_t& dd)
986 {
987     return dd.comm->systemInfo.mayHaveSplitConstraints;
988 }
989
990 bool ddUsesUpdateGroups(const gmx_domdec_t& dd)
991 {
992     return dd.comm->systemInfo.useUpdateGroups;
993 }
994
995 void dd_cycles_add(const gmx_domdec_t* dd, float cycles, int ddCycl)
996 {
997     /* Note that the cycles value can be incorrect, either 0 or some
998      * extremely large value, when our thread migrated to another core
999      * with an unsynchronized cycle counter. If this happens less often
1000      * than once per nstlist steps, this will not cause issues, since
1001      * we later subtract the maximum value from the sum over nstlist steps.
1002      * A zero count will slightly lower the total, but that's a small effect.
1003      * Note that the main purpose of the subtraction of the maximum value
1004      * is to avoid throwing off the load balancing when stalls occur due
1005      * to, e.g., system activity or network congestion.
1006      */
1007     dd->comm->cycl[ddCycl] += cycles;
1008     dd->comm->cycl_n[ddCycl]++;
1009     if (cycles > dd->comm->cycl_max[ddCycl])
1010     {
1011         dd->comm->cycl_max[ddCycl] = cycles;
1012     }
1013 }
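/* Numerical illustration (added): with nstlist = 10 and a typical count of 100
 * cycles per step, a single stalled step of 5000 cycles gives a sum of 5900;
 * subtracting the maximum (5000) leaves 900, close to the 1000 expected without
 * the stall, so one outlier cannot throw off the load balancing.
 */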
1014
1015 #if GMX_MPI
1016 static void make_load_communicator(gmx_domdec_t* dd, int dim_ind, ivec loc)
1017 {
1018     MPI_Comm c_row = MPI_COMM_NULL;
1019     ivec     loc_c;
1020     bool     bPartOfGroup = false;
1021
1022     const int dim = dd->dim[dim_ind];
1023     copy_ivec(loc, loc_c);
1024     for (int i = 0; i < dd->numCells[dim]; i++)
1025     {
1026         loc_c[dim]     = i;
1027         const int rank = dd_index(dd->numCells, loc_c);
1028         if (rank == dd->rank)
1029         {
1030             /* This process is part of the group */
1031             bPartOfGroup = TRUE;
1032         }
1033     }
1034     MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank, &c_row);
1035     if (bPartOfGroup)
1036     {
1037         dd->comm->mpi_comm_load[dim_ind] = c_row;
1038         if (!isDlbDisabled(dd->comm))
1039         {
1040             DDCellsizesWithDlb& cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1041
1042             if (dd->ci[dim] == dd->master_ci[dim])
1043             {
1044                 /* This is the root process of this row */
1045                 cellsizes.rowMaster = std::make_unique<RowMaster>();
1046
1047                 RowMaster& rowMaster = *cellsizes.rowMaster;
1048                 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1049                 rowMaster.oldCellFrac.resize(dd->numCells[dim] + 1);
1050                 rowMaster.isCellMin.resize(dd->numCells[dim]);
1051                 if (dim_ind > 0)
1052                 {
1053                     rowMaster.bounds.resize(dd->numCells[dim]);
1054                 }
1055                 rowMaster.buf_ncd.resize(dd->numCells[dim]);
1056             }
1057             else
1058             {
1059                 /* This is not a root process, we only need to receive cell_f */
1060                 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1061             }
1062         }
1063         if (dd->ci[dim] == dd->master_ci[dim])
1064         {
1065             snew(dd->comm->load[dim_ind].load, dd->numCells[dim] * DD_NLOAD_MAX);
1066         }
1067     }
1068 }
1069 #endif
1070
1071 void dd_setup_dlb_resource_sharing(const t_commrec* cr, int gpu_id)
1072 {
1073 #if GMX_MPI
1074     MPI_Comm mpi_comm_pp_physicalnode = MPI_COMM_NULL;
1075
1076     if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1077     {
1078         /* Only ranks with short-ranged tasks (currently) use GPUs.
1079          * If we don't have GPUs assigned, there are no resources to share.
1080          */
1081         return;
1082     }
1083
1084     const int physicalnode_id_hash = gmx_physicalnode_id_hash();
1085
1086     gmx_domdec_t* dd = cr->dd;
1087
1088     if (debug)
1089     {
1090         fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1091         fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n", dd->rank, physicalnode_id_hash, gpu_id);
1092     }
1093     /* Split the PP communicator over the physical nodes */
1094     /* TODO: See if we should store this (before), as it's also used
1095      * for the nodecomm summation.
1096      */
1097     // TODO PhysicalNodeCommunicator could be extended/used to handle
1098     // the need for per-node per-group communicators.
1099     MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank, &mpi_comm_pp_physicalnode);
1100     MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank, &dd->comm->mpi_comm_gpu_shared);
1101     MPI_Comm_free(&mpi_comm_pp_physicalnode);
1102     MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1103
1104     if (debug)
1105     {
1106         fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1107     }
1108
1109     /* Note that some ranks could share a GPU, while others don't */
1110
1111     if (dd->comm->nrank_gpu_shared == 1)
1112     {
1113         MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1114     }
1115 #else
1116     GMX_UNUSED_VALUE(cr);
1117     GMX_UNUSED_VALUE(gpu_id);
1118 #endif
1119 }
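/* Example (added, hypothetical ranks and ids): with four PP ranks on one physical
 * node using gpu_id 0, 0, 1, 1, the two MPI_Comm_split calls above first group the
 * ranks by node hash and then by gpu_id, so ranks 0 and 1 end up in one
 * mpi_comm_gpu_shared communicator and ranks 2 and 3 in another, each with
 * nrank_gpu_shared == 2.
 */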
1120
1121 static void make_load_communicators(gmx_domdec_t gmx_unused* dd)
1122 {
1123 #if GMX_MPI
1124     ivec loc;
1125
1126     if (debug)
1127     {
1128         fprintf(debug, "Making load communicators\n");
1129     }
1130
1131     dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1132     snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1133
1134     if (dd->ndim == 0)
1135     {
1136         return;
1137     }
1138
1139     clear_ivec(loc);
1140     make_load_communicator(dd, 0, loc);
1141     if (dd->ndim > 1)
1142     {
1143         const int dim0 = dd->dim[0];
1144         for (int i = 0; i < dd->numCells[dim0]; i++)
1145         {
1146             loc[dim0] = i;
1147             make_load_communicator(dd, 1, loc);
1148         }
1149     }
1150     if (dd->ndim > 2)
1151     {
1152         const int dim0 = dd->dim[0];
1153         for (int i = 0; i < dd->numCells[dim0]; i++)
1154         {
1155             loc[dim0]      = i;
1156             const int dim1 = dd->dim[1];
1157             for (int j = 0; j < dd->numCells[dim1]; j++)
1158             {
1159                 loc[dim1] = j;
1160                 make_load_communicator(dd, 2, loc);
1161             }
1162         }
1163     }
1164
1165     if (debug)
1166     {
1167         fprintf(debug, "Finished making load communicators\n");
1168     }
1169 #endif
1170 }
1171
1172 /*! \brief Sets up the relation between neighboring domains and zones */
1173 static void setup_neighbor_relations(gmx_domdec_t* dd)
1174 {
1175     ivec tmp, s;
1176     GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
1177
1178     for (int d = 0; d < dd->ndim; d++)
1179     {
1180         const int dim = dd->dim[d];
1181         copy_ivec(dd->ci, tmp);
1182         tmp[dim]           = (tmp[dim] + 1) % dd->numCells[dim];
1183         dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1184         copy_ivec(dd->ci, tmp);
1185         tmp[dim]           = (tmp[dim] - 1 + dd->numCells[dim]) % dd->numCells[dim];
1186         dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1187         if (debug)
1188         {
1189             fprintf(debug,
1190                     "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
1191                     dd->rank,
1192                     dim,
1193                     dd->neighbor[d][0],
1194                     dd->neighbor[d][1]);
1195         }
1196     }
1197
1198     int nzone  = (1 << dd->ndim);
1199     int nizone = (1 << std::max(dd->ndim - 1, 0));
1200     assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1201
1202     gmx_domdec_zones_t* zones = &dd->comm->zones;
1203
1204     for (int i = 0; i < nzone; i++)
1205     {
1206         int m = 0;
1207         clear_ivec(zones->shift[i]);
1208         for (int d = 0; d < dd->ndim; d++)
1209         {
1210             zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1211         }
1212     }
1213
1214     zones->n = nzone;
1215     for (int i = 0; i < nzone; i++)
1216     {
1217         for (int d = 0; d < DIM; d++)
1218         {
1219             s[d] = dd->ci[d] - zones->shift[i][d];
1220             if (s[d] < 0)
1221             {
1222                 s[d] += dd->numCells[d];
1223             }
1224             else if (s[d] >= dd->numCells[d])
1225             {
1226                 s[d] -= dd->numCells[d];
1227             }
1228         }
1229     }
1230     for (int iZoneIndex = 0; iZoneIndex < nizone; iZoneIndex++)
1231     {
1232         GMX_RELEASE_ASSERT(
1233                 ddNonbondedZonePairRanges[iZoneIndex][0] == iZoneIndex,
1234                 "The first element for each ddNonbondedZonePairRanges should match its index");
1235
1236         DDPairInteractionRanges iZone;
1237         iZone.iZoneIndex = iZoneIndex;
1238         /* ddNonbondedZonePairRanges is set up for 3D decomposition; for
1239          * fewer dimensions use only j-zones up to nzone.
1240          */
1241         iZone.jZoneRange = gmx::Range<int>(std::min(ddNonbondedZonePairRanges[iZoneIndex][1], nzone),
1242                                            std::min(ddNonbondedZonePairRanges[iZoneIndex][2], nzone));
1243         for (int dim = 0; dim < DIM; dim++)
1244         {
1245             if (dd->numCells[dim] == 1)
1246             {
1247                 /* All shifts should be allowed */
1248                 iZone.shift0[dim] = -1;
1249                 iZone.shift1[dim] = 1;
1250             }
1251             else
1252             {
1253                 /* Determine the min/max j-zone shift wrt the i-zone */
1254                 iZone.shift0[dim] = 1;
1255                 iZone.shift1[dim] = -1;
1256                 for (int jZone : iZone.jZoneRange)
1257                 {
1258                     int shift_diff = zones->shift[jZone][dim] - zones->shift[iZoneIndex][dim];
1259                     if (shift_diff < iZone.shift0[dim])
1260                     {
1261                         iZone.shift0[dim] = shift_diff;
1262                     }
1263                     if (shift_diff > iZone.shift1[dim])
1264                     {
1265                         iZone.shift1[dim] = shift_diff;
1266                     }
1267                 }
1268             }
1269         }
1270
1271         zones->iZones.push_back(iZone);
1272     }
1273
1274     if (!isDlbDisabled(dd->comm))
1275     {
1276         dd->comm->cellsizesWithDlb.resize(dd->ndim);
1277     }
1278
1279     if (dd->comm->ddSettings.recordLoad)
1280     {
1281         make_load_communicators(dd);
1282     }
1283 }
1284
1285 static void make_pp_communicator(const gmx::MDLogger& mdlog,
1286                                  gmx_domdec_t*        dd,
1287                                  t_commrec gmx_unused* cr,
1288                                  bool gmx_unused reorder)
1289 {
1290 #if GMX_MPI
1291     gmx_domdec_comm_t*  comm      = dd->comm;
1292     CartesianRankSetup& cartSetup = comm->cartesianRankSetup;
1293
1294     if (cartSetup.bCartesianPP)
1295     {
1296         /* Set up cartesian communication for the particle-particle part */
1297         GMX_LOG(mdlog.info)
1298                 .appendTextFormatted("Will use a Cartesian communicator: %d x %d x %d",
1299                                      dd->numCells[XX],
1300                                      dd->numCells[YY],
1301                                      dd->numCells[ZZ]);
1302
1303         ivec periods;
1304         for (int i = 0; i < DIM; i++)
1305         {
1306             periods[i] = TRUE;
1307         }
1308         MPI_Comm comm_cart = MPI_COMM_NULL;
1309         MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->numCells, periods, static_cast<int>(reorder), &comm_cart);
1310         /* We overwrite the old communicator with the new cartesian one */
1311         cr->mpi_comm_mygroup = comm_cart;
1312     }
1313
1314     dd->mpi_comm_all = cr->mpi_comm_mygroup;
1315     MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1316
1317     if (cartSetup.bCartesianPP_PME)
1318     {
1319         /* Since we want to use the original cartesian setup for sim,
1320          * and not the one after split, we need to make an index.
1321          */
1322         cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1323         cartSetup.ddindex2ddnodeid[dd_index(dd->numCells, dd->ci)] = dd->rank;
1324         gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1325         /* Get the rank of the DD master;
1326          * above we made sure that the master node is a PP node.
1327          */
1328         int rank = MASTER(cr) ? dd->rank : 0;
1329         MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1330     }
1331     else if (cartSetup.bCartesianPP)
1332     {
1333         if (!comm->ddRankSetup.usePmeOnlyRanks)
1334         {
1335             /* The PP communicator is also
1336              * the communicator for this simulation
1337              */
1338             cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1339         }
1340         cr->nodeid = dd->rank;
1341
1342         MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1343
1344         /* We need to make an index to go from the coordinates
1345          * to the nodeid of this simulation.
1346          */
1347         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1348         std::vector<int> buf(dd->nnodes);
1349         if (thisRankHasDuty(cr, DUTY_PP))
1350         {
1351             buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
1352         }
1353         /* Communicate the ddindex to simulation nodeid index */
1354         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM, cr->mpi_comm_mysim);
1355
1356         /* Determine the master coordinates and rank.
1357          * The DD master should be the same node as the master of this sim.
1358          */
1359         for (int i = 0; i < dd->nnodes; i++)
1360         {
1361             if (cartSetup.ddindex2simnodeid[i] == 0)
1362             {
1363                 ddindex2xyz(dd->numCells, i, dd->master_ci);
1364                 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1365             }
1366         }
1367         if (debug)
1368         {
1369             fprintf(debug, "The master rank is %d\n", dd->masterrank);
1370         }
1371     }
1372     else
1373     {
1374         /* No Cartesian communicators */
1375         /* We use the rank in dd->comm->all as DD index */
1376         ddindex2xyz(dd->numCells, dd->rank, dd->ci);
1377         /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1378         dd->masterrank = 0;
1379         clear_ivec(dd->master_ci);
1380     }
1381 #endif
1382
1383     GMX_LOG(mdlog.info)
1384             .appendTextFormatted("Domain decomposition rank %d, coordinates %d %d %d\n",
1385                                  dd->rank,
1386                                  dd->ci[XX],
1387                                  dd->ci[YY],
1388                                  dd->ci[ZZ]);
1389     if (debug)
1390     {
1391         fprintf(debug,
1392                 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
1393                 dd->rank,
1394                 dd->ci[XX],
1395                 dd->ci[YY],
1396                 dd->ci[ZZ]);
1397     }
1398 }
1399
1400 static void receive_ddindex2simnodeid(gmx_domdec_t* dd, t_commrec* cr)
1401 {
1402 #if GMX_MPI
1403     CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
1404
1405     if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1406     {
1407         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1408         std::vector<int> buf(dd->nnodes);
1409         if (thisRankHasDuty(cr, DUTY_PP))
1410         {
1411             buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
1412         }
1413         /* Communicate the ddindex to simulation nodeid index */
1414         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM, cr->mpi_comm_mysim);
1415     }
1416 #else
1417     GMX_UNUSED_VALUE(dd);
1418     GMX_UNUSED_VALUE(cr);
1419 #endif
1420 }
1421
1422 static CartesianRankSetup split_communicator(const gmx::MDLogger& mdlog,
1423                                              t_commrec*           cr,
1424                                              const DdRankOrder    ddRankOrder,
1425                                              bool gmx_unused    reorder,
1426                                              const DDRankSetup& ddRankSetup,
1427                                              ivec               ddCellIndex,
1428                                              std::vector<int>*  pmeRanks)
1429 {
1430     CartesianRankSetup cartSetup;
1431
1432     cartSetup.bCartesianPP     = (ddRankOrder == DdRankOrder::cartesian);
1433     cartSetup.bCartesianPP_PME = false;
1434
1435     const ivec& numDDCells = ddRankSetup.numPPCells;
1436     /* Initially we set ntot to the number of PP cells */
1437     copy_ivec(numDDCells, cartSetup.ntot);
1438
1439     if (cartSetup.bCartesianPP)
1440     {
1441         const int numDDCellsTot = ddRankSetup.numPPRanks;
1442         bool      bDiv[DIM];
1443         for (int i = 1; i < DIM; i++)
1444         {
1445             bDiv[i] = ((ddRankSetup.numRanksDoingPme * numDDCells[i]) % numDDCellsTot == 0);
1446         }
1447         if (bDiv[YY] || bDiv[ZZ])
1448         {
1449             cartSetup.bCartesianPP_PME = TRUE;
1450             /* If we have 2D PME decomposition, which is always in x+y,
1451              * we stack the PME only nodes in z.
1452              * Otherwise we choose the direction that provides the thinnest slab
1453              * of PME only nodes as this will have the least effect
1454              * on the PP communication.
1455              * But for the PME communication the opposite might be better.
1456              */
1457             if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 || !bDiv[YY] || numDDCells[YY] > numDDCells[ZZ]))
1458             {
1459                 cartSetup.cartpmedim = ZZ;
1460             }
1461             else
1462             {
1463                 cartSetup.cartpmedim = YY;
1464             }
1465             cartSetup.ntot[cartSetup.cartpmedim] +=
1466                     (ddRankSetup.numRanksDoingPme * numDDCells[cartSetup.cartpmedim]) / numDDCellsTot;
1467         }
1468         else
1469         {
1470             GMX_LOG(mdlog.info)
1471                     .appendTextFormatted(
1472                             "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or "
1473                             "nx*nz (%d*%d)",
1474                             ddRankSetup.numRanksDoingPme,
1475                             numDDCells[XX],
1476                             numDDCells[YY],
1477                             numDDCells[XX],
1478                             numDDCells[ZZ]);
1479             GMX_LOG(mdlog.info)
1480                     .appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1481         }
1482     }
1483
1484     if (cartSetup.bCartesianPP_PME)
1485     {
1486 #if GMX_MPI
1487         int  rank = 0;
1488         ivec periods;
1489
1490         GMX_LOG(mdlog.info)
1491                 .appendTextFormatted(
1492                         "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1493                         cartSetup.ntot[XX],
1494                         cartSetup.ntot[YY],
1495                         cartSetup.ntot[ZZ]);
1496
1497         for (int i = 0; i < DIM; i++)
1498         {
1499             periods[i] = TRUE;
1500         }
1501         MPI_Comm comm_cart = MPI_COMM_NULL;
1502         MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder), &comm_cart);
1503         MPI_Comm_rank(comm_cart, &rank);
1504         if (MASTER(cr) && rank != 0)
1505         {
1506             gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1507         }
1508
1509         /* With this assignment we lose the link to the original communicator,
1510          * which will usually be MPI_COMM_WORLD, unless we have multisim.
1511          */
1512         cr->mpi_comm_mysim = comm_cart;
1513         cr->sim_nodeid     = rank;
1514
1515         MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1516
1517         GMX_LOG(mdlog.info)
1518                 .appendTextFormatted("Cartesian rank %d, coordinates %d %d %d\n",
1519                                      cr->sim_nodeid,
1520                                      ddCellIndex[XX],
1521                                      ddCellIndex[YY],
1522                                      ddCellIndex[ZZ]);
1523
1524         if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1525         {
1526             cr->duty = DUTY_PP;
1527         }
1528         if (!ddRankSetup.usePmeOnlyRanks
1529             || ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1530         {
1531             cr->duty = DUTY_PME;
1532         }
1533
1534         /* Split the sim communicator into PP and PME only nodes */
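        /* The duty (PP or PME) is the color and the DD cell index, linearized
         * via dd_index, is the key, so ranks with the same duty end up in one
         * communicator and keep their relative Cartesian ordering.
         */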
1535         MPI_Comm_split(cr->mpi_comm_mysim,
1536                        getThisRankDuties(cr),
1537                        dd_index(cartSetup.ntot, ddCellIndex),
1538                        &cr->mpi_comm_mygroup);
1539 #else
1540         GMX_UNUSED_VALUE(ddCellIndex);
1541 #endif
1542     }
1543     else
1544     {
1545         switch (ddRankOrder)
1546         {
1547             case DdRankOrder::pp_pme:
1548                 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1549                 break;
1550             case DdRankOrder::interleave:
1551                 /* Interleave the PP-only and PME-only ranks */
1552                 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1553                 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1554                 break;
1555             case DdRankOrder::cartesian: break;
1556             default: gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1557         }
1558
1559         if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1560         {
1561             cr->duty = DUTY_PME;
1562         }
1563         else
1564         {
1565             cr->duty = DUTY_PP;
1566         }
1567 #if GMX_MPI
1568         /* Split the sim communicator into PP and PME only nodes */
1569         MPI_Comm_split(cr->mpi_comm_mysim, getThisRankDuties(cr), cr->nodeid, &cr->mpi_comm_mygroup);
1570         MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1571 #endif
1572     }
1573
1574     GMX_LOG(mdlog.info)
1575             .appendTextFormatted("This rank does only %s work.\n",
1576                                  thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1577
1578     return cartSetup;
1579 }
1580
1581 /*! \brief Makes the PP communicator and the PME communicator, when needed
1582  *
1583  * Returns the Cartesian rank setup.
1584  * Sets \p cr->mpi_comm_mygroup.
1585  * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1586  * With separate PME ranks in interleaved order, sets the PME ranks in \p pmeRanks.
1587  */
1588 static CartesianRankSetup makeGroupCommunicators(const gmx::MDLogger& mdlog,
1589                                                  const DDSettings&    ddSettings,
1590                                                  const DdRankOrder    ddRankOrder,
1591                                                  const DDRankSetup&   ddRankSetup,
1592                                                  t_commrec*           cr,
1593                                                  ivec                 ddCellIndex,
1594                                                  std::vector<int>*    pmeRanks)
1595 {
1596     CartesianRankSetup cartSetup;
1597
1598     // As a default, both group and sim communicators are equal to the default communicator
1599     cr->mpi_comm_mygroup = cr->mpiDefaultCommunicator;
1600     cr->mpi_comm_mysim   = cr->mpiDefaultCommunicator;
1601     cr->nnodes           = cr->sizeOfDefaultCommunicator;
1602     cr->nodeid           = cr->rankInDefaultCommunicator;
1603     cr->sim_nodeid       = cr->rankInDefaultCommunicator;
1604
1605     if (ddRankSetup.usePmeOnlyRanks)
1606     {
1607         /* Split the communicator into a PP and PME part */
1608         cartSetup = split_communicator(
1609                 mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder, ddRankSetup, ddCellIndex, pmeRanks);
1610     }
1611     else
1612     {
1613         /* All nodes do PP and PME */
1614         /* We do not require separate communicators */
1615         cartSetup.bCartesianPP     = false;
1616         cartSetup.bCartesianPP_PME = false;
1617     }
1618
1619     return cartSetup;
1620 }
1621
1622 /*! \brief For PP ranks, sets or makes the communicator
1623  *
1624  * For PME-only ranks, receives the DD index to simulation rank mapping.
1625  * For PP ranks without PME duty, sets the id of the PME rank to communicate with.
1626  */
1627 static void setupGroupCommunication(const gmx::MDLogger&     mdlog,
1628                                     const DDSettings&        ddSettings,
1629                                     gmx::ArrayRef<const int> pmeRanks,
1630                                     t_commrec*               cr,
1631                                     const int                numAtomsInSystem,
1632                                     gmx_domdec_t*            dd)
1633 {
1634     const DDRankSetup&        ddRankSetup = dd->comm->ddRankSetup;
1635     const CartesianRankSetup& cartSetup   = dd->comm->cartesianRankSetup;
1636
1637     if (thisRankHasDuty(cr, DUTY_PP))
1638     {
1639         /* Copy or make a new PP communicator */
1640
1641         /* We (possibly) reordered the nodes in split_communicator,
1642          * so it is no longer required in make_pp_communicator.
1643          */
1644         const bool useCartesianReorder = (ddSettings.useCartesianReorder && !cartSetup.bCartesianPP_PME);
1645
1646         make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1647     }
1648     else
1649     {
1650         receive_ddindex2simnodeid(dd, cr);
1651     }
1652
1653     if (!thisRankHasDuty(cr, DUTY_PME))
1654     {
1655         /* Set up the communication to our PME node */
1656         dd->pme_nodeid = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1657         dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1658         if (debug)
1659         {
1660             fprintf(debug,
1661                     "My pme_nodeid %d receive ener %s\n",
1662                     dd->pme_nodeid,
1663                     gmx::boolToString(dd->pme_receive_vir_ener));
1664         }
1665     }
1666     else
1667     {
1668         dd->pme_nodeid = -1;
1669     }
1670
1671     /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1672     if (MASTER(cr))
1673     {
1674         dd->ma = std::make_unique<AtomDistribution>(dd->numCells, numAtomsInSystem, numAtomsInSystem);
1675     }
1676 }
1677
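/*! \brief Parses user-supplied relative DD cell sizes for one dimension
 *
 * Reads \p nc whitespace-separated values from \p size_string and returns
 * them normalized to sum to 1; e.g. "0.5 1 1 0.5" becomes 0.167 0.333 0.333 0.167.
 * Returns nullptr when \p size_string is null or \p nc is not larger than 1.
 */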
1678 static real* get_slb_frac(const gmx::MDLogger& mdlog, const char* dir, int nc, const char* size_string)
1679 {
1680     real* slb_frac = nullptr;
1681     if (nc > 1 && size_string != nullptr)
1682     {
1683         GMX_LOG(mdlog.info).appendTextFormatted("Using static load balancing for the %s direction", dir);
1684         snew(slb_frac, nc);
1685         real tot = 0;
1686         for (int i = 0; i < nc; i++)
1687         {
1688             double dbl = 0;
1689             int    n   = 0;
1690             sscanf(size_string, "%20lf%n", &dbl, &n);
1691             if (dbl == 0)
1692             {
1693                 gmx_fatal(FARGS,
1694                           "Incorrect or not enough DD cell size entries for direction %s: '%s'",
1695                           dir,
1696                           size_string);
1697             }
1698             slb_frac[i] = dbl;
1699             size_string += n;
1700             tot += slb_frac[i];
1701         }
1702         /* Normalize */
1703         std::string relativeCellSizes = "Relative cell sizes:";
1704         for (int i = 0; i < nc; i++)
1705         {
1706             slb_frac[i] /= tot;
1707             relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1708         }
1709         GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1710     }
1711
1712     return slb_frac;
1713 }
1714
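/*! \brief Returns the total number of bonded interactions that involve more than two atoms */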
1715 static int multi_body_bondeds_count(const gmx_mtop_t& mtop)
1716 {
1717     int n = 0;
1718     for (const auto ilists : IListRange(mtop))
1719     {
1720         for (auto& ilist : extractILists(ilists.list(), IF_BOND))
1721         {
1722             if (NRAL(ilist.functionType) > 2)
1723             {
1724                 n += ilists.nmol() * (ilist.iatoms.size() / ilistStride(ilist));
1725             }
1726         }
1727     }
1728
1729     return n;
1730 }
1731
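/*! \brief Returns the integer value of environment variable \p env_var, or \p def when it is not set
 *
 * A value that is set but cannot be parsed yields 1. For example, with
 * GMX_DD_DEBUG=1 in the environment, dd_getenv(mdlog, "GMX_DD_DEBUG", 0) returns 1.
 */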
1732 static int dd_getenv(const gmx::MDLogger& mdlog, const char* env_var, int def)
1733 {
1734     int   nst = def;
1735     char* val = getenv(env_var);
1736     if (val)
1737     {
1738         if (sscanf(val, "%20d", &nst) <= 0)
1739         {
1740             nst = 1;
1741         }
1742         GMX_LOG(mdlog.info).appendTextFormatted("Found env.var. %s = %s, using value %d", env_var, val, nst);
1743     }
1744
1745     return nst;
1746 }
1747
1748 static void check_dd_restrictions(const gmx_domdec_t* dd, const t_inputrec& inputrec, const gmx::MDLogger& mdlog)
1749 {
1750     if (inputrec.pbcType == PbcType::Screw
1751         && (dd->numCells[XX] == 1 || dd->numCells[YY] > 1 || dd->numCells[ZZ] > 1))
1752     {
1753         gmx_fatal(FARGS,
1754                   "With pbc=%s can only do domain decomposition in the x-direction",
1755                   c_pbcTypeNames[inputrec.pbcType].c_str());
1756     }
1757
1758     if (inputrec.nstlist == 0)
1759     {
1760         gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1761     }
1762
1763     if (inputrec.comm_mode == ComRemovalAlgorithm::Angular && inputrec.pbcType != PbcType::No)
1764     {
1765         GMX_LOG(mdlog.warning)
1766                 .appendText(
1767                         "comm-mode angular will give incorrect results when the comm group "
1768                         "partially crosses a periodic boundary");
1769     }
1770 }
1771
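/*! \brief Returns the smallest initial average cell size over the decomposed dimensions */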
1772 static real average_cellsize_min(const gmx_ddbox_t& ddbox, const ivec numDomains)
1773 {
1774     real r = ddbox.box_size[XX];
1775     for (int d = 0; d < DIM; d++)
1776     {
1777         if (numDomains[d] > 1)
1778         {
1779             /* Check using the initial average cell size */
1780             r = std::min(r, ddbox.box_size[d] * ddbox.skew_fac[d] / numDomains[d]);
1781         }
1782     }
1783
1784     return r;
1785 }
1786
1787 /*! \brief Depending on the initial DLB state, either returns the switched-off state or issues a fatal error.
1788  */
1789 static DlbState forceDlbOffOrBail(DlbState             cmdlineDlbState,
1790                                   const std::string&   reasonStr,
1791                                   const gmx::MDLogger& mdlog)
1792 {
1793     std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
1794     std::string dlbDisableNote     = "NOTE: disabling dynamic load balancing as ";
1795
1796     if (cmdlineDlbState == DlbState::onUser)
1797     {
1798         gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1799     }
1800     else if (cmdlineDlbState == DlbState::offCanTurnOn)
1801     {
1802         GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1803     }
1804     return DlbState::offForever;
1805 }
1806
1807 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1808  *
1809  * This function parses the value of the "-dlb" command line option and sets
1810  * the corresponding state. Then it checks the consistency of the determined
1811  * state with other run parameters and settings. As a result, the initial state
1812  * may be altered, or an error may be thrown if incompatible options are detected.
1813  *
1814  * \param [in] mdlog       Logger.
1815  * \param [in] dlbOption   Enum value for the DLB option.
1816  * \param [in] bRecordLoad True if the load balancer is recording load information.
1817  * \param [in] mdrunOptions  Options for mdrun.
1818  * \param [in] inputrec    The mdrun input parameters.
1819  * \returns                DLB initial/startup state.
1820  */
1821 static DlbState determineInitialDlbState(const gmx::MDLogger&     mdlog,
1822                                          DlbOption                dlbOption,
1823                                          gmx_bool                 bRecordLoad,
1824                                          const gmx::MdrunOptions& mdrunOptions,
1825                                          const t_inputrec&        inputrec)
1826 {
1827     DlbState dlbState = DlbState::offCanTurnOn;
1828
1829     switch (dlbOption)
1830     {
1831         case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1832         case DlbOption::no: dlbState = DlbState::offUser; break;
1833         case DlbOption::yes: dlbState = DlbState::onUser; break;
1834         default: gmx_incons("Invalid dlbOption enum value");
1835     }
1836
1837     /* Reruns don't support DLB: bail or override auto mode */
1838     if (mdrunOptions.rerun)
1839     {
1840         std::string reasonStr = "it is not supported in reruns.";
1841         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1842     }
1843
1844     /* Unsupported integrators */
1845     if (!EI_DYNAMICS(inputrec.eI))
1846     {
1847         auto reasonStr =
1848                 gmx::formatString("it is only supported with dynamics, not with integrator '%s'.",
1849                                   enumValueToString(inputrec.eI));
1850         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1851     }
1852
1853     /* Without cycle counters we can't time work to balance on */
1854     if (!bRecordLoad)
1855     {
1856         std::string reasonStr =
1857                 "cycle counters unsupported or not enabled in the operating system kernel.";
1858         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1859     }
1860
1861     if (mdrunOptions.reproducible)
1862     {
1863         std::string reasonStr = "you started a reproducible run.";
1864         switch (dlbState)
1865         {
1866             case DlbState::offUser: break;
1867             case DlbState::offForever:
1868                 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1869                 break;
1870             case DlbState::offCanTurnOn: return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1871             case DlbState::onCanTurnOff:
1872                 GMX_RELEASE_ASSERT(false, "DlbState::onCanTurnOff is not a valid initial state");
1873                 break;
1874             case DlbState::onUser:
1875                 return forceDlbOffOrBail(
1876                         dlbState,
1877                         reasonStr
1878                                 + " In load balanced runs binary reproducibility cannot be "
1879                                   "ensured.",
1880                         mdlog);
1881             default:
1882                 gmx_fatal(FARGS,
1883                           "Death horror: undefined case (%d) for load balancing choice",
1884                           static_cast<int>(dlbState));
1885         }
1886     }
1887
1888     return dlbState;
1889 }
1890
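/*! \brief Allocates and zero-initializes the domain decomposition communication struct */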
1891 static gmx_domdec_comm_t* init_dd_comm()
1892 {
1893     gmx_domdec_comm_t* comm = new gmx_domdec_comm_t;
1894
1895     comm->n_load_have    = 0;
1896     comm->n_load_collect = 0;
1897
1898     comm->haveTurnedOffDlb = false;
1899
1900     for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
1901     {
1902         comm->sum_nat[i] = 0;
1903     }
1904     comm->ndecomp   = 0;
1905     comm->nload     = 0;
1906     comm->load_step = 0;
1907     comm->load_sum  = 0;
1908     comm->load_max  = 0;
1909     clear_ivec(comm->load_lim);
1910     comm->load_mdf = 0;
1911     comm->load_pme = 0;
1912
1913     /* This should be replaced by a unique pointer */
1914     comm->balanceRegion = ddBalanceRegionAllocate();
1915
1916     return comm;
1917 }
1918
1919 /* Returns whether mtop contains constraints and/or vsites */
1920 static bool systemHasConstraintsOrVsites(const gmx_mtop_t& mtop)
1921 {
1922     return std::any_of(IListRange(mtop).begin(), IListRange(mtop).end(), [](const auto ilist) {
1923         return !extractILists(ilist.list(), IF_CONSTRAINT | IF_VSITE).empty();
1924     });
1925 }
1926
1927 static void setupUpdateGroups(const gmx::MDLogger& mdlog,
1928                               const gmx_mtop_t&    mtop,
1929                               const t_inputrec&    inputrec,
1930                               const real           cutoffMargin,
1931                               DDSystemInfo*        systemInfo)
1932 {
1933     /* When we have constraints and/or vsites, it is beneficial to use
1934      * update groups (when possible) to allow independent update of groups.
1935      */
1936     if (!systemHasConstraintsOrVsites(mtop))
1937     {
1938         /* No constraints or vsites, atoms can be updated independently */
1939         return;
1940     }
1941
1942     systemInfo->updateGroupingsPerMoleculeType = gmx::makeUpdateGroupingsPerMoleculeType(mtop);
1943     systemInfo->useUpdateGroups = (!systemInfo->updateGroupingsPerMoleculeType.empty()
1944                                    && getenv("GMX_NO_UPDATEGROUPS") == nullptr);
1945
1946     if (systemInfo->useUpdateGroups)
1947     {
1948         int numUpdateGroups = 0;
1949         for (const auto& molblock : mtop.molblock)
1950         {
1951             numUpdateGroups += molblock.nmol
1952                                * systemInfo->updateGroupingsPerMoleculeType[molblock.type].numBlocks();
1953         }
1954
1955         systemInfo->maxUpdateGroupRadius = computeMaxUpdateGroupRadius(
1956                 mtop, systemInfo->updateGroupingsPerMoleculeType, maxReferenceTemperature(inputrec));
1957
1958         /* To use update groups, the large domain-to-domain cutoff distance
1959          * should be compatible with the box size.
1960          */
1961         systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
1962
1963         if (systemInfo->useUpdateGroups)
1964         {
1965             GMX_LOG(mdlog.info)
1966                     .appendTextFormatted(
1967                             "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f "
1968                             "nm\n",
1969                             numUpdateGroups,
1970                             mtop.natoms / static_cast<double>(numUpdateGroups),
1971                             systemInfo->maxUpdateGroupRadius);
1972         }
1973         else
1974         {
1975             GMX_LOG(mdlog.info)
1976                     .appendTextFormatted(
1977                             "The combination of rlist and box size prohibits the use of update "
1978                             "groups\n");
1979             systemInfo->updateGroupingsPerMoleculeType.clear();
1980         }
1981     }
1982 }
1983
1984 UnitCellInfo::UnitCellInfo(const t_inputrec& ir) :
1985     npbcdim(numPbcDimensions(ir.pbcType)),
1986     numBoundedDimensions(inputrec2nboundeddim(&ir)),
1987     ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
1988     haveScrewPBC(ir.pbcType == PbcType::Screw)
1989 {
1990 }
1991
1992 /* Returns whether molecules are always whole, i.e. not broken by PBC */
1993 static bool moleculesAreAlwaysWhole(const gmx_mtop_t&                           mtop,
1994                                     const bool                                  useUpdateGroups,
1995                                     gmx::ArrayRef<const gmx::RangePartitioning> updateGroupingsPerMoleculeType)
1996 {
1997     if (useUpdateGroups)
1998     {
1999         GMX_RELEASE_ASSERT(updateGroupingsPerMoleculeType.size() == mtop.moltype.size(),
2000                            "Need one grouping per moltype");
2001         for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
2002         {
2003             if (updateGroupingsPerMoleculeType[mol].numBlocks() > 1)
2004             {
2005                 return false;
2006             }
2007         }
2008     }
2009     else
2010     {
2011         for (const auto& moltype : mtop.moltype)
2012         {
2013             if (moltype.atoms.nr > 1)
2014             {
2015                 return false;
2016             }
2017         }
2018     }
2019
2020     return true;
2021 }
2022
2023 /*! \brief Generate the simulation system information */
2024 static DDSystemInfo getSystemInfo(const gmx::MDLogger&           mdlog,
2025                                   DDRole                         ddRole,
2026                                   MPI_Comm                       communicator,
2027                                   const DomdecOptions&           options,
2028                                   const gmx_mtop_t&              mtop,
2029                                   const t_inputrec&              ir,
2030                                   const matrix                   box,
2031                                   gmx::ArrayRef<const gmx::RVec> xGlobal)
2032 {
2033     const real tenPercentMargin = 1.1;
2034
2035     DDSystemInfo systemInfo;
2036
2037     /* We need to decide on update groups early, as this affects communication distances */
2038     systemInfo.useUpdateGroups = false;
2039     if (ir.cutoff_scheme == CutoffScheme::Verlet)
2040     {
2041         real cutoffMargin = std::sqrt(max_cutoff2(ir.pbcType, box)) - ir.rlist;
2042         setupUpdateGroups(mdlog, mtop, ir, cutoffMargin, &systemInfo);
2043     }
2044
2045     systemInfo.moleculesAreAlwaysWhole = moleculesAreAlwaysWhole(
2046             mtop, systemInfo.useUpdateGroups, systemInfo.updateGroupingsPerMoleculeType);
2047     systemInfo.haveInterDomainBondeds =
2048             (!systemInfo.moleculesAreAlwaysWhole || mtop.bIntermolecularInteractions);
2049     systemInfo.haveInterDomainMultiBodyBondeds =
2050             (systemInfo.haveInterDomainBondeds && multi_body_bondeds_count(mtop) > 0);
2051
2052     if (systemInfo.useUpdateGroups)
2053     {
2054         systemInfo.mayHaveSplitConstraints = false;
2055         systemInfo.mayHaveSplitSettles     = false;
2056     }
2057     else
2058     {
2059         systemInfo.mayHaveSplitConstraints = (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0
2060                                               || gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0);
2061         systemInfo.mayHaveSplitSettles     = (gmx_mtop_ftype_count(mtop, F_SETTLE) > 0);
2062     }
2063
2064     if (ir.rlist == 0)
2065     {
2066         /* Set the cut-off to some very large value,
2067          * so we don't need if statements everywhere in the code.
2068          * We use sqrt, since the cut-off is squared in some places.
2069          */
2070         systemInfo.cutoff = GMX_CUTOFF_INF;
2071     }
2072     else
2073     {
2074         systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir.rlist);
2075     }
2076     systemInfo.minCutoffForMultiBody = 0;
2077
2078     /* Determine the minimum cell size limit, affected by many factors */
2079     systemInfo.cellsizeLimit             = 0;
2080     systemInfo.filterBondedCommunication = false;
2081
2082     /* We do not allow home atoms to move beyond the neighboring domain
2083      * between domain decomposition steps, which limits the cell size.
2084      * Get an estimate of cell size limit due to atom displacement.
2085      * In most cases this is a large overestimate, because it assumes
2086      * non-interacting atoms.
2087      * We set the chance to 1 in a trillion steps.
2088      */
2089     constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2090     const real     limitForAtomDisplacement          = minCellSizeForAtomDisplacement(
2091             mtop, ir, systemInfo.updateGroupingsPerMoleculeType, c_chanceThatAtomMovesBeyondDomain);
2092     GMX_LOG(mdlog.info).appendTextFormatted("Minimum cell size due to atom displacement: %.3f nm", limitForAtomDisplacement);
2093
2094     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, limitForAtomDisplacement);
2095
2096     /* TODO: PME decomposition currently requires atoms not to be more than
2097      *       2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2098      *       In nearly all cases, limitForAtomDisplacement will be smaller
2099      *       than 2/3*rlist, so the PME requirement is satisfied.
2100      *       But it would be better for both correctness and performance
2101      *       to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2102      *       Note that we would need to improve the pairlist buffer case.
2103      */
2104
2105     if (systemInfo.haveInterDomainBondeds)
2106     {
2107         if (options.minimumCommunicationRange > 0)
2108         {
2109             systemInfo.minCutoffForMultiBody =
2110                     atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2111             if (options.useBondedCommunication)
2112             {
2113                 systemInfo.filterBondedCommunication =
2114                         (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2115             }
2116             else
2117             {
2118                 systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
2119             }
2120         }
2121         else if (ir.bPeriodicMols)
2122         {
2123             /* Can not easily determine the required cut-off */
2124             GMX_LOG(mdlog.warning)
2125                     .appendText(
2126                             "NOTE: Periodic molecules are present in this system. Because of this, "
2127                             "the domain decomposition algorithm cannot easily determine the "
2128                             "minimum cell size that it requires for treating bonded interactions. "
2129                             "Instead, domain decomposition will assume that half the non-bonded "
2130                             "cut-off will be a suitable lower bound.");
2131             systemInfo.minCutoffForMultiBody = systemInfo.cutoff / 2;
2132         }
2133         else
2134         {
2135             real r_2b = 0;
2136             real r_mb = 0;
2137
2138             if (ddRole == DDRole::Master)
2139             {
2140                 dd_bonded_cg_distance(mdlog, mtop, ir, xGlobal, box, options.ddBondedChecking, &r_2b, &r_mb);
2141             }
2142             gmx_bcast(sizeof(r_2b), &r_2b, communicator);
2143             gmx_bcast(sizeof(r_mb), &r_mb, communicator);
2144
2145             /* We use an initial margin of 10% for the minimum cell size,
2146              * except when we are just below the non-bonded cut-off.
2147              */
2148             if (options.useBondedCommunication)
2149             {
2150                 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2151                 {
2152                     const real r_bonded              = std::max(r_2b, r_mb);
2153                     systemInfo.minCutoffForMultiBody = tenPercentMargin * r_bonded;
2154                     /* This is the (only) place where we turn on the filtering */
2155                     systemInfo.filterBondedCommunication = true;
2156                 }
2157                 else
2158                 {
2159                     const real r_bonded = r_mb;
2160                     systemInfo.minCutoffForMultiBody =
2161                             std::min(tenPercentMargin * r_bonded, systemInfo.cutoff);
2162                 }
2163                 /* We determine cutoff_mbody later */
2164                 systemInfo.increaseMultiBodyCutoff = true;
2165             }
2166             else
2167             {
2168                 /* No special bonded communication,
2169                  * simply increase the DD cut-off.
2170                  */
2171                 systemInfo.minCutoffForMultiBody = tenPercentMargin * std::max(r_2b, r_mb);
2172                 systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
2173             }
2174         }
2175         GMX_LOG(mdlog.info)
2176                 .appendTextFormatted("Minimum cell size due to bonded interactions: %.3f nm",
2177                                      systemInfo.minCutoffForMultiBody);
2178
2179         systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.minCutoffForMultiBody);
2180     }
2181
2182     systemInfo.constraintCommunicationRange = 0;
2183     if (systemInfo.mayHaveSplitConstraints && options.constraintCommunicationRange <= 0)
2184     {
2185         /* There is a cell size limit due to the constraints (P-LINCS) */
2186         systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, &mtop, &ir);
2187         GMX_LOG(mdlog.info)
2188                 .appendTextFormatted("Estimated maximum distance required for P-LINCS: %.3f nm",
2189                                      systemInfo.constraintCommunicationRange);
2190         if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2191         {
2192             GMX_LOG(mdlog.info)
2193                     .appendText(
2194                             "This distance will limit the DD cell size, you can override this with "
2195                             "-rcon");
2196         }
2197     }
2198     else if (options.constraintCommunicationRange > 0)
2199     {
2200         /* Here we do not check for dd->splitConstraints,
2201          * because one can also set a cell size limit for virtual sites only,
2202          * and at this point we don't know yet if there are intercg v-sites.
2203          */
2204         GMX_LOG(mdlog.info)
2205                 .appendTextFormatted("User supplied maximum distance required for P-LINCS: %.3f nm",
2206                                      options.constraintCommunicationRange);
2207         systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2208     }
2209     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.constraintCommunicationRange);
2210
2211     return systemInfo;
2212 }
2213
2214 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
2215  * implemented. */
2216 static void checkDDGridSetup(const DDGridSetup&   ddGridSetup,
2217                              DDRole               ddRole,
2218                              MPI_Comm             communicator,
2219                              int                  numNodes,
2220                              const DomdecOptions& options,
2221                              const DDSettings&    ddSettings,
2222                              const DDSystemInfo&  systemInfo,
2223                              const real           cellsizeLimit,
2224                              const gmx_ddbox_t&   ddbox)
2225 {
2226     if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
2227     {
2228         const bool  bC = (systemInfo.mayHaveSplitConstraints
2229                          && systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2230         std::string message =
2231                 gmx::formatString("Change the number of ranks or mdrun option %s%s%s",
2232                                   !bC ? "-rdd" : "-rcon",
2233                                   ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2234                                   bC ? " or your LINCS settings" : "");
2235
2236         gmx_fatal_collective(FARGS,
2237                              communicator,
2238                              ddRole == DDRole::Master,
2239                              "There is no domain decomposition for %d ranks that is compatible "
2240                              "with the given box and a minimum cell size of %g nm\n"
2241                              "%s\n"
2242                              "Look in the log file for details on the domain decomposition",
2243                              numNodes - ddGridSetup.numPmeOnlyRanks,
2244                              cellsizeLimit,
2245                              message.c_str());
2246     }
2247
2248     const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
2249     if (acs < cellsizeLimit)
2250     {
2251         if (options.numCells[XX] <= 0)
2252         {
2253             GMX_RELEASE_ASSERT(
2254                     false,
2255                     "dd_choose_grid() should return a grid that satisfies the cell size limits");
2256         }
2257         else
2258         {
2259             gmx_fatal_collective(
2260                     FARGS,
2261                     communicator,
2262                     ddRole == DDRole::Master,
2263                     "The initial cell size (%f) is smaller than the cell size limit (%f), change "
2264                     "options -dd, -rdd or -rcon, see the log file for details",
2265                     acs,
2266                     cellsizeLimit);
2267         }
2268     }
2269
2270     const int numPPRanks =
2271             ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2272     if (numNodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2273     {
2274         gmx_fatal_collective(FARGS,
2275                              communicator,
2276                              ddRole == DDRole::Master,
2277                              "The size of the domain decomposition grid (%d) does not match the "
2278                              "number of PP ranks (%d). The total number of ranks is %d",
2279                              numPPRanks,
2280                              numNodes - ddGridSetup.numPmeOnlyRanks,
2281                              numNodes);
2282     }
2283     if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2284     {
2285         gmx_fatal_collective(FARGS,
2286                              communicator,
2287                              ddRole == DDRole::Master,
2288                              "The number of separate PME ranks (%d) is larger than the number of "
2289                              "PP ranks (%d), this is not supported.",
2290                              ddGridSetup.numPmeOnlyRanks,
2291                              numPPRanks);
2292     }
2293 }
2294
2295 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2296 static DDRankSetup getDDRankSetup(const gmx::MDLogger& mdlog,
2297                                   int                  numNodes,
2298                                   const DDGridSetup&   ddGridSetup,
2299                                   const t_inputrec&    ir)
2300 {
2301     GMX_LOG(mdlog.info)
2302             .appendTextFormatted("Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2303                                  ddGridSetup.numDomains[XX],
2304                                  ddGridSetup.numDomains[YY],
2305                                  ddGridSetup.numDomains[ZZ],
2306                                  ddGridSetup.numPmeOnlyRanks);
2307
2308     DDRankSetup ddRankSetup;
2309
2310     ddRankSetup.numPPRanks = numNodes - ddGridSetup.numPmeOnlyRanks;
2311     copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2312
2313     ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2314     if (ddRankSetup.usePmeOnlyRanks)
2315     {
2316         ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2317     }
2318     else
2319     {
2320         ddRankSetup.numRanksDoingPme =
2321                 ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2322     }
2323
2324     if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2325     {
2326         /* The following choices should match those
2327          * in comm_cost_est in domdec_setup.c.
2328          * Note that here the checks have to take into account
2329          * that the decomposition might occur in a different order than xyz
2330          * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2331          * in which case they will not match those in comm_cost_est,
2332          * but since that is mainly for testing purposes that's fine.
2333          */
2334         if (ddGridSetup.numDDDimensions >= 2 && ddGridSetup.ddDimensions[0] == XX
2335             && ddGridSetup.ddDimensions[1] == YY
2336             && ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX]
2337             && ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0
2338             && getenv("GMX_PMEONEDD") == nullptr)
2339         {
2340             ddRankSetup.npmedecompdim = 2;
2341             ddRankSetup.npmenodes_x   = ddGridSetup.numDomains[XX];
2342             ddRankSetup.npmenodes_y   = ddRankSetup.numRanksDoingPme / ddRankSetup.npmenodes_x;
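            /* Example (for illustration only): a 4x3x2 PP grid with 8 PME-only
             * ranks gives a 4x2 PME grid, i.e. npmenodes_x = 4 and npmenodes_y = 8/4 = 2.
             */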
2343         }
2344         else
2345         {
2346             /* In case nc is 1 in both x and y we could still choose to
2347              * decompose pme in y instead of x, but we use x for simplicity.
2348              */
2349             ddRankSetup.npmedecompdim = 1;
2350             if (ddGridSetup.ddDimensions[0] == YY)
2351             {
2352                 ddRankSetup.npmenodes_x = 1;
2353                 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2354             }
2355             else
2356             {
2357                 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2358                 ddRankSetup.npmenodes_y = 1;
2359             }
2360         }
2361         GMX_LOG(mdlog.info)
2362                 .appendTextFormatted("PME domain decomposition: %d x %d x %d",
2363                                      ddRankSetup.npmenodes_x,
2364                                      ddRankSetup.npmenodes_y,
2365                                      1);
2366     }
2367     else
2368     {
2369         ddRankSetup.npmedecompdim = 0;
2370         ddRankSetup.npmenodes_x   = 0;
2371         ddRankSetup.npmenodes_y   = 0;
2372     }
2373
2374     return ddRankSetup;
2375 }
2376
2377 /*! \brief Set the cell size and interaction limits */
2378 static void set_dd_limits(const gmx::MDLogger& mdlog,
2379                           DDRole               ddRole,
2380                           gmx_domdec_t*        dd,
2381                           const DomdecOptions& options,
2382                           const DDSettings&    ddSettings,
2383                           const DDSystemInfo&  systemInfo,
2384                           const DDGridSetup&   ddGridSetup,
2385                           const int            numPPRanks,
2386                           const gmx_mtop_t&    mtop,
2387                           const t_inputrec&    ir,
2388                           const gmx_ddbox_t&   ddbox)
2389 {
2390     gmx_domdec_comm_t* comm = dd->comm;
2391     comm->ddSettings        = ddSettings;
2392
2393     /* Initialize the GPU share count to 0, it might change later */
2394     comm->nrank_gpu_shared = 0;
2395
2396     comm->dlbState = comm->ddSettings.initialDlbState;
2397     dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2398     /* To consider turning DLB on after 2*nstlist steps we need to check
2399      * at partitioning count 3. Thus we need to increase the first count by 2.
2400      */
2401     comm->ddPartioningCountFirstDlbOff += 2;
2402
2403     comm->bPMELoadBalDLBLimits = FALSE;
2404
2405     /* Allocate the charge group/atom sorting struct */
2406     comm->sort = std::make_unique<gmx_domdec_sort_t>();
2407
2408     comm->systemInfo = systemInfo;
2409
2410     if (systemInfo.useUpdateGroups)
2411     {
2412         /* Note: We would like to use dd->nnodes for the atom count estimate,
2413          *       but that is not yet available here. This anyhow only
2414          *       affects performance up to the second dd_partition_system call.
2415          */
2416         const int homeAtomCountEstimate = mtop.natoms / numPPRanks;
2417         comm->updateGroupsCog           = std::make_unique<gmx::UpdateGroupsCog>(
2418                 mtop, systemInfo.updateGroupingsPerMoleculeType, maxReferenceTemperature(ir), homeAtomCountEstimate);
2419     }
2420
2421     /* Set the DD setup given by ddGridSetup */
2422     copy_ivec(ddGridSetup.numDomains, dd->numCells);
2423     dd->ndim = ddGridSetup.numDDDimensions;
2424     copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2425
2426     dd->nnodes = dd->numCells[XX] * dd->numCells[YY] * dd->numCells[ZZ];
2427
2428     snew(comm->slb_frac, DIM);
2429     if (isDlbDisabled(comm))
2430     {
2431         comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->numCells[XX], options.cellSizeX);
2432         comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->numCells[YY], options.cellSizeY);
2433         comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->numCells[ZZ], options.cellSizeZ);
2434     }
2435
2436     /* Set the multi-body cut-off and cellsize limit for DLB */
2437     comm->cutoff_mbody   = systemInfo.minCutoffForMultiBody;
2438     comm->cellsize_limit = systemInfo.cellsizeLimit;
2439     if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2440     {
2441         if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2442         {
2443             /* Set the bonded communication distance to halfway between
2444              * the minimum and the maximum,
2445              * since the extra communication cost is nearly zero.
2446              */
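            /* For example, a 1.0 nm multi-body limit with a 2.0 nm average cell
             * size gives a bonded communication distance of 1.5 nm here.
             */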
2447             real acs           = average_cellsize_min(ddbox, dd->numCells);
2448             comm->cutoff_mbody = 0.5 * (systemInfo.minCutoffForMultiBody + acs);
2449             if (!isDlbDisabled(comm))
2450             {
2451                 /* Check if this does not limit the scaling */
2452                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, options.dlbScaling * acs);
2453             }
2454             if (!systemInfo.filterBondedCommunication)
2455             {
2456                 /* Without bBondComm do not go beyond the n.b. cut-off */
2457                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2458                 if (comm->cellsize_limit >= systemInfo.cutoff)
2459                 {
2460                     /* We don't lose a lot of efficiency
2461                      * when increasing it to the n.b. cut-off.
2462                      * It can even be slightly faster, because we need
2463                      * fewer checks for the communication setup.
2464                      */
2465                     comm->cutoff_mbody = systemInfo.cutoff;
2466                 }
2467             }
2468             /* Check if we did not end up below our original limit */
2469             comm->cutoff_mbody = std::max(comm->cutoff_mbody, systemInfo.minCutoffForMultiBody);
2470
2471             if (comm->cutoff_mbody > comm->cellsize_limit)
2472             {
2473                 comm->cellsize_limit = comm->cutoff_mbody;
2474             }
2475         }
2476         /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2477     }
2478
2479     if (debug)
2480     {
2481         fprintf(debug,
2482                 "Bonded atom communication beyond the cut-off: %s\n"
2483                 "cellsize limit %f\n",
2484                 gmx::boolToString(systemInfo.filterBondedCommunication),
2485                 comm->cellsize_limit);
2486     }
2487
2488     if (ddRole == DDRole::Master)
2489     {
2490         check_dd_restrictions(dd, ir, mdlog);
2491     }
2492 }
2493
2494 void dd_init_bondeds(FILE*                           fplog,
2495                      gmx_domdec_t*                   dd,
2496                      const gmx_mtop_t&               mtop,
2497                      const gmx::VirtualSitesHandler* vsite,
2498                      const t_inputrec&               inputrec,
2499                      const gmx::DDBondedChecking     ddBondedChecking,
2500                      gmx::ArrayRef<cginfo_mb_t>      cginfo_mb)
2501 {
2502     dd_make_reverse_top(fplog, dd, mtop, vsite, inputrec, ddBondedChecking);
2503
2504     gmx_domdec_comm_t* comm = dd->comm;
2505
2506     if (comm->systemInfo.filterBondedCommunication)
2507     {
2508         /* Communicate atoms beyond the cut-off for bonded interactions */
2509         comm->bondedLinks = makeBondedLinks(mtop, cginfo_mb);
2510     }
2511     else
2512     {
2513         /* Only communicate atoms based on cut-off */
2514         comm->bondedLinks = nullptr;
2515     }
2516 }
2517
2518 static void writeSettings(gmx::TextWriter*   log,
2519                           gmx_domdec_t*      dd,
2520                           const gmx_mtop_t&  mtop,
2521                           const t_inputrec&  ir,
2522                           gmx_bool           bDynLoadBal,
2523                           real               dlb_scale,
2524                           const gmx_ddbox_t* ddbox)
2525 {
2526     gmx_domdec_comm_t* comm = dd->comm;
2527
2528     if (bDynLoadBal)
2529     {
2530         log->writeString("The maximum number of communication pulses is:");
2531         for (int d = 0; d < dd->ndim; d++)
2532         {
2533             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2534         }
2535         log->ensureLineBreak();
2536         log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm",
2537                                 comm->cellsize_limit);
2538         log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2539         log->writeString("The allowed shrink of domain decomposition cells is:");
2540         for (int d = 0; d < DIM; d++)
2541         {
2542             if (dd->numCells[d] > 1)
2543             {
2544                 const real shrink =
2545                         (d >= ddbox->npbcdim && dd->numCells[d] == 2)
2546                                 ? 0
2547                                 : comm->cellsize_min_dlb[d]
2548                                           / (ddbox->box_size[d] * ddbox->skew_fac[d] / dd->numCells[d]);
2549                 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2550             }
2551         }
2552         log->ensureLineBreak();
2553     }
2554     else
2555     {
2556         ivec np;
2557         set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2558         log->writeString("The initial number of communication pulses is:");
2559         for (int d = 0; d < dd->ndim; d++)
2560         {
2561             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2562         }
2563         log->ensureLineBreak();
2564         log->writeString("The initial domain decomposition cell size is:");
2565         for (int d = 0; d < DIM; d++)
2566         {
2567             if (dd->numCells[d] > 1)
2568             {
2569                 log->writeStringFormatted(" %c %.2f nm", dim2char(d), dd->comm->cellsize_min[d]);
2570             }
2571         }
2572         log->ensureLineBreak();
2573         log->writeLine();
2574     }
2575
2576     const bool haveInterDomainVsites =
2577             (countInterUpdategroupVsites(mtop, comm->systemInfo.updateGroupingsPerMoleculeType) != 0);
2578
2579     if (comm->systemInfo.haveInterDomainBondeds || haveInterDomainVsites
2580         || comm->systemInfo.mayHaveSplitConstraints || comm->systemInfo.mayHaveSplitSettles)
2581     {
2582         std::string decompUnits;
2583         if (comm->systemInfo.useUpdateGroups)
2584         {
2585             decompUnits = "atom groups";
2586         }
2587         else
2588         {
2589             decompUnits = "atoms";
2590         }
2591
2592         log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:",
2593                                 decompUnits.c_str());
2594         log->writeLineFormatted(
2595                 "%40s  %-7s %6.3f nm", "non-bonded interactions", "", comm->systemInfo.cutoff);
2596
2597         real limit = 0;
2598         if (bDynLoadBal)
2599         {
2600             limit = dd->comm->cellsize_limit;
2601         }
2602         else
2603         {
2604             if (dd->unitCellInfo.ddBoxIsDynamic)
2605             {
2606                 log->writeLine(
2607                         "(the following are initial values, they could change due to box "
2608                         "deformation)");
2609             }
2610             limit = dd->comm->cellsize_min[XX];
2611             for (int d = 1; d < DIM; d++)
2612             {
2613                 limit = std::min(limit, dd->comm->cellsize_min[d]);
2614             }
2615         }
2616
2617         if (comm->systemInfo.haveInterDomainBondeds)
2618         {
2619             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2620                                     "two-body bonded interactions",
2621                                     "(-rdd)",
2622                                     std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2623             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2624                                     "multi-body bonded interactions",
2625                                     "(-rdd)",
2626                                     (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm))
2627                                             ? comm->cutoff_mbody
2628                                             : std::min(comm->systemInfo.cutoff, limit));
2629         }
2630         if (haveInterDomainVsites)
2631         {
2632             log->writeLineFormatted("%40s  %-7s %6.3f nm", "virtual site constructions", "(-rcon)", limit);
2633         }
2634         if (comm->systemInfo.mayHaveSplitConstraints || comm->systemInfo.mayHaveSplitSettles)
2635         {
2636             std::string separation =
2637                     gmx::formatString("atoms separated by up to %d constraints", 1 + ir.nProjOrder);
2638             log->writeLineFormatted("%40s  %-7s %6.3f nm\n", separation.c_str(), "(-rcon)", limit);
2639         }
2640         log->ensureLineBreak();
2641     }
2642 }
2643
2644 static void logSettings(const gmx::MDLogger& mdlog,
2645                         gmx_domdec_t*        dd,
2646                         const gmx_mtop_t&    mtop,
2647                         const t_inputrec&    ir,
2648                         real                 dlb_scale,
2649                         const gmx_ddbox_t*   ddbox)
2650 {
2651     gmx::StringOutputStream stream;
2652     gmx::TextWriter         log(&stream);
2653     writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2654     if (dd->comm->dlbState == DlbState::offCanTurnOn)
2655     {
2656         {
2657             log.ensureEmptyLine();
2658             log.writeLine(
2659                     "When dynamic load balancing gets turned on, these settings will change to:");
2660         }
2661         writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2662     }
2663     GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2664 }
2665
2666 static void set_cell_limits_dlb(const gmx::MDLogger& mdlog,
2667                                 gmx_domdec_t*        dd,
2668                                 real                 dlb_scale,
2669                                 const t_inputrec&    inputrec,
2670                                 const gmx_ddbox_t*   ddbox)
2671 {
2672     int npulse       = 0;
2673     int npulse_d_max = 0;
2674     int npulse_d     = 0;
2675
2676     gmx_domdec_comm_t* comm = dd->comm;
2677
2678     bool bNoCutOff = (inputrec.rvdw == 0 || inputrec.rcoulomb == 0);
2679
2680     /* Determine the maximum number of comm. pulses in one dimension */
2681
2682     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2683
2684     /* Determine the maximum required number of grid pulses */
2685     if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2686     {
2687         /* Only a single pulse is required */
2688         npulse = 1;
2689     }
2690     else if (!bNoCutOff && comm->cellsize_limit > 0)
2691     {
2692         /* We round down slightly here to avoid overhead due to the latency
2693          * of extra communication calls when the cut-off
2694          * would be only slightly longer than the cell size.
2695          * Later cellsize_limit is redetermined,
2696          * so we can not miss interactions due to this rounding.
2697          */
2698         npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff / comm->cellsize_limit);
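        /* Example: a 1.01 nm cut-off with a 0.5 nm cell size limit gives
         * int(0.96 + 2.02) = 2 pulses rather than 3, since the cut-off is only
         * marginally longer than two cells.
         */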
2699     }
2700     else
2701     {
2702         /* There is no cell size limit */
2703         npulse = std::max(dd->numCells[XX] - 1, std::max(dd->numCells[YY] - 1, dd->numCells[ZZ] - 1));
2704     }
2705
2706     if (!bNoCutOff && npulse > 1)
2707     {
2708         /* See if we can do with less pulses, based on dlb_scale */
2709         npulse_d_max = 0;
2710         for (int d = 0; d < dd->ndim; d++)
2711         {
2712             int dim  = dd->dim[d];
2713             npulse_d = static_cast<int>(
2714                     1
2715                     + dd->numCells[dim] * comm->systemInfo.cutoff
2716                               / (ddbox->box_size[dim] * ddbox->skew_fac[dim] * dlb_scale));
2717             npulse_d_max = std::max(npulse_d_max, npulse_d);
2718         }
2719         npulse = std::min(npulse, npulse_d_max);
2720     }
2721
2722     /* This env var can override npulse */
2723     const int ddPulseEnv = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2724     if (ddPulseEnv > 0)
2725     {
2726         npulse = ddPulseEnv;
2727     }
2728
2729     comm->maxpulse       = 1;
2730     comm->bVacDLBNoLimit = (inputrec.pbcType == PbcType::No);
2731     for (int d = 0; d < dd->ndim; d++)
2732     {
2733         comm->cd[d].np_dlb = std::min(npulse, dd->numCells[dd->dim[d]] - 1);
2734         comm->maxpulse     = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2735         if (comm->cd[d].np_dlb < dd->numCells[dd->dim[d]] - 1)
2736         {
2737             comm->bVacDLBNoLimit = FALSE;
2738         }
2739     }
2740
2741     /* cellsize_limit is set for LINCS in init_domain_decomposition */
2742     if (!comm->bVacDLBNoLimit)
2743     {
2744         comm->cellsize_limit = std::max(comm->cellsize_limit, comm->systemInfo.cutoff / comm->maxpulse);
2745     }
2746     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2747     /* Set the minimum cell size for each DD dimension */
2748     for (int d = 0; d < dd->ndim; d++)
2749     {
2750         if (comm->bVacDLBNoLimit || comm->cd[d].np_dlb * comm->cellsize_limit >= comm->systemInfo.cutoff)
2751         {
2752             comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2753         }
2754         else
2755         {
2756             comm->cellsize_min_dlb[dd->dim[d]] = comm->systemInfo.cutoff / comm->cd[d].np_dlb;
2757         }
2758     }
2759     if (comm->cutoff_mbody <= 0)
2760     {
2761         comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2762     }
2763     if (isDlbOn(comm))
2764     {
2765         set_dlb_limits(dd);
2766     }
2767 }
2768
2769 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t& dd)
2770 {
2771     return dd.comm->systemInfo.moleculesAreAlwaysWhole;
2772 }
2773
2774 bool dd_bonded_molpbc(const gmx_domdec_t& dd, PbcType pbcType)
2775 {
2776     /* If each molecule is a single charge group
2777      * or we use domain decomposition for each periodic dimension,
2778      * we do not need to take pbc into account for the bonded interactions.
2779      */
2780     return (pbcType != PbcType::No && dd.comm->systemInfo.haveInterDomainBondeds
2781             && !(dd.numCells[XX] > 1 && dd.numCells[YY] > 1
2782                  && (dd.numCells[ZZ] > 1 || pbcType == PbcType::XY)));
2783 }
2784
2785 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2786 static void set_ddgrid_parameters(const gmx::MDLogger& mdlog,
2787                                   gmx_domdec_t*        dd,
2788                                   real                 dlb_scale,
2789                                   const gmx_mtop_t&    mtop,
2790                                   const t_inputrec&    inputrec,
2791                                   const gmx_ddbox_t*   ddbox)
2792 {
2793     gmx_domdec_comm_t* comm        = dd->comm;
2794     DDRankSetup&       ddRankSetup = comm->ddRankSetup;
2795
2796     if (EEL_PME(inputrec.coulombtype) || EVDW_PME(inputrec.vdwtype))
2797     {
2798         init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2799         if (ddRankSetup.npmedecompdim >= 2)
2800         {
2801             init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2802         }
2803     }
2804     else
2805     {
2806         ddRankSetup.numRanksDoingPme = 0;
2807         if (dd->pme_nodeid >= 0)
2808         {
2809             gmx_fatal_collective(FARGS,
2810                                  dd->mpi_comm_all,
2811                                  DDMASTER(dd),
2812                                  "Can not have separate PME ranks without PME electrostatics");
2813         }
2814     }
2815
2816     if (debug)
2817     {
2818         fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2819     }
2820     if (!isDlbDisabled(comm))
2821     {
2822         set_cell_limits_dlb(mdlog, dd, dlb_scale, inputrec, ddbox);
2823     }
2824
2825     logSettings(mdlog, dd, mtop, inputrec, dlb_scale, ddbox);
2826
2827     const real vol_frac = (inputrec.pbcType == PbcType::No)
2828                                   ? (1 - 1 / static_cast<double>(dd->nnodes))
2829                                   : ((1 + comm_box_frac(dd->numCells, comm->systemInfo.cutoff, *ddbox))
2830                                      / static_cast<double>(dd->nnodes));
2831     if (debug)
2832     {
2833         fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2834     }
2835     int natoms_tot = mtop.natoms;
2836
2837     dd->ga2la = new gmx_ga2la_t(natoms_tot, static_cast<int>(vol_frac * natoms_tot));
2838 }
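
/* Illustration of the ga2la sizing above (hypothetical numbers): with pbc != no,
 * 8 PP ranks and comm_box_frac() returning 0.5, vol_frac = (1 + 0.5) / 8 = 0.1875,
 * so for a 100000-atom system the global-to-local lookup is sized for roughly
 * 18750 local+communicated atoms instead of all 100000.
 */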
2839
2840 /*! \brief Get some important DD parameters which can be modified by environment variables */
2841 static DDSettings getDDSettings(const gmx::MDLogger&     mdlog,
2842                                 const DomdecOptions&     options,
2843                                 const gmx::MdrunOptions& mdrunOptions,
2844                                 const t_inputrec&        ir)
2845 {
2846     DDSettings ddSettings;
2847
2848     ddSettings.useSendRecv2        = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2849     ddSettings.dlb_scale_lim       = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2850     ddSettings.useDDOrderZYX       = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
2851     ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
2852     ddSettings.eFlop               = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2853     const int recload              = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2854     ddSettings.nstDDDump           = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2855     ddSettings.nstDDDumpGrid       = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2856     ddSettings.DD_debug            = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
2857
2858     if (ddSettings.useSendRecv2)
2859     {
2860         GMX_LOG(mdlog.info)
2861                 .appendText(
2862                         "Will use two sequential MPI_Sendrecv calls instead of two simultaneous "
2863                         "non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite "
2864                         "communication");
2865     }
2866
2867     if (ddSettings.eFlop)
2868     {
2869         GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2870         ddSettings.recordLoad = true;
2871     }
2872     else
2873     {
2874         ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
2875     }
2876
2877     ddSettings.initialDlbState =
2878             determineInitialDlbState(mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, ir);
2879     GMX_LOG(mdlog.info)
2880             .appendTextFormatted("Dynamic load balancing: %s",
2881                                  enumValueToString(ddSettings.initialDlbState));
2882
2883     return ddSettings;
2884 }
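
/* Usage sketch for the environment variables read above (shell, illustrative
 * values only):
 *
 *   GMX_DD_DEBUG=1 GMX_DD_NST_DUMP=100 GMX_DLB_BASED_ON_FLOPS=1 gmx mdrun -deffnm run
 *
 * which turns on DD consistency checks, requests DD state dumps every 100 steps
 * and bases dynamic load balancing on FLOP counts rather than measured cycles.
 */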
2885
2886 gmx_domdec_t::gmx_domdec_t(const t_inputrec& ir) : unitCellInfo(ir) {}
2887
2888 gmx_domdec_t::~gmx_domdec_t() = default;
2889
2890 namespace gmx
2891 {
2892
2893 // TODO once the functionality stabilizes, move this class and
2894 // supporting functionality into builder.cpp
2895 /*! \brief Impl class for DD builder */
2896 class DomainDecompositionBuilder::Impl
2897 {
2898 public:
2899     //! Constructor
2900     Impl(const MDLogger&      mdlog,
2901          t_commrec*           cr,
2902          const DomdecOptions& options,
2903          const MdrunOptions&  mdrunOptions,
2904          const gmx_mtop_t&    mtop,
2905          const t_inputrec&    ir,
2906          const matrix         box,
2907          ArrayRef<const RVec> xGlobal);
2908
2909     //! Build the resulting DD manager
2910     gmx_domdec_t* build(LocalAtomSetManager* atomSets);
2911
2912     //! Objects used in constructing and configuring DD
2913     //! {
2914     //! Logging object
2915     const MDLogger& mdlog_;
2916     //! Communication object
2917     t_commrec* cr_;
2918     //! User-supplied options configuring DD behavior
2919     const DomdecOptions options_;
2920     //! Global system topology
2921     const gmx_mtop_t& mtop_;
2922     //! User input values from the tpr file
2923     const t_inputrec& ir_;
2924     //! }
2925
2926     //! Internal objects used in constructing DD
2927     //! {
2928     //! Settings combined from the user input
2929     DDSettings ddSettings_;
2930     //! Information derived from the simulation system
2931     DDSystemInfo systemInfo_;
2932     //! Box structure
2933     gmx_ddbox_t ddbox_ = { 0 };
2934     //! Organization of the DD grids
2935     DDGridSetup ddGridSetup_;
2936     //! Organization of the DD ranks
2937     DDRankSetup ddRankSetup_;
2938     //! Number of DD cells in each dimension
2939     ivec ddCellIndex_ = { 0, 0, 0 };
2940     //! IDs of PME-only ranks
2941     std::vector<int> pmeRanks_;
2942     //! Contains a valid Cartesian-communicator-based setup, or defaults.
2943     CartesianRankSetup cartSetup_;
2944     //! }
2945 };
2946
2947 DomainDecompositionBuilder::Impl::Impl(const MDLogger&      mdlog,
2948                                        t_commrec*           cr,
2949                                        const DomdecOptions& options,
2950                                        const MdrunOptions&  mdrunOptions,
2951                                        const gmx_mtop_t&    mtop,
2952                                        const t_inputrec&    ir,
2953                                        const matrix         box,
2954                                        ArrayRef<const RVec> xGlobal) :
2955     mdlog_(mdlog),
2956     cr_(cr),
2957     options_(options),
2958     mtop_(mtop),
2959     ir_(ir)
2960 {
2961     GMX_LOG(mdlog_.info).appendTextFormatted("\nInitializing Domain Decomposition on %d ranks", cr_->sizeOfDefaultCommunicator);
2962
2963     ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_);
2964
2965     if (ddSettings_.eFlop > 1)
2966     {
2967         /* Ensure that we have different random flop counts on different ranks */
2968         srand(1 + cr_->rankInDefaultCommunicator);
2969     }
2970
2971     systemInfo_ = getSystemInfo(mdlog_,
2972                                 MASTER(cr_) ? DDRole::Master : DDRole::Agent,
2973                                 cr->mpiDefaultCommunicator,
2974                                 options_,
2975                                 mtop_,
2976                                 ir_,
2977                                 box,
2978                                 xGlobal);
2979
2980     const int  numRanksRequested         = cr_->sizeOfDefaultCommunicator;
2981     const bool checkForLargePrimeFactors = (options_.numCells[0] <= 0);
2982     checkForValidRankCountRequests(
2983             numRanksRequested, EEL_PME(ir_.coulombtype), options_.numPmeRanks, checkForLargePrimeFactors);
2984
2985     // DD grid setup uses a different cell size limit for
2986     // automated setup than the one in systemInfo_. The latter is used
2987     // in set_dd_limits() to configure DLB, for example.
2988     const real gridSetupCellsizeLimit =
2989             getDDGridSetupCellSizeLimit(mdlog_,
2990                                         !isDlbDisabled(ddSettings_.initialDlbState),
2991                                         options_.dlbScaling,
2992                                         ir_,
2993                                         systemInfo_.cellsizeLimit);
2994     ddGridSetup_ = getDDGridSetup(mdlog_,
2995                                   MASTER(cr_) ? DDRole::Master : DDRole::Agent,
2996                                   cr->mpiDefaultCommunicator,
2997                                   numRanksRequested,
2998                                   options_,
2999                                   ddSettings_,
3000                                   systemInfo_,
3001                                   gridSetupCellsizeLimit,
3002                                   mtop_,
3003                                   ir_,
3004                                   box,
3005                                   xGlobal,
3006                                   &ddbox_);
3007     checkDDGridSetup(ddGridSetup_,
3008                      MASTER(cr_) ? DDRole::Master : DDRole::Agent,
3009                      cr->mpiDefaultCommunicator,
3010                      cr->sizeOfDefaultCommunicator,
3011                      options_,
3012                      ddSettings_,
3013                      systemInfo_,
3014                      gridSetupCellsizeLimit,
3015                      ddbox_);
3016
3017     cr_->npmenodes = ddGridSetup_.numPmeOnlyRanks;
3018
3019     ddRankSetup_ = getDDRankSetup(mdlog_, cr_->sizeOfDefaultCommunicator, ddGridSetup_, ir_);
3020
3021     /* Generate the group communicator; this also decides the duty of each rank */
3022     cartSetup_ = makeGroupCommunicators(
3023             mdlog_, ddSettings_, options_.rankOrder, ddRankSetup_, cr_, ddCellIndex_, &pmeRanks_);
3024 }
3025
3026 gmx_domdec_t* DomainDecompositionBuilder::Impl::build(LocalAtomSetManager* atomSets)
3027 {
3028     gmx_domdec_t* dd = new gmx_domdec_t(ir_);
3029
3030     copy_ivec(ddCellIndex_, dd->ci);
3031
3032     dd->comm = init_dd_comm();
3033
3034     dd->comm->ddRankSetup        = ddRankSetup_;
3035     dd->comm->cartesianRankSetup = cartSetup_;
3036
3037     set_dd_limits(mdlog_,
3038                   MASTER(cr_) ? DDRole::Master : DDRole::Agent,
3039                   dd,
3040                   options_,
3041                   ddSettings_,
3042                   systemInfo_,
3043                   ddGridSetup_,
3044                   ddRankSetup_.numPPRanks,
3045                   mtop_,
3046                   ir_,
3047                   ddbox_);
3048
3049     setupGroupCommunication(mdlog_, ddSettings_, pmeRanks_, cr_, mtop_.natoms, dd);
3050
3051     if (thisRankHasDuty(cr_, DUTY_PP))
3052     {
3053         set_ddgrid_parameters(mdlog_, dd, options_.dlbScaling, mtop_, ir_, &ddbox_);
3054
3055         setup_neighbor_relations(dd);
3056     }
3057
3058     /* Set overallocation to avoid frequent reallocation of arrays */
3059     set_over_alloc_dd(true);
3060
3061     dd->atomSets = atomSets;
3062
3063     return dd;
3064 }
3065
3066 DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger&      mdlog,
3067                                                        t_commrec*           cr,
3068                                                        const DomdecOptions& options,
3069                                                        const MdrunOptions&  mdrunOptions,
3070                                                        const gmx_mtop_t&    mtop,
3071                                                        const t_inputrec&    ir,
3072                                                        const matrix         box,
3073                                                        ArrayRef<const RVec> xGlobal) :
3074     impl_(new Impl(mdlog, cr, options, mdrunOptions, mtop, ir, box, xGlobal))
3075 {
3076 }
3077
3078 gmx_domdec_t* DomainDecompositionBuilder::build(LocalAtomSetManager* atomSets)
3079 {
3080     return impl_->build(atomSets);
3081 }
3082
3083 DomainDecompositionBuilder::~DomainDecompositionBuilder() = default;
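
/* Minimal usage sketch for the builder above (error handling and the surrounding
 * mdrun setup omitted; the local variable names are illustrative only):
 *
 * \code
 *   gmx::DomainDecompositionBuilder builder(
 *           mdlog, cr, domdecOptions, mdrunOptions, mtop, inputrec, box, xGlobal);
 *   gmx::LocalAtomSetManager atomSets;
 *   gmx_domdec_t*            dd = builder.build(&atomSets);
 * \endcode
 *
 * Construction sets up the communicators and rank layout; build() then creates
 * and configures the gmx_domdec_t that mdrun uses for partitioning.
 */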
3084
3085 } // namespace gmx
3086
3087 static gmx_bool test_dd_cutoff(const t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3088 {
3089     gmx_ddbox_t ddbox;
3090     int         LocallyLimited = 0;
3091
3092     const auto* dd = cr->dd;
3093
3094     set_ddbox(*dd, false, box, true, x, &ddbox);
3095
3098     for (int d = 0; d < dd->ndim; d++)
3099     {
3100         const int dim = dd->dim[d];
3101
3102         real inv_cell_size = DD_CELL_MARGIN * dd->numCells[dim] / ddbox.box_size[dim];
3103         if (dd->unitCellInfo.ddBoxIsDynamic)
3104         {
3105             inv_cell_size *= DD_PRES_SCALE_MARGIN;
3106         }
3107
3108         const int np = 1 + static_cast<int>(cutoffRequested * inv_cell_size * ddbox.skew_fac[dim]);
3109
3110         if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3111         {
3112             if (np > dd->comm->cd[d].np_dlb)
3113             {
3114                 return FALSE;
3115             }
3116
3117             /* If a current local cell size is smaller than the requested
3118              * cut-off, we could still fix it, but this gets very complicated.
3119              * Since we do not fix it here, more checks might be needed elsewhere.
3120              */
3121             real cellSizeAlongDim =
3122                     (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim]) * ddbox.skew_fac[dim];
3123             if (cellSizeAlongDim * dd->comm->cd[d].np_dlb < cutoffRequested)
3124             {
3125                 LocallyLimited = 1;
3126             }
3127         }
3128     }
3129
3130     if (!isDlbDisabled(dd->comm))
3131     {
3132         /* If DLB is not active yet, we don't need to check the grid jumps.
3133          * Actually we shouldn't, because then the grid jump data is not set.
3134          */
3135         if (isDlbOn(dd->comm) && gmx::check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3136         {
3137             LocallyLimited = 1;
3138         }
3139
3140         gmx_sumi(1, &LocallyLimited, cr);
3141
3142         if (LocallyLimited > 0)
3143         {
3144             return FALSE;
3145         }
3146     }
3147
3148     return TRUE;
3149 }
3150
3151 bool change_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3152 {
3153     bool bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3154
3155     if (bCutoffAllowed)
3156     {
3157         cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3158     }
3159
3160     return bCutoffAllowed;
3161 }
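
/* Usage sketch (illustrative only; a typical caller is load balancing code that
 * wants to try a larger cut-off):
 *
 *   if (!change_dd_cutoff(cr, box, x, newCutoff))
 *   {
 *       // The current DD grid cannot support newCutoff; keep the old setup.
 *   }
 *
 * test_dd_cutoff() above checks, per DD dimension, that the number of
 * communication pulses required for the new cut-off does not exceed what the
 * (possibly DLB-limited) grid allows on any rank.
 */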
3162
3163 void constructGpuHaloExchange(const gmx::MDLogger&            mdlog,
3164                               const t_commrec&                cr,
3165                               const gmx::DeviceStreamManager& deviceStreamManager,
3166                               gmx_wallcycle*                  wcycle)
3167 {
3168     GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedLocal),
3169                        "Local non-bonded stream should be valid when using "
3170                        "GPU halo exchange.");
3171     GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedNonLocal),
3172                        "Non-local non-bonded stream should be valid when using "
3173                        "GPU halo exchange.");
3174
3175     if (cr.dd->gpuHaloExchange[0].empty())
3176     {
3177         GMX_LOG(mdlog.warning)
3178                 .asParagraph()
3179                 .appendTextFormatted(
3180                         "NOTE: Activating the 'GPU halo exchange' feature, enabled "
3181                         "by the "
3182                         "GMX_GPU_DD_COMMS environment variable.");
3183     }
3184
3185     for (int d = 0; d < cr.dd->ndim; d++)
3186     {
3187         for (int pulse = cr.dd->gpuHaloExchange[d].size(); pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
3188         {
3189             cr.dd->gpuHaloExchange[d].push_back(std::make_unique<gmx::GpuHaloExchange>(
3190                     cr.dd,
3191                     d,
3192                     cr.mpi_comm_mygroup,
3193                     deviceStreamManager.context(),
3194                     deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedLocal),
3195                     deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedNonLocal),
3196                     pulse,
3197                     wcycle));
3198         }
3199     }
3200 }
3201
3202 void reinitGpuHaloExchange(const t_commrec&              cr,
3203                            const DeviceBuffer<gmx::RVec> d_coordinatesBuffer,
3204                            const DeviceBuffer<gmx::RVec> d_forcesBuffer)
3205 {
3206     for (int d = 0; d < cr.dd->ndim; d++)
3207     {
3208         for (int pulse = 0; pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
3209         {
3210             cr.dd->gpuHaloExchange[d][pulse]->reinitHalo(d_coordinatesBuffer, d_forcesBuffer);
3211         }
3212     }
3213 }
3214
3215 void communicateGpuHaloCoordinates(const t_commrec&      cr,
3216                                    const matrix          box,
3217                                    GpuEventSynchronizer* coordinatesReadyOnDeviceEvent)
3218 {
3219     for (int d = 0; d < cr.dd->ndim; d++)
3220     {
3221         for (int pulse = 0; pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
3222         {
3223             cr.dd->gpuHaloExchange[d][pulse]->communicateHaloCoordinates(box, coordinatesReadyOnDeviceEvent);
3224         }
3225     }
3226 }
3227
3228 void communicateGpuHaloForces(const t_commrec& cr, bool accumulateForces)
3229 {
3230     for (int d = cr.dd->ndim - 1; d >= 0; d--)
3231     {
3232         for (int pulse = cr.dd->comm->cd[d].numPulses() - 1; pulse >= 0; pulse--)
3233         {
3234             cr.dd->gpuHaloExchange[d][pulse]->communicateHaloForces(accumulateForces);
3235         }
3236     }
3237 }
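
/* Call-order sketch for the GPU halo exchange helpers above (illustrative only;
 * device buffer and event names are placeholders, and the conditions for enabling
 * the feature are omitted):
 *
 *   constructGpuHaloExchange(mdlog, *cr, deviceStreamManager, wcycle); // after (re)partitioning
 *   reinitGpuHaloExchange(*cr, d_x, d_f);                              // buffers for this partition
 *   communicateGpuHaloCoordinates(*cr, box, xReadyOnDeviceEvent);      // before non-local work
 *   communicateGpuHaloForces(*cr, accumulateForces);                   // after non-local forces
 */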