Refactor gmx_reverse_top_t
[alexxy/gromacs.git] / src / gromacs / domdec / domdec.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2005,2006,2007,2008,2009 by the GROMACS development team.
5  * Copyright (c) 2010,2011,2012,2013,2014 by the GROMACS development team.
6  * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team.
7  * Copyright (c) 2020,2021, by the GROMACS development team, led by
8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9  * and including many others, as listed in the AUTHORS file in the
10  * top-level source directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38
39 #include "gmxpre.h"
40
41 #include "domdec.h"
42
43 #include "config.h"
44
45 #include <cassert>
46 #include <cinttypes>
47 #include <climits>
48 #include <cmath>
49 #include <cstdio>
50 #include <cstdlib>
51 #include <cstring>
52
53 #include <algorithm>
54 #include <memory>
55
56 #include "gromacs/domdec/builder.h"
57 #include "gromacs/domdec/collect.h"
58 #include "gromacs/domdec/dlb.h"
59 #include "gromacs/domdec/dlbtiming.h"
60 #include "gromacs/domdec/domdec_network.h"
61 #include "gromacs/domdec/ga2la.h"
62 #include "gromacs/domdec/gpuhaloexchange.h"
63 #include "gromacs/domdec/localtopologychecker.h"
64 #include "gromacs/domdec/options.h"
65 #include "gromacs/domdec/partition.h"
66 #include "gromacs/domdec/reversetopology.h"
67 #include "gromacs/gmxlib/network.h"
68 #include "gromacs/gmxlib/nrnb.h"
69 #include "gromacs/gpu_utils/device_stream_manager.h"
70 #include "gromacs/gpu_utils/gpu_utils.h"
71 #include "gromacs/hardware/hw_info.h"
72 #include "gromacs/math/vec.h"
73 #include "gromacs/math/vectypes.h"
74 #include "gromacs/mdlib/calc_verletbuf.h"
75 #include "gromacs/mdlib/constr.h"
76 #include "gromacs/mdlib/constraintrange.h"
77 #include "gromacs/mdlib/updategroups.h"
78 #include "gromacs/mdlib/vsite.h"
79 #include "gromacs/mdtypes/commrec.h"
80 #include "gromacs/mdtypes/forceoutput.h"
81 #include "gromacs/mdtypes/inputrec.h"
82 #include "gromacs/mdtypes/mdrunoptions.h"
83 #include "gromacs/mdtypes/state.h"
84 #include "gromacs/pbcutil/ishift.h"
85 #include "gromacs/pbcutil/pbc.h"
86 #include "gromacs/pulling/pull.h"
87 #include "gromacs/timing/wallcycle.h"
88 #include "gromacs/topology/block.h"
89 #include "gromacs/topology/idef.h"
90 #include "gromacs/topology/ifunc.h"
91 #include "gromacs/topology/mtop_lookup.h"
92 #include "gromacs/topology/mtop_util.h"
93 #include "gromacs/topology/topology.h"
94 #include "gromacs/utility/basedefinitions.h"
95 #include "gromacs/utility/basenetwork.h"
96 #include "gromacs/utility/cstringutil.h"
97 #include "gromacs/utility/enumerationhelpers.h"
98 #include "gromacs/utility/exceptions.h"
99 #include "gromacs/utility/fatalerror.h"
100 #include "gromacs/utility/gmxmpi.h"
101 #include "gromacs/utility/logger.h"
102 #include "gromacs/utility/real.h"
103 #include "gromacs/utility/smalloc.h"
104 #include "gromacs/utility/strconvert.h"
105 #include "gromacs/utility/stringstream.h"
106 #include "gromacs/utility/stringutil.h"
107 #include "gromacs/utility/textwriter.h"
108
109 #include "atomdistribution.h"
110 #include "box.h"
111 #include "cellsizes.h"
112 #include "distribute.h"
113 #include "domdec_constraints.h"
114 #include "domdec_internal.h"
115 #include "domdec_setup.h"
116 #include "domdec_vsite.h"
117 #include "redistribute.h"
118 #include "utility.h"
119
120 // TODO remove this when moving domdec into gmx namespace
121 using gmx::ArrayRef;
122 using gmx::DdRankOrder;
123 using gmx::DlbOption;
124 using gmx::DomdecOptions;
125 using gmx::RangePartitioning;
126
127 static const char* enumValueToString(DlbState enumValue)
128 {
129     static constexpr gmx::EnumerationArray<DlbState, const char*> dlbStateNames = {
130         "off", "off", "auto", "locked", "on", "on"
131     };
132     return dlbStateNames[enumValue];
133 }
134
135 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
136 #define DD_CGIBS 2
137
138 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
139 #define DD_FLAG_NRCG 65535
140 #define DD_FLAG_FW(d) (1 << (16 + (d)*2))
141 #define DD_FLAG_BW(d) (1 << (16 + (d)*2 + 1))
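/* A small sketch of how a flag word can be packed with these macros
 * (the values below are purely illustrative):
 *
 *   int flag = 0;
 *   flag |= numAtomsInGroup & DD_FLAG_NRCG; // bits 0-15: group size
 *   flag |= DD_FLAG_FW(0);                  // bit 16: move forward along dim 0
 *   flag |= DD_FLAG_BW(1);                  // bit 19: move backward along dim 1
 */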
142
143 /* The DD zone order */
144 static const ivec dd_zo[DD_MAXZONE] = { { 0, 0, 0 }, { 1, 0, 0 }, { 1, 1, 0 }, { 0, 1, 0 },
145                                         { 0, 1, 1 }, { 0, 0, 1 }, { 1, 0, 1 }, { 1, 1, 1 } };
146
147 /* The non-bonded zone-pair setup for domain decomposition
148  * The first number is the i-zone, the second number the first j-zone seen by
149  * this i-zone, the third number the last+1 j-zone seen by this i-zone.
150  * As is, this is for 3D decomposition, where there are 4 i-zones.
151  * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
152  * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
153  */
154 static const int ddNonbondedZonePairRanges[DD_MAXIZONE][3] = { { 0, 0, 8 },
155                                                                { 1, 3, 6 },
156                                                                { 2, 5, 6 },
157                                                                { 3, 5, 7 } };
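/* A worked example of how these ranges collapse for lower-dimensional
 * decompositions, using the clipping to nzone described above:
 *
 *   3D: nzone = 8, i-zones 0-3, j-zone ranges [0,8), [3,6), [5,6), [5,7)
 *   2D: nzone = 4, i-zones 0-1, j-zone ranges clipped to [0,4) and [3,4)
 *   1D: nzone = 2, i-zone  0,   j-zone range  clipped to [0,2)
 */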
158
159 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
160 {
161     xyz[XX] = ind / (nc[YY] * nc[ZZ]);
162     xyz[YY] = (ind / nc[ZZ]) % nc[YY];
163     xyz[ZZ] = ind % nc[ZZ];
164 }
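/* A small worked example of this mapping, assuming the usual row-major layout
 * in which dd_index() is the inverse of ddindex2xyz():
 *
 *   ivec nc = { 2, 3, 4 }; // hypothetical 2x3x4 decomposition
 *   ivec xyz;
 *   ddindex2xyz(nc, 13, xyz); // xyz = { 1, 0, 1 }
 *   // dd_index(nc, xyz) == (1*3 + 0)*4 + 1 == 13
 */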
165
166 static int ddcoord2ddnodeid(gmx_domdec_t* dd, ivec c)
167 {
168     int ddnodeid = -1;
169
170     const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
171     const int                 ddindex   = dd_index(dd->numCells, c);
172     if (cartSetup.bCartesianPP_PME)
173     {
174         ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
175     }
176     else if (cartSetup.bCartesianPP)
177     {
178 #if GMX_MPI
179         MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
180 #endif
181     }
182     else
183     {
184         ddnodeid = ddindex;
185     }
186
187     return ddnodeid;
188 }
189
190 int ddglatnr(const gmx_domdec_t* dd, int i)
191 {
192     int atnr = 0;
193
194     if (dd == nullptr)
195     {
196         atnr = i + 1;
197     }
198     else
199     {
200         if (i >= dd->comm->atomRanges.numAtomsTotal())
201         {
202             gmx_fatal(FARGS,
203                       "glatnr called with %d, which is larger than the local number of atoms (%d)",
204                       i,
205                       dd->comm->atomRanges.numAtomsTotal());
206         }
207         atnr = dd->globalAtomIndices[i] + 1;
208     }
209
210     return atnr;
211 }
212
213 void dd_store_state(const gmx_domdec_t& dd, t_state* state)
214 {
215     if (state->ddp_count != dd.ddp_count)
216     {
217         gmx_incons("The MD state does not match the domain decomposition state");
218     }
219
220     state->cg_gl.resize(dd.ncg_home);
221     for (int i = 0; i < dd.ncg_home; i++)
222     {
223         state->cg_gl[i] = dd.globalAtomGroupIndices[i];
224     }
225
226     state->ddp_count_cg_gl = dd.ddp_count;
227 }
228
229 gmx_domdec_zones_t* domdec_zones(gmx_domdec_t* dd)
230 {
231     return &dd->comm->zones;
232 }
233
234 int dd_numAtomsZones(const gmx_domdec_t& dd)
235 {
236     return dd.comm->atomRanges.end(DDAtomRanges::Type::Zones);
237 }
238
239 int dd_numHomeAtoms(const gmx_domdec_t& dd)
240 {
241     return dd.comm->atomRanges.numHomeAtoms();
242 }
243
244 int dd_natoms_mdatoms(const gmx_domdec_t& dd)
245 {
246     /* We currently set mdatoms entries for all atoms:
247      * local + non-local + communicated for vsite + constraints
248      */
249
250     return dd.comm->atomRanges.numAtomsTotal();
251 }
252
253 int dd_natoms_vsite(const gmx_domdec_t& dd)
254 {
255     return dd.comm->atomRanges.end(DDAtomRanges::Type::Vsites);
256 }
257
258 void dd_get_constraint_range(const gmx_domdec_t& dd, int* at_start, int* at_end)
259 {
260     *at_start = dd.comm->atomRanges.start(DDAtomRanges::Type::Constraints);
261     *at_end   = dd.comm->atomRanges.end(DDAtomRanges::Type::Constraints);
262 }
263
264 void dd_move_x(gmx_domdec_t* dd, const matrix box, gmx::ArrayRef<gmx::RVec> x, gmx_wallcycle* wcycle)
265 {
266     wallcycle_start(wcycle, WallCycleCounter::MoveX);
267
268     rvec shift = { 0, 0, 0 };
269
270     gmx_domdec_comm_t* comm = dd->comm;
271
272     int nzone   = 1;
273     int nat_tot = comm->atomRanges.numHomeAtoms();
274     for (int d = 0; d < dd->ndim; d++)
275     {
276         const bool bPBC   = (dd->ci[dd->dim[d]] == 0);
277         const bool bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
278         if (bPBC)
279         {
280             copy_rvec(box[dd->dim[d]], shift);
281         }
282         gmx_domdec_comm_dim_t* cd = &comm->cd[d];
283         for (const gmx_domdec_ind_t& ind : cd->ind)
284         {
285             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
286             gmx::ArrayRef<gmx::RVec>& sendBuffer = sendBufferAccess.buffer;
287             int                       n          = 0;
288             if (!bPBC)
289             {
290                 for (int j : ind.index)
291                 {
292                     sendBuffer[n] = x[j];
293                     n++;
294                 }
295             }
296             else if (!bScrew)
297             {
298                 for (int j : ind.index)
299                 {
300                     /* We need to shift the coordinates */
301                     for (int d = 0; d < DIM; d++)
302                     {
303                         sendBuffer[n][d] = x[j][d] + shift[d];
304                     }
305                     n++;
306                 }
307             }
308             else
309             {
310                 for (int j : ind.index)
311                 {
312                     /* Shift x */
313                     sendBuffer[n][XX] = x[j][XX] + shift[XX];
314                     /* Rotate y and z.
315                      * This operation requires a special shift force
316                      * treatment, which is performed in calc_vir.
317                      */
318                     sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
319                     sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
320                     n++;
321                 }
322             }
323
324             DDBufferAccess<gmx::RVec> receiveBufferAccess(
325                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
326
327             gmx::ArrayRef<gmx::RVec> receiveBuffer;
328             if (cd->receiveInPlace)
329             {
330                 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
331             }
332             else
333             {
334                 receiveBuffer = receiveBufferAccess.buffer;
335             }
336             /* Send and receive the coordinates */
337             ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
338
339             if (!cd->receiveInPlace)
340             {
341                 int j = 0;
342                 for (int zone = 0; zone < nzone; zone++)
343                 {
344                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
345                     {
346                         x[i] = receiveBuffer[j++];
347                     }
348                 }
349             }
350             nat_tot += ind.nrecv[nzone + 1];
351         }
352         nzone += nzone;
353     }
354
355     wallcycle_stop(wcycle, WallCycleCounter::MoveX);
356 }
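/* A short sketch of the communication pattern above: along each decomposed
 * dimension the number of zones doubles (nzone += nzone) and the received
 * halo coordinates are appended after the nat_tot atoms already present.
 * With screw PBC along x the sent coordinates are transformed as
 * (illustrative form, following the loop above):
 *
 *   x' = x + box[XX][XX];  // shift along x
 *   y' = box[YY][YY] - y;  // rotate y and z by 180 degrees,
 *   z' = box[ZZ][ZZ] - z;  // handled specially in calc_vir
 */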
357
358 void dd_move_f(gmx_domdec_t* dd, gmx::ForceWithShiftForces* forceWithShiftForces, gmx_wallcycle* wcycle)
359 {
360     wallcycle_start(wcycle, WallCycleCounter::MoveF);
361
362     gmx::ArrayRef<gmx::RVec> f      = forceWithShiftForces->force();
363     gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();
364
365     gmx_domdec_comm_t& comm    = *dd->comm;
366     int                nzone   = comm.zones.n / 2;
367     int                nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
368     for (int d = dd->ndim - 1; d >= 0; d--)
369     {
370         /* Only forces in domains near the PBC boundaries need to
371            consider PBC in the treatment of fshift */
372         const bool shiftForcesNeedPbc =
373                 (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
374         const bool applyScrewPbc =
375                 (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
376         /* Determine which shift vector we need */
377         ivec vis        = { 0, 0, 0 };
378         vis[dd->dim[d]] = 1;
379         const int is    = gmx::ivecToShiftIndex(vis);
380
381         /* Loop over the pulses */
382         const gmx_domdec_comm_dim_t& cd = comm.cd[d];
383         for (int p = cd.numPulses() - 1; p >= 0; p--)
384         {
385             const gmx_domdec_ind_t&   ind = cd.ind[p];
386             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
387             gmx::ArrayRef<gmx::RVec>& receiveBuffer = receiveBufferAccess.buffer;
388
389             nat_tot -= ind.nrecv[nzone + 1];
390
391             DDBufferAccess<gmx::RVec> sendBufferAccess(
392                     comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
393
394             gmx::ArrayRef<gmx::RVec> sendBuffer;
395             if (cd.receiveInPlace)
396             {
397                 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
398             }
399             else
400             {
401                 sendBuffer = sendBufferAccess.buffer;
402                 int j      = 0;
403                 for (int zone = 0; zone < nzone; zone++)
404                 {
405                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
406                     {
407                         sendBuffer[j++] = f[i];
408                     }
409                 }
410             }
411             /* Communicate the forces */
412             ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
413             /* Add the received forces */
414             int n = 0;
415             if (!shiftForcesNeedPbc)
416             {
417                 for (int j : ind.index)
418                 {
419                     for (int d = 0; d < DIM; d++)
420                     {
421                         f[j][d] += receiveBuffer[n][d];
422                     }
423                     n++;
424                 }
425             }
426             else if (!applyScrewPbc)
427             {
428                 for (int j : ind.index)
429                 {
430                     for (int d = 0; d < DIM; d++)
431                     {
432                         f[j][d] += receiveBuffer[n][d];
433                     }
434                     /* Add this force to the shift force */
435                     for (int d = 0; d < DIM; d++)
436                     {
437                         fshift[is][d] += receiveBuffer[n][d];
438                     }
439                     n++;
440                 }
441             }
442             else
443             {
444                 for (int j : ind.index)
445                 {
446                     /* Rotate the force */
447                     f[j][XX] += receiveBuffer[n][XX];
448                     f[j][YY] -= receiveBuffer[n][YY];
449                     f[j][ZZ] -= receiveBuffer[n][ZZ];
450                     if (shiftForcesNeedPbc)
451                     {
452                         /* Add this force to the shift force */
453                         for (int d = 0; d < DIM; d++)
454                         {
455                             fshift[is][d] += receiveBuffer[n][d];
456                         }
457                     }
458                     n++;
459                 }
460             }
461         }
462         nzone /= 2;
463     }
464     wallcycle_stop(wcycle, WallCycleCounter::MoveF);
465 }
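/* The force reduction above runs in the reverse order of dd_move_x(): halo
 * forces are communicated back towards their home zone and accumulated, so
 * after the last pulse each home atom holds its complete force. When the
 * virial is needed, the same received force is also added to fshift[is],
 * where is selects the shift vector of the communicated dimension:
 *
 *   ivec vis = { 0, 0, 0 };
 *   vis[dd->dim[d]] = 1;
 *   const int is = gmx::ivecToShiftIndex(vis);
 */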
466
467 /* Convenience function for extracting a real buffer from an rvec buffer
468  *
469  * To reduce the number of temporary communication buffers and avoid
470  * cache pollution, we reuse gmx::RVec buffers for storing reals.
471  * This function returns a real buffer reference with the same number
472  * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
473  */
474 static gmx::ArrayRef<real> realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
475 {
476     return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0], arrayRef.size());
477 }
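/* A minimal usage sketch (the buffer size here is hypothetical): an RVec
 * buffer provisioned for numAtomsToSend elements yields a real view of the
 * same number of elements, backed by the first third of the RVec storage:
 *
 *   DDBufferAccess<gmx::RVec> access(comm->rvecBuffer, numAtomsToSend);
 *   gmx::ArrayRef<real> reals = realArrayRefFromRvecArrayRef(access.buffer);
 *   // reals.size() == numAtomsToSend
 */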
478
479 void dd_atom_spread_real(gmx_domdec_t* dd, real v[])
480 {
481     gmx_domdec_comm_t* comm = dd->comm;
482
483     int nzone   = 1;
484     int nat_tot = comm->atomRanges.numHomeAtoms();
485     for (int d = 0; d < dd->ndim; d++)
486     {
487         gmx_domdec_comm_dim_t* cd = &comm->cd[d];
488         for (const gmx_domdec_ind_t& ind : cd->ind)
489         {
490             /* Note: We provision for RVec instead of real, so a factor of 3
491              * more than needed. The buffer actually already has this size
492              * and we pass a plain pointer below, so this does not matter.
493              */
494             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
495             gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
496             int                 n          = 0;
497             for (int j : ind.index)
498             {
499                 sendBuffer[n++] = v[j];
500             }
501
502             DDBufferAccess<gmx::RVec> receiveBufferAccess(
503                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
504
505             gmx::ArrayRef<real> receiveBuffer;
506             if (cd->receiveInPlace)
507             {
508                 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
509             }
510             else
511             {
512                 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
513             }
514             /* Send and receive the data */
515             ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
516             if (!cd->receiveInPlace)
517             {
518                 int j = 0;
519                 for (int zone = 0; zone < nzone; zone++)
520                 {
521                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
522                     {
523                         v[i] = receiveBuffer[j++];
524                     }
525                 }
526             }
527             nat_tot += ind.nrecv[nzone + 1];
528         }
529         nzone += nzone;
530     }
531 }
532
533 void dd_atom_sum_real(gmx_domdec_t* dd, real v[])
534 {
535     gmx_domdec_comm_t* comm = dd->comm;
536
537     int nzone   = comm->zones.n / 2;
538     int nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
539     for (int d = dd->ndim - 1; d >= 0; d--)
540     {
541         gmx_domdec_comm_dim_t* cd = &comm->cd[d];
542         for (int p = cd->numPulses() - 1; p >= 0; p--)
543         {
544             const gmx_domdec_ind_t& ind = cd->ind[p];
545
546             /* Note: We provision for RVec instead of real, so a factor of 3
547              * more than needed. The buffer actually already has this size
548              * and we typecast, so this works as intended.
549              */
550             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
551             gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
552             nat_tot -= ind.nrecv[nzone + 1];
553
554             DDBufferAccess<gmx::RVec> sendBufferAccess(
555                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
556
557             gmx::ArrayRef<real> sendBuffer;
558             if (cd->receiveInPlace)
559             {
560                 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
561             }
562             else
563             {
564                 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
565                 int j      = 0;
566                 for (int zone = 0; zone < nzone; zone++)
567                 {
568                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
569                     {
570                         sendBuffer[j++] = v[i];
571                     }
572                 }
573             }
574             /* Communicate the data */
575             ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
576             /* Add the received values */
577             int n = 0;
578             for (int j : ind.index)
579             {
580                 v[j] += receiveBuffer[n];
581                 n++;
582             }
583         }
584         nzone /= 2;
585     }
586 }
587
588 real dd_cutoff_multibody(const gmx_domdec_t* dd)
589 {
590     const gmx_domdec_comm_t& comm       = *dd->comm;
591     const DDSystemInfo&      systemInfo = comm.systemInfo;
592
593     real r = -1;
594     if (systemInfo.haveInterDomainMultiBodyBondeds)
595     {
596         if (comm.cutoff_mbody > 0)
597         {
598             r = comm.cutoff_mbody;
599         }
600         else
601         {
602             /* cutoff_mbody=0 means we do not have DLB */
603             r = comm.cellsize_min[dd->dim[0]];
604             for (int di = 1; di < dd->ndim; di++)
605             {
606                 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
607             }
608             if (comm.systemInfo.filterBondedCommunication)
609             {
610                 r = std::max(r, comm.cutoff_mbody);
611             }
612             else
613             {
614                 r = std::min(r, systemInfo.cutoff);
615             }
616         }
617     }
618
619     return r;
620 }
621
622 real dd_cutoff_twobody(const gmx_domdec_t* dd)
623 {
624     const real r_mb = dd_cutoff_multibody(dd);
625
626     return std::max(dd->comm->systemInfo.cutoff, r_mb);
627 }
628
629
630 static void dd_cart_coord2pmecoord(const DDRankSetup&        ddRankSetup,
631                                    const CartesianRankSetup& cartSetup,
632                                    const ivec                coord,
633                                    ivec                      coord_pme)
634 {
635     const int nc   = ddRankSetup.numPPCells[cartSetup.cartpmedim];
636     const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
637     copy_ivec(coord, coord_pme);
638     coord_pme[cartSetup.cartpmedim] =
639             nc + (coord[cartSetup.cartpmedim] * (ntot - nc) + (ntot - nc) / 2) / nc;
640 }
641
642 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
643 static int ddindex2pmeindex(const DDRankSetup& ddRankSetup, const int ddCellIndex)
644 {
645     const int npp  = ddRankSetup.numPPRanks;
646     const int npme = ddRankSetup.numRanksDoingPme;
647
648     /* Here we assign a PME node to communicate with this DD node
649      * by assuming that the major index of both is x.
650      * We add npme/2 to obtain an even distribution.
651      */
652     return (ddCellIndex * npme + npme / 2) / npp;
653 }
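/* A worked example of this mapping with hypothetical counts, npp = 6 PP ranks
 * and npme = 2 PME ranks:
 *
 *   ddCellIndex:     0 1 2 3 4 5
 *   (i*2 + 1) / 6:   0 0 0 1 1 1
 *
 * so PP cells 0-2 communicate with PME rank 0 and cells 3-5 with PME rank 1.
 */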
654
655 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup& ddRankSetup)
656 {
657     std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
658
659     int n = 0;
660     for (int i = 0; i < ddRankSetup.numPPRanks; i++)
661     {
662         const int p0 = ddindex2pmeindex(ddRankSetup, i);
663         const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
664         if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
665         {
666             if (debug)
667             {
668                 fprintf(debug, "pme_rank[%d] = %d\n", n, i + 1 + n);
669             }
670             pmeRanks[n] = i + 1 + n;
671             n++;
672         }
673     }
674
675     return pmeRanks;
676 }
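/* Continuing the hypothetical example of 6 PP ranks and 2 PME ranks, the
 * interleaved ordering of simulation ranks becomes:
 *
 *   sim rank:  0   1   2   3    4   5   6   7
 *   duty:      PP  PP  PP  PME  PP  PP  PP  PME
 *
 * i.e. pmeRanks = { 3, 7 }, with each PME rank placed directly after the PP
 * ranks it serves.
 */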
677
678 static int gmx_ddcoord2pmeindex(const gmx_domdec_t& dd, int x, int y, int z)
679 {
680     ivec coords;
681
682     coords[XX]     = x;
683     coords[YY]     = y;
684     coords[ZZ]     = z;
685     const int slab = ddindex2pmeindex(dd.comm->ddRankSetup, dd_index(dd.numCells, coords));
686
687     return slab;
688 }
689
690 static int ddcoord2simnodeid(const t_commrec* cr, int x, int y, int z)
691 {
692     const CartesianRankSetup& cartSetup = cr->dd->comm->cartesianRankSetup;
693     ivec                      coords    = { x, y, z };
694     int                       nodeid    = -1;
695
696     if (cartSetup.bCartesianPP_PME)
697     {
698 #if GMX_MPI
699         MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
700 #endif
701     }
702     else
703     {
704         int ddindex = dd_index(cr->dd->numCells, coords);
705         if (cartSetup.bCartesianPP)
706         {
707             nodeid = cartSetup.ddindex2simnodeid[ddindex];
708         }
709         else
710         {
711             if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
712             {
713                 nodeid = ddindex + gmx_ddcoord2pmeindex(*cr->dd, x, y, z);
714             }
715             else
716             {
717                 nodeid = ddindex;
718             }
719         }
720     }
721
722     return nodeid;
723 }
724
725 static int dd_simnode2pmenode(const DDRankSetup&        ddRankSetup,
726                               const CartesianRankSetup& cartSetup,
727                               gmx::ArrayRef<const int>  pmeRanks,
728                               const t_commrec gmx_unused* cr,
729                               const int                   sim_nodeid)
730 {
731     int pmenode = -1;
732
733     /* This assumes a uniform x domain decomposition grid cell size */
734     if (cartSetup.bCartesianPP_PME)
735     {
736 #if GMX_MPI
737         ivec coord, coord_pme;
738         MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
739         if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
740         {
741             /* This is a PP rank */
742             dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
743             MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
744         }
745 #endif
746     }
747     else if (cartSetup.bCartesianPP)
748     {
749         if (sim_nodeid < ddRankSetup.numPPRanks)
750         {
751             pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
752         }
753     }
754     else
755     {
756         /* This assumes DD cells with identical x coordinates
757          * are numbered sequentially.
758          */
759         if (pmeRanks.empty())
760         {
761             if (sim_nodeid < ddRankSetup.numPPRanks)
762             {
763                 /* The DD index equals the nodeid */
764                 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
765             }
766         }
767         else
768         {
769             int i = 0;
770             while (sim_nodeid > pmeRanks[i])
771             {
772                 i++;
773             }
774             if (sim_nodeid < pmeRanks[i])
775             {
776                 pmenode = pmeRanks[i];
777             }
778         }
779     }
780
781     return pmenode;
782 }
783
784 NumPmeDomains getNumPmeDomains(const gmx_domdec_t* dd)
785 {
786     if (dd != nullptr)
787     {
788         return { dd->comm->ddRankSetup.npmenodes_x, dd->comm->ddRankSetup.npmenodes_y };
789     }
790     else
791     {
792         return { 1, 1 };
793     }
794 }
795
796 std::vector<int> get_pme_ddranks(const t_commrec* cr, const int pmenodeid)
797 {
798     const DDRankSetup&        ddRankSetup = cr->dd->comm->ddRankSetup;
799     const CartesianRankSetup& cartSetup   = cr->dd->comm->cartesianRankSetup;
800     GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks,
801                        "This function should only be called when PME-only ranks are in use");
802     std::vector<int> ddranks;
803     ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1) / ddRankSetup.numRanksDoingPme);
804
805     for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
806     {
807         for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
808         {
809             for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
810             {
811                 if (cartSetup.bCartesianPP_PME)
812                 {
813                     ivec coord = { x, y, z };
814                     ivec coord_pme;
815                     dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
816                     if (cr->dd->ci[XX] == coord_pme[XX] && cr->dd->ci[YY] == coord_pme[YY]
817                         && cr->dd->ci[ZZ] == coord_pme[ZZ])
818                     {
819                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
820                     }
821                 }
822                 else
823                 {
824                     /* The slab corresponds to the nodeid in the PME group */
825                     if (gmx_ddcoord2pmeindex(*cr->dd, x, y, z) == pmenodeid)
826                     {
827                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
828                     }
829                 }
830             }
831         }
832     }
833     return ddranks;
834 }
835
836 static gmx_bool receive_vir_ener(const gmx_domdec_t* dd, gmx::ArrayRef<const int> pmeRanks, const t_commrec* cr)
837 {
838     bool bReceive = true;
839
840     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
841     if (ddRankSetup.usePmeOnlyRanks)
842     {
843         const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
844         if (cartSetup.bCartesianPP_PME)
845         {
846 #if GMX_MPI
847             int  pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
848             ivec coords;
849             MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
850             coords[cartSetup.cartpmedim]++;
851             if (coords[cartSetup.cartpmedim] < dd->numCells[cartSetup.cartpmedim])
852             {
853                 int rank = 0;
854                 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
855                 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
856                 {
857                     /* This is not the last PP node for pmenode */
858                     bReceive = FALSE;
859                 }
860             }
861 #else
862             GMX_RELEASE_ASSERT(
863                     false,
864                     "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
865 #endif
866         }
867         else
868         {
869             int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
870             if (cr->sim_nodeid + 1 < cr->nnodes
871                 && dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
872             {
873                 /* This is not the last PP node for pmenode */
874                 bReceive = FALSE;
875             }
876         }
877     }
878
879     return bReceive;
880 }
881
882 static void set_slb_pme_dim_f(gmx_domdec_t* dd, int dim, real** dim_f)
883 {
884     gmx_domdec_comm_t* comm = dd->comm;
885
886     snew(*dim_f, dd->numCells[dim] + 1);
887     (*dim_f)[0] = 0;
888     for (int i = 1; i < dd->numCells[dim]; i++)
889     {
890         if (comm->slb_frac[dim])
891         {
892             (*dim_f)[i] = (*dim_f)[i - 1] + comm->slb_frac[dim][i - 1];
893         }
894         else
895         {
896             (*dim_f)[i] = static_cast<real>(i) / static_cast<real>(dd->numCells[dim]);
897         }
898     }
899     (*dim_f)[dd->numCells[dim]] = 1;
900 }
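/* A small example of the resulting fractional cell boundaries, assuming 4
 * cells along the dimension and no static load balancing fractions
 * (slb_frac == nullptr):
 *
 *   dim_f = { 0, 0.25, 0.5, 0.75, 1 }
 *
 * With slb_frac set, the per-cell fractions are accumulated instead.
 */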
901
902 static void init_ddpme(gmx_domdec_t* dd, gmx_ddpme_t* ddpme, int dimind)
903 {
904     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
905
906     if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
907     {
908         ddpme->dim = YY;
909     }
910     else
911     {
912         ddpme->dim = dimind;
913     }
914     ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
915
916     ddpme->nslab = (ddpme->dim == 0 ? ddRankSetup.npmenodes_x : ddRankSetup.npmenodes_y);
917
918     if (ddpme->nslab <= 1)
919     {
920         return;
921     }
922
923     const int nso = ddRankSetup.numRanksDoingPme / ddpme->nslab;
924     /* Determine for each PME slab the PP location range for dimension dim */
925     snew(ddpme->pp_min, ddpme->nslab);
926     snew(ddpme->pp_max, ddpme->nslab);
927     for (int slab = 0; slab < ddpme->nslab; slab++)
928     {
929         ddpme->pp_min[slab] = dd->numCells[dd->dim[dimind]] - 1;
930         ddpme->pp_max[slab] = 0;
931     }
932     for (int i = 0; i < dd->nnodes; i++)
933     {
934         ivec xyz;
935         ddindex2xyz(dd->numCells, i, xyz);
936         /* For y only use our y/z slab.
937          * This assumes that the PME x grid size matches the DD grid size.
938          */
939         if (dimind == 0 || xyz[XX] == dd->ci[XX])
940         {
941             const int pmeindex  = ddindex2pmeindex(ddRankSetup, i);
942             const int slab      = (dimind == 0) ? (pmeindex / nso) : (pmeindex % ddpme->nslab);
943             ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
944             ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
945         }
946     }
947
948     set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
949 }
950
951 int dd_pme_maxshift_x(const gmx_domdec_t& dd)
952 {
953     const DDRankSetup& ddRankSetup = dd.comm->ddRankSetup;
954
955     if (ddRankSetup.ddpme[0].dim == XX)
956     {
957         return ddRankSetup.ddpme[0].maxshift;
958     }
959     else
960     {
961         return 0;
962     }
963 }
964
965 int dd_pme_maxshift_y(const gmx_domdec_t& dd)
966 {
967     const DDRankSetup& ddRankSetup = dd.comm->ddRankSetup;
968
969     if (ddRankSetup.ddpme[0].dim == YY)
970     {
971         return ddRankSetup.ddpme[0].maxshift;
972     }
973     else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
974     {
975         return ddRankSetup.ddpme[1].maxshift;
976     }
977     else
978     {
979         return 0;
980     }
981 }
982
983 bool ddUsesUpdateGroups(const gmx_domdec_t& dd)
984 {
985     return dd.comm->systemInfo.useUpdateGroups;
986 }
987
988 void dd_cycles_add(const gmx_domdec_t* dd, float cycles, int ddCycl)
989 {
990     /* Note that the cycles value can be incorrect, either 0 or some
991      * extremely large value, when our thread migrated to another core
992      * with an unsynchronized cycle counter. If this happens less often
993      * than once per nstlist steps, this will not cause issues, since
994      * we later subtract the maximum value from the sum over nstlist steps.
995      * A zero count will slightly lower the total, but that's a small effect.
996      * Note that the main purpose of the subtraction of the maximum value
997      * is to avoid throwing off the load balancing when stalls occur,
998      * e.g. due to system activity or network congestion.
999      */
1000     dd->comm->cycl[ddCycl] += cycles;
1001     dd->comm->cycl_n[ddCycl]++;
1002     if (cycles > dd->comm->cycl_max[ddCycl])
1003     {
1004         dd->comm->cycl_max[ddCycl] = cycles;
1005     }
1006 }
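/* A numerical illustration of the note above, with hypothetical counts over
 * nstlist = 10 steps: nine normal steps of ~100 cycles plus one stall of
 * 1000 cycles sum to 1900; subtracting the recorded maximum (1000) leaves
 * 900, close to the stall-free load, so a single outlier does not skew the
 * load balancing.
 */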
1007
1008 #if GMX_MPI
1009 static void make_load_communicator(gmx_domdec_t* dd, int dim_ind, ivec loc)
1010 {
1011     MPI_Comm c_row = MPI_COMM_NULL;
1012     ivec     loc_c;
1013     bool     bPartOfGroup = false;
1014
1015     const int dim = dd->dim[dim_ind];
1016     copy_ivec(loc, loc_c);
1017     for (int i = 0; i < dd->numCells[dim]; i++)
1018     {
1019         loc_c[dim]     = i;
1020         const int rank = dd_index(dd->numCells, loc_c);
1021         if (rank == dd->rank)
1022         {
1023             /* This process is part of the group */
1024             bPartOfGroup = TRUE;
1025         }
1026     }
1027     MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank, &c_row);
1028     if (bPartOfGroup)
1029     {
1030         dd->comm->mpi_comm_load[dim_ind] = c_row;
1031         if (!isDlbDisabled(dd->comm))
1032         {
1033             DDCellsizesWithDlb& cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1034
1035             if (dd->ci[dim] == dd->master_ci[dim])
1036             {
1037                 /* This is the root process of this row */
1038                 cellsizes.rowMaster = std::make_unique<RowMaster>();
1039
1040                 RowMaster& rowMaster = *cellsizes.rowMaster;
1041                 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1042                 rowMaster.oldCellFrac.resize(dd->numCells[dim] + 1);
1043                 rowMaster.isCellMin.resize(dd->numCells[dim]);
1044                 if (dim_ind > 0)
1045                 {
1046                     rowMaster.bounds.resize(dd->numCells[dim]);
1047                 }
1048                 rowMaster.buf_ncd.resize(dd->numCells[dim]);
1049             }
1050             else
1051             {
1052                 /* This is not a root process, we only need to receive cell_f */
1053                 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1054             }
1055         }
1056         if (dd->ci[dim] == dd->master_ci[dim])
1057         {
1058             snew(dd->comm->load[dim_ind].load, dd->numCells[dim] * DD_NLOAD_MAX);
1059         }
1060     }
1061 }
1062 #endif
1063
1064 void dd_setup_dlb_resource_sharing(const t_commrec* cr, int gpu_id)
1065 {
1066 #if GMX_MPI
1067     MPI_Comm mpi_comm_pp_physicalnode = MPI_COMM_NULL;
1068
1069     if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1070     {
1071         /* Only ranks with short-ranged tasks (currently) use GPUs.
1072          * If we don't have GPUs assigned, there are no resources to share.
1073          */
1074         return;
1075     }
1076
1077     const int physicalnode_id_hash = gmx_physicalnode_id_hash();
1078
1079     gmx_domdec_t* dd = cr->dd;
1080
1081     if (debug)
1082     {
1083         fprintf(debug, "dd_setup_dlb_resource_sharing:\n");
1084         fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n", dd->rank, physicalnode_id_hash, gpu_id);
1085     }
1086     /* Split the PP communicator over the physical nodes */
1087     /* TODO: See if we should store this (before), as it's also used
1088      * for the nodecomm summation.
1089      */
1090     // TODO PhysicalNodeCommunicator could be extended/used to handle
1091     // the need for per-node per-group communicators.
1092     MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank, &mpi_comm_pp_physicalnode);
1093     MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank, &dd->comm->mpi_comm_gpu_shared);
1094     MPI_Comm_free(&mpi_comm_pp_physicalnode);
1095     MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1096
1097     if (debug)
1098     {
1099         fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1100     }
1101
1102     /* Note that some ranks could share a GPU, while others don't */
1103
1104     if (dd->comm->nrank_gpu_shared == 1)
1105     {
1106         MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1107     }
1108 #else
1109     GMX_UNUSED_VALUE(cr);
1110     GMX_UNUSED_VALUE(gpu_id);
1111 #endif
1112 }
1113
1114 static void make_load_communicators(gmx_domdec_t gmx_unused* dd)
1115 {
1116 #if GMX_MPI
1117     ivec loc;
1118
1119     if (debug)
1120     {
1121         fprintf(debug, "Making load communicators\n");
1122     }
1123
1124     dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1125     snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1126
1127     if (dd->ndim == 0)
1128     {
1129         return;
1130     }
1131
1132     clear_ivec(loc);
1133     make_load_communicator(dd, 0, loc);
1134     if (dd->ndim > 1)
1135     {
1136         const int dim0 = dd->dim[0];
1137         for (int i = 0; i < dd->numCells[dim0]; i++)
1138         {
1139             loc[dim0] = i;
1140             make_load_communicator(dd, 1, loc);
1141         }
1142     }
1143     if (dd->ndim > 2)
1144     {
1145         const int dim0 = dd->dim[0];
1146         for (int i = 0; i < dd->numCells[dim0]; i++)
1147         {
1148             loc[dim0]      = i;
1149             const int dim1 = dd->dim[1];
1150             for (int j = 0; j < dd->numCells[dim1]; j++)
1151             {
1152                 loc[dim1] = j;
1153                 make_load_communicator(dd, 2, loc);
1154             }
1155         }
1156     }
1157
1158     if (debug)
1159     {
1160         fprintf(debug, "Finished making load communicators\n");
1161     }
1162 #endif
1163 }
1164
1165 /*! \brief Sets up the relation between neighboring domains and zones */
1166 static void setup_neighbor_relations(gmx_domdec_t* dd)
1167 {
1168     ivec tmp, s;
1169     GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
1170
1171     for (int d = 0; d < dd->ndim; d++)
1172     {
1173         const int dim = dd->dim[d];
1174         copy_ivec(dd->ci, tmp);
1175         tmp[dim]           = (tmp[dim] + 1) % dd->numCells[dim];
1176         dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1177         copy_ivec(dd->ci, tmp);
1178         tmp[dim]           = (tmp[dim] - 1 + dd->numCells[dim]) % dd->numCells[dim];
1179         dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1180         if (debug)
1181         {
1182             fprintf(debug,
1183                     "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
1184                     dd->rank,
1185                     dim,
1186                     dd->neighbor[d][0],
1187                     dd->neighbor[d][1]);
1188         }
1189     }
1190
1191     int nzone  = (1 << dd->ndim);
1192     int nizone = (1 << std::max(dd->ndim - 1, 0));
1193     assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1194
1195     gmx_domdec_zones_t* zones = &dd->comm->zones;
1196
1197     for (int i = 0; i < nzone; i++)
1198     {
1199         int m = 0;
1200         clear_ivec(zones->shift[i]);
1201         for (int d = 0; d < dd->ndim; d++)
1202         {
1203             zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1204         }
1205     }
1206
1207     zones->n = nzone;
1208     for (int i = 0; i < nzone; i++)
1209     {
1210         for (int d = 0; d < DIM; d++)
1211         {
1212             s[d] = dd->ci[d] - zones->shift[i][d];
1213             if (s[d] < 0)
1214             {
1215                 s[d] += dd->numCells[d];
1216             }
1217             else if (s[d] >= dd->numCells[d])
1218             {
1219                 s[d] -= dd->numCells[d];
1220             }
1221         }
1222     }
1223     for (int iZoneIndex = 0; iZoneIndex < nizone; iZoneIndex++)
1224     {
1225         GMX_RELEASE_ASSERT(
1226                 ddNonbondedZonePairRanges[iZoneIndex][0] == iZoneIndex,
1227                 "The first element for each ddNonbondedZonePairRanges should match its index");
1228
1229         DDPairInteractionRanges iZone;
1230         iZone.iZoneIndex = iZoneIndex;
1231         /* ddNonbondedZonePairRanges is set up for 3D decomposition; for fewer
1232          * dimensions use only j-zones up to nzone.
1233          */
1234         iZone.jZoneRange = gmx::Range<int>(std::min(ddNonbondedZonePairRanges[iZoneIndex][1], nzone),
1235                                            std::min(ddNonbondedZonePairRanges[iZoneIndex][2], nzone));
1236         for (int dim = 0; dim < DIM; dim++)
1237         {
1238             if (dd->numCells[dim] == 1)
1239             {
1240                 /* All shifts should be allowed */
1241                 iZone.shift0[dim] = -1;
1242                 iZone.shift1[dim] = 1;
1243             }
1244             else
1245             {
1246                 /* Determine the min/max j-zone shift wrt the i-zone */
1247                 iZone.shift0[dim] = 1;
1248                 iZone.shift1[dim] = -1;
1249                 for (int jZone : iZone.jZoneRange)
1250                 {
1251                     int shift_diff = zones->shift[jZone][dim] - zones->shift[iZoneIndex][dim];
1252                     if (shift_diff < iZone.shift0[dim])
1253                     {
1254                         iZone.shift0[dim] = shift_diff;
1255                     }
1256                     if (shift_diff > iZone.shift1[dim])
1257                     {
1258                         iZone.shift1[dim] = shift_diff;
1259                     }
1260                 }
1261             }
1262         }
1263
1264         zones->iZones.push_back(iZone);
1265     }
1266
1267     if (!isDlbDisabled(dd->comm))
1268     {
1269         dd->comm->cellsizesWithDlb.resize(dd->ndim);
1270     }
1271
1272     if (dd->comm->ddSettings.recordLoad)
1273     {
1274         make_load_communicators(dd);
1275     }
1276 }
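/* A small worked example of the neighbor computation above, assuming 4 cells
 * along a decomposed dimension and a home cell index of 0:
 *
 *   forward  neighbor: (0 + 1) % 4     == 1
 *   backward neighbor: (0 - 1 + 4) % 4 == 3   // periodic wrap-around
 */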
1277
1278 static void make_pp_communicator(const gmx::MDLogger& mdlog,
1279                                  gmx_domdec_t*        dd,
1280                                  t_commrec gmx_unused* cr,
1281                                  bool gmx_unused       reorder)
1282 {
1283 #if GMX_MPI
1284     gmx_domdec_comm_t*  comm      = dd->comm;
1285     CartesianRankSetup& cartSetup = comm->cartesianRankSetup;
1286
1287     if (cartSetup.bCartesianPP)
1288     {
1289         /* Set up cartesian communication for the particle-particle part */
1290         GMX_LOG(mdlog.info)
1291                 .appendTextFormatted("Will use a Cartesian communicator: %d x %d x %d",
1292                                      dd->numCells[XX],
1293                                      dd->numCells[YY],
1294                                      dd->numCells[ZZ]);
1295
1296         ivec periods;
1297         for (int i = 0; i < DIM; i++)
1298         {
1299             periods[i] = TRUE;
1300         }
1301         MPI_Comm comm_cart = MPI_COMM_NULL;
1302         MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->numCells, periods, static_cast<int>(reorder), &comm_cart);
1303         /* We overwrite the old communicator with the new cartesian one */
1304         cr->mpi_comm_mygroup = comm_cart;
1305     }
1306
1307     dd->mpi_comm_all = cr->mpi_comm_mygroup;
1308     MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1309
1310     if (cartSetup.bCartesianPP_PME)
1311     {
1312         /* Since we want to use the original cartesian setup for sim,
1313          * and not the one after split, we need to make an index.
1314          */
1315         cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1316         cartSetup.ddindex2ddnodeid[dd_index(dd->numCells, dd->ci)] = dd->rank;
1317         gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1318         /* Get the rank of the DD master,
1319          * above we made sure that the master node is a PP node.
1320          */
1321         int rank = MASTER(cr) ? dd->rank : 0;
1322         MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1323     }
1324     else if (cartSetup.bCartesianPP)
1325     {
1326         if (!comm->ddRankSetup.usePmeOnlyRanks)
1327         {
1328             /* The PP communicator is also
1329              * the communicator for this simulation
1330              */
1331             cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1332         }
1333         cr->nodeid = dd->rank;
1334
1335         MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1336
1337         /* We need to make an index to go from the coordinates
1338          * to the nodeid of this simulation.
1339          */
1340         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1341         std::vector<int> buf(dd->nnodes);
1342         if (thisRankHasDuty(cr, DUTY_PP))
1343         {
1344             buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
1345         }
1346         /* Communicate the ddindex to simulation nodeid index */
1347         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM, cr->mpi_comm_mysim);
1348
1349         /* Determine the master coordinates and rank.
1350          * The DD master should be the same node as the master of this sim.
1351          */
1352         for (int i = 0; i < dd->nnodes; i++)
1353         {
1354             if (cartSetup.ddindex2simnodeid[i] == 0)
1355             {
1356                 ddindex2xyz(dd->numCells, i, dd->master_ci);
1357                 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1358             }
1359         }
1360         if (debug)
1361         {
1362             fprintf(debug, "The master rank is %d\n", dd->masterrank);
1363         }
1364     }
1365     else
1366     {
1367         /* No Cartesian communicators */
1368         /* We use the rank in dd->comm->all as DD index */
1369         ddindex2xyz(dd->numCells, dd->rank, dd->ci);
1370         /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1371         dd->masterrank = 0;
1372         clear_ivec(dd->master_ci);
1373     }
1374 #endif
1375
1376     GMX_LOG(mdlog.info)
1377             .appendTextFormatted("Domain decomposition rank %d, coordinates %d %d %d\n",
1378                                  dd->rank,
1379                                  dd->ci[XX],
1380                                  dd->ci[YY],
1381                                  dd->ci[ZZ]);
1382     if (debug)
1383     {
1384         fprintf(debug,
1385                 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
1386                 dd->rank,
1387                 dd->ci[XX],
1388                 dd->ci[YY],
1389                 dd->ci[ZZ]);
1390     }
1391 }
1392
1393 static void receive_ddindex2simnodeid(gmx_domdec_t* dd, t_commrec* cr)
1394 {
1395 #if GMX_MPI
1396     CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
1397
1398     if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1399     {
1400         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1401         std::vector<int> buf(dd->nnodes);
1402         if (thisRankHasDuty(cr, DUTY_PP))
1403         {
1404             buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
1405         }
1406         /* Communicate the ddindex to simulation nodeid index */
1407         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM, cr->mpi_comm_mysim);
1408     }
1409 #else
1410     GMX_UNUSED_VALUE(dd);
1411     GMX_UNUSED_VALUE(cr);
1412 #endif
1413 }
1414
1415 static CartesianRankSetup split_communicator(const gmx::MDLogger& mdlog,
1416                                              t_commrec*           cr,
1417                                              const DdRankOrder    ddRankOrder,
1418                                              bool gmx_unused      reorder,
1419                                              const DDRankSetup&   ddRankSetup,
1420                                              ivec                 ddCellIndex,
1421                                              std::vector<int>*    pmeRanks)
1422 {
1423     CartesianRankSetup cartSetup;
1424
1425     cartSetup.bCartesianPP     = (ddRankOrder == DdRankOrder::cartesian);
1426     cartSetup.bCartesianPP_PME = false;
1427
1428     const ivec& numDDCells = ddRankSetup.numPPCells;
1429     /* Initially we set ntot to the number of PP cells */
1430     copy_ivec(numDDCells, cartSetup.ntot);
1431
1432     if (cartSetup.bCartesianPP)
1433     {
1434         const int numDDCellsTot = ddRankSetup.numPPRanks;
1435         bool      bDiv[DIM];
1436         for (int i = 1; i < DIM; i++)
1437         {
1438             bDiv[i] = ((ddRankSetup.numRanksDoingPme * numDDCells[i]) % numDDCellsTot == 0);
1439         }
1440         if (bDiv[YY] || bDiv[ZZ])
1441         {
1442             cartSetup.bCartesianPP_PME = TRUE;
1443             /* If we have 2D PME decomposition, which is always in x+y,
1444              * we stack the PME only nodes in z.
1445              * Otherwise we choose the direction that provides the thinnest slab
1446              * of PME only nodes as this will have the least effect
1447              * on the PP communication.
1448              * But for the PME communication the opposite might be better.
1449              */
1450             if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 || !bDiv[YY] || numDDCells[YY] > numDDCells[ZZ]))
1451             {
1452                 cartSetup.cartpmedim = ZZ;
1453             }
1454             else
1455             {
1456                 cartSetup.cartpmedim = YY;
1457             }
1458             cartSetup.ntot[cartSetup.cartpmedim] +=
1459                     (ddRankSetup.numRanksDoingPme * numDDCells[cartSetup.cartpmedim]) / numDDCellsTot;
1460         }
1461         else
1462         {
1463             GMX_LOG(mdlog.info)
1464                     .appendTextFormatted(
1465                             "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or "
1466                             "nx*nz (%d*%d)",
1467                             ddRankSetup.numRanksDoingPme,
1468                             numDDCells[XX],
1469                             numDDCells[YY],
1470                             numDDCells[XX],
1471                             numDDCells[ZZ]);
1472             GMX_LOG(mdlog.info)
1473                     .appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1474         }
1475     }
1476
1477     if (cartSetup.bCartesianPP_PME)
1478     {
1479 #if GMX_MPI
1480         int  rank = 0;
1481         ivec periods;
1482
1483         GMX_LOG(mdlog.info)
1484                 .appendTextFormatted(
1485                         "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1486                         cartSetup.ntot[XX],
1487                         cartSetup.ntot[YY],
1488                         cartSetup.ntot[ZZ]);
1489
1490         for (int i = 0; i < DIM; i++)
1491         {
1492             periods[i] = TRUE;
1493         }
1494         MPI_Comm comm_cart = MPI_COMM_NULL;
1495         MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder), &comm_cart);
1496         MPI_Comm_rank(comm_cart, &rank);
1497         if (MASTER(cr) && rank != 0)
1498         {
1499             gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1500         }
1501
1502         /* With this assignment we lose the link to the original communicator,
1503          * which will usually be MPI_COMM_WORLD, unless we have multisim.
1504          */
1505         cr->mpi_comm_mysim = comm_cart;
1506         cr->sim_nodeid     = rank;
1507
1508         MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1509
1510         GMX_LOG(mdlog.info)
1511                 .appendTextFormatted("Cartesian rank %d, coordinates %d %d %d\n",
1512                                      cr->sim_nodeid,
1513                                      ddCellIndex[XX],
1514                                      ddCellIndex[YY],
1515                                      ddCellIndex[ZZ]);
1516
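        /* The duty follows the Cartesian coordinate along cartpmedim:
         * coordinates inside the PP grid get PP duty, the extra layers
         * appended for the PME-only ranks get PME duty.
         */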
1517         if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1518         {
1519             cr->duty = DUTY_PP;
1520         }
1521         if (!ddRankSetup.usePmeOnlyRanks
1522             || ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1523         {
1524             cr->duty = DUTY_PME;
1525         }
1526
1527         /* Split the sim communicator into PP and PME only nodes */
1528         MPI_Comm_split(cr->mpi_comm_mysim,
1529                        getThisRankDuties(cr),
1530                        dd_index(cartSetup.ntot, ddCellIndex),
1531                        &cr->mpi_comm_mygroup);
1532 #else
1533         GMX_UNUSED_VALUE(ddCellIndex);
1534 #endif
1535     }
1536     else
1537     {
1538         switch (ddRankOrder)
1539         {
1540             case DdRankOrder::pp_pme:
1541                 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1542                 break;
1543             case DdRankOrder::interleave:
1544                 /* Interleave the PP-only and PME-only ranks */
1545                 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1546                 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1547                 break;
1548             case DdRankOrder::cartesian: break;
1549             default: gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1550         }
1551
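        /* dd_simnode2pmenode() returns the PME rank serving a PP rank;
         * a rank without such a mapping (return value -1) is itself PME-only.
         */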
1552         if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1553         {
1554             cr->duty = DUTY_PME;
1555         }
1556         else
1557         {
1558             cr->duty = DUTY_PP;
1559         }
1560 #if GMX_MPI
1561         /* Split the sim communicator into PP and PME only nodes */
1562         MPI_Comm_split(cr->mpi_comm_mysim, getThisRankDuties(cr), cr->nodeid, &cr->mpi_comm_mygroup);
1563         MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1564 #endif
1565     }
1566
1567     GMX_LOG(mdlog.info)
1568             .appendTextFormatted("This rank does only %s work.\n",
1569                                  thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1570
1571     return cartSetup;
1572 }
1573
1574 /*! \brief Makes the PP communicator and the PME communicator, when needed
1575  *
1576  * Returns the Cartesian rank setup.
1577  * Sets \p cr->mpi_comm_mygroup.
1578  * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1579  * With separate PME ranks in interleaved order, sets the PME ranks in \p pmeRanks.
1580  */
1581 static CartesianRankSetup makeGroupCommunicators(const gmx::MDLogger& mdlog,
1582                                                  const DDSettings&    ddSettings,
1583                                                  const DdRankOrder    ddRankOrder,
1584                                                  const DDRankSetup&   ddRankSetup,
1585                                                  t_commrec*           cr,
1586                                                  ivec                 ddCellIndex,
1587                                                  std::vector<int>*    pmeRanks)
1588 {
1589     CartesianRankSetup cartSetup;
1590
1591     // As a default, both group and sim communicators are equal to the default communicator
1592     cr->mpi_comm_mygroup = cr->mpiDefaultCommunicator;
1593     cr->mpi_comm_mysim   = cr->mpiDefaultCommunicator;
1594     cr->nnodes           = cr->sizeOfDefaultCommunicator;
1595     cr->nodeid           = cr->rankInDefaultCommunicator;
1596     cr->sim_nodeid       = cr->rankInDefaultCommunicator;
1597
1598     if (ddRankSetup.usePmeOnlyRanks)
1599     {
1600         /* Split the communicator into a PP and PME part */
1601         cartSetup = split_communicator(
1602                 mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder, ddRankSetup, ddCellIndex, pmeRanks);
1603     }
1604     else
1605     {
1606         /* All nodes do PP and PME */
1607         /* We do not require separate communicators */
1608         cartSetup.bCartesianPP     = false;
1609         cartSetup.bCartesianPP_PME = false;
1610     }
1611
1612     return cartSetup;
1613 }
1614
1615 /*! \brief For PP ranks, copies or makes the PP communicator
1616  *
1617  * PME-only ranks instead receive the DD index to sim-rank mapping.
1618  * For PP-only ranks, also sets the id of the PME-only rank to communicate with.
1619  */
1620 static void setupGroupCommunication(const gmx::MDLogger&     mdlog,
1621                                     const DDSettings&        ddSettings,
1622                                     gmx::ArrayRef<const int> pmeRanks,
1623                                     t_commrec*               cr,
1624                                     const int                numAtomsInSystem,
1625                                     gmx_domdec_t*            dd)
1626 {
1627     const DDRankSetup&        ddRankSetup = dd->comm->ddRankSetup;
1628     const CartesianRankSetup& cartSetup   = dd->comm->cartesianRankSetup;
1629
1630     if (thisRankHasDuty(cr, DUTY_PP))
1631     {
1632         /* Copy or make a new PP communicator */
1633
1634         /* We (possibly) reordered the nodes in split_communicator,
1635          * so it is no longer required in make_pp_communicator.
1636          */
1637         const bool useCartesianReorder = (ddSettings.useCartesianReorder && !cartSetup.bCartesianPP_PME);
1638
1639         make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1640     }
1641     else
1642     {
1643         receive_ddindex2simnodeid(dd, cr);
1644     }
1645
1646     if (!thisRankHasDuty(cr, DUTY_PME))
1647     {
1648         /* Set up the communication to our PME node */
1649         dd->pme_nodeid = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1650         dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1651         if (debug)
1652         {
1653             fprintf(debug,
1654                     "My pme_nodeid %d receive ener %s\n",
1655                     dd->pme_nodeid,
1656                     gmx::boolToString(dd->pme_receive_vir_ener));
1657         }
1658     }
1659     else
1660     {
1661         dd->pme_nodeid = -1;
1662     }
1663
1664     /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1665     if (MASTER(cr))
1666     {
1667         dd->ma = std::make_unique<AtomDistribution>(dd->numCells, numAtomsInSystem, numAtomsInSystem);
1668     }
1669 }
1670
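/*! \brief Returns normalized relative cell sizes for static load balancing along direction \p dir
 *
 * Parses \p size_string, a whitespace-separated list of \p nc relative sizes
 * (e.g. "1.2 1.0 0.8" for nc = 3, as given with mdrun -ddcsx/-ddcsy/-ddcsz),
 * and returns the fractions normalized to sum to 1, or nullptr when unused.
 */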
1671 static real* get_slb_frac(const gmx::MDLogger& mdlog, const char* dir, int nc, const char* size_string)
1672 {
1673     real* slb_frac = nullptr;
1674     if (nc > 1 && size_string != nullptr)
1675     {
1676         GMX_LOG(mdlog.info).appendTextFormatted("Using static load balancing for the %s direction", dir);
1677         snew(slb_frac, nc);
1678         real tot = 0;
1679         for (int i = 0; i < nc; i++)
1680         {
1681             double dbl = 0;
1682             int    n   = 0;
1683             sscanf(size_string, "%20lf%n", &dbl, &n);
1684             if (dbl == 0)
1685             {
1686                 gmx_fatal(FARGS,
1687                           "Incorrect or not enough DD cell size entries for direction %s: '%s'",
1688                           dir,
1689                           size_string);
1690             }
1691             slb_frac[i] = dbl;
1692             size_string += n;
1693             tot += slb_frac[i];
1694         }
1695         /* Normalize */
1696         std::string relativeCellSizes = "Relative cell sizes:";
1697         for (int i = 0; i < nc; i++)
1698         {
1699             slb_frac[i] /= tot;
1700             relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1701         }
1702         GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1703     }
1704
1705     return slb_frac;
1706 }
1707
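/*! \brief Returns the number of bonded interactions in \p mtop that involve more than two atoms */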
1708 static int multi_body_bondeds_count(const gmx_mtop_t& mtop)
1709 {
1710     int n = 0;
1711     for (const auto ilists : IListRange(mtop))
1712     {
1713         for (auto& ilist : extractILists(ilists.list(), IF_BOND))
1714         {
1715             if (NRAL(ilist.functionType) > 2)
1716             {
1717                 n += ilists.nmol() * (ilist.iatoms.size() / ilistStride(ilist));
1718             }
1719         }
1720     }
1721
1722     return n;
1723 }
1724
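/*! \brief Returns the integer value of the environment variable \p env_var, or \p def when it is not set (a set, non-numeric value yields 1) */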
1725 static int dd_getenv(const gmx::MDLogger& mdlog, const char* env_var, int def)
1726 {
1727     int   nst = def;
1728     char* val = getenv(env_var);
1729     if (val)
1730     {
1731         if (sscanf(val, "%20d", &nst) <= 0)
1732         {
1733             nst = 1;
1734         }
1735         GMX_LOG(mdlog.info).appendTextFormatted("Found env.var. %s = %s, using value %d", env_var, val, nst);
1736     }
1737
1738     return nst;
1739 }
1740
1741 static void check_dd_restrictions(const gmx_domdec_t* dd, const t_inputrec& inputrec, const gmx::MDLogger& mdlog)
1742 {
1743     if (inputrec.pbcType == PbcType::Screw
1744         && (dd->numCells[XX] == 1 || dd->numCells[YY] > 1 || dd->numCells[ZZ] > 1))
1745     {
1746         gmx_fatal(FARGS,
1747                   "With pbc=%s can only do domain decomposition in the x-direction",
1748                   c_pbcTypeNames[inputrec.pbcType].c_str());
1749     }
1750
1751     if (inputrec.nstlist == 0)
1752     {
1753         gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1754     }
1755
1756     if (inputrec.comm_mode == ComRemovalAlgorithm::Angular && inputrec.pbcType != PbcType::No)
1757     {
1758         GMX_LOG(mdlog.warning)
1759                 .appendText(
1760                         "comm-mode angular will give incorrect results when the comm group "
1761                         "partially crosses a periodic boundary");
1762     }
1763 }
1764
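/*! \brief Returns the smallest initial average cell size over all dimensions decomposed into more than one domain */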
1765 static real average_cellsize_min(const gmx_ddbox_t& ddbox, const ivec numDomains)
1766 {
1767     real r = ddbox.box_size[XX];
1768     for (int d = 0; d < DIM; d++)
1769     {
1770         if (numDomains[d] > 1)
1771         {
1772             /* Check using the initial average cell size */
1773             r = std::min(r, ddbox.box_size[d] * ddbox.skew_fac[d] / numDomains[d]);
1774         }
1775     }
1776
1777     return r;
1778 }
1779
1780 /*! \brief Depending on the initial DLB state, returns the switched-off DLB state or issues a fatal error.
1781  */
1782 static DlbState forceDlbOffOrBail(DlbState             cmdlineDlbState,
1783                                   const std::string&   reasonStr,
1784                                   const gmx::MDLogger& mdlog)
1785 {
1786     std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
1787     std::string dlbDisableNote     = "NOTE: disabling dynamic load balancing as ";
1788
1789     if (cmdlineDlbState == DlbState::onUser)
1790     {
1791         gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1792     }
1793     else if (cmdlineDlbState == DlbState::offCanTurnOn)
1794     {
1795         GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1796     }
1797     return DlbState::offForever;
1798 }
1799
1800 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1801  *
1802  * This function parses the value of the "-dlb" command-line option and sets the
1803  * corresponding state. Then it checks the consistency of the determined
1804  * state with other run parameters and settings. As a result, the initial state
1805  * may be altered or an error may be thrown if incompatible options are detected.
1806  *
1807  * \param [in] mdlog       Logger.
1808  * \param [in] dlbOption   Enum value for the DLB option.
1809  * \param [in] bRecordLoad True if the load balancer is recording load information.
1810  * \param [in] mdrunOptions  Options for mdrun.
1811  * \param [in] inputrec    The mdrun input parameters.
1812  * \returns                DLB initial/startup state.
1813  */
1814 static DlbState determineInitialDlbState(const gmx::MDLogger&     mdlog,
1815                                          DlbOption                dlbOption,
1816                                          gmx_bool                 bRecordLoad,
1817                                          const gmx::MdrunOptions& mdrunOptions,
1818                                          const t_inputrec&        inputrec)
1819 {
1820     DlbState dlbState = DlbState::offCanTurnOn;
1821
1822     switch (dlbOption)
1823     {
1824         case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1825         case DlbOption::no: dlbState = DlbState::offUser; break;
1826         case DlbOption::yes: dlbState = DlbState::onUser; break;
1827         default: gmx_incons("Invalid dlbOption enum value");
1828     }
1829
1830     /* Reruns don't support DLB: bail or override auto mode */
1831     if (mdrunOptions.rerun)
1832     {
1833         std::string reasonStr = "it is not supported in reruns.";
1834         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1835     }
1836
1837     /* Unsupported integrators */
1838     if (!EI_DYNAMICS(inputrec.eI))
1839     {
1840         auto reasonStr =
1841                 gmx::formatString("it is only supported with dynamics, not with integrator '%s'.",
1842                                   enumValueToString(inputrec.eI));
1843         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1844     }
1845
1846     /* Without cycle counters we can't time work to balance on */
1847     if (!bRecordLoad)
1848     {
1849         std::string reasonStr =
1850                 "cycle counters unsupported or not enabled in the operating system kernel.";
1851         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1852     }
1853
1854     if (mdrunOptions.reproducible)
1855     {
1856         std::string reasonStr = "you started a reproducible run.";
1857         switch (dlbState)
1858         {
1859             case DlbState::offUser: break;
1860             case DlbState::offForever:
1861                 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1862                 break;
1863             case DlbState::offCanTurnOn: return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1864             case DlbState::onCanTurnOff:
1865                 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1866                 break;
1867             case DlbState::onUser:
1868                 return forceDlbOffOrBail(
1869                         dlbState,
1870                         reasonStr
1871                                 + " In load balanced runs binary reproducibility cannot be "
1872                                   "ensured.",
1873                         mdlog);
1874             default:
1875                 gmx_fatal(FARGS,
1876                           "Death horror: undefined case (%d) for load balancing choice",
1877                           static_cast<int>(dlbState));
1878         }
1879     }
1880
1881     return dlbState;
1882 }
1883
1884 static gmx_domdec_comm_t* init_dd_comm()
1885 {
1886     gmx_domdec_comm_t* comm = new gmx_domdec_comm_t;
1887
1888     comm->n_load_have    = 0;
1889     comm->n_load_collect = 0;
1890
1891     comm->haveTurnedOffDlb = false;
1892
1893     for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
1894     {
1895         comm->sum_nat[i] = 0;
1896     }
1897     comm->ndecomp   = 0;
1898     comm->nload     = 0;
1899     comm->load_step = 0;
1900     comm->load_sum  = 0;
1901     comm->load_max  = 0;
1902     clear_ivec(comm->load_lim);
1903     comm->load_mdf = 0;
1904     comm->load_pme = 0;
1905
1906     /* This should be replaced by a unique pointer */
1907     comm->balanceRegion = ddBalanceRegionAllocate();
1908
1909     return comm;
1910 }
1911
1912 static void setupUpdateGroups(const gmx::MDLogger&              mdlog,
1913                               const gmx_mtop_t&                 mtop,
1914                               ArrayRef<const RangePartitioning> updateGroupingsPerMoleculeType,
1915                               const bool                        useUpdateGroups,
1916                               const real                        maxUpdateGroupRadius,
1917                               DDSystemInfo*                     systemInfo)
1918 {
1919     systemInfo->updateGroupingsPerMoleculeType = updateGroupingsPerMoleculeType;
1920     systemInfo->useUpdateGroups                = useUpdateGroups;
1921     systemInfo->maxUpdateGroupRadius           = maxUpdateGroupRadius;
1922
1923     if (systemInfo->useUpdateGroups)
1924     {
1925         int numUpdateGroups = 0;
1926         for (const auto& molblock : mtop.molblock)
1927         {
1928             numUpdateGroups += molblock.nmol
1929                                * systemInfo->updateGroupingsPerMoleculeType[molblock.type].numBlocks();
1930         }
1931
1932         GMX_LOG(mdlog.info)
1933                 .appendTextFormatted(
1934                         "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f "
1935                         "nm\n",
1936                         numUpdateGroups,
1937                         mtop.natoms / static_cast<double>(numUpdateGroups),
1938                         systemInfo->maxUpdateGroupRadius);
1939     }
1940 }
1941
1942 UnitCellInfo::UnitCellInfo(const t_inputrec& ir) :
1943     npbcdim(numPbcDimensions(ir.pbcType)),
1944     numBoundedDimensions(inputrec2nboundeddim(&ir)),
1945     ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
1946     haveScrewPBC(ir.pbcType == PbcType::Screw)
1947 {
1948 }
1949
1950 /* Returns whether molecules are always whole, i.e. not broken by PBC */
1951 static bool moleculesAreAlwaysWhole(const gmx_mtop_t&                           mtop,
1952                                     const bool                                  useUpdateGroups,
1953                                     gmx::ArrayRef<const gmx::RangePartitioning> updateGroupingsPerMoleculeType)
1954 {
1955     if (useUpdateGroups)
1956     {
1957         GMX_RELEASE_ASSERT(updateGroupingsPerMoleculeType.size() == mtop.moltype.size(),
1958                            "Need one grouping per moltype");
1959         for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
1960         {
1961             if (updateGroupingsPerMoleculeType[mol].numBlocks() > 1)
1962             {
1963                 return false;
1964             }
1965         }
1966     }
1967     else
1968     {
1969         for (const auto& moltype : mtop.moltype)
1970         {
1971             if (moltype.atoms.nr > 1)
1972             {
1973                 return false;
1974             }
1975         }
1976     }
1977
1978     return true;
1979 }
1980
1981 /*! \brief Generate the simulation system information */
1982 static DDSystemInfo getSystemInfo(const gmx::MDLogger&              mdlog,
1983                                   DDRole                            ddRole,
1984                                   MPI_Comm                          communicator,
1985                                   const DomdecOptions&              options,
1986                                   const gmx_mtop_t&                 mtop,
1987                                   const t_inputrec&                 ir,
1988                                   const matrix                      box,
1989                                   ArrayRef<const RangePartitioning> updateGroupingPerMoleculeType,
1990                                   const bool                        useUpdateGroups,
1991                                   const real                        maxUpdateGroupRadius,
1992                                   gmx::ArrayRef<const gmx::RVec>    xGlobal)
1993 {
1994     const real tenPercentMargin = 1.1;
1995
1996     DDSystemInfo systemInfo;
1997
1998     setupUpdateGroups(
1999             mdlog, mtop, updateGroupingPerMoleculeType, useUpdateGroups, maxUpdateGroupRadius, &systemInfo);
2000
2001     systemInfo.moleculesAreAlwaysWhole = moleculesAreAlwaysWhole(
2002             mtop, systemInfo.useUpdateGroups, systemInfo.updateGroupingsPerMoleculeType);
2003     systemInfo.haveInterDomainBondeds =
2004             (!systemInfo.moleculesAreAlwaysWhole || mtop.bIntermolecularInteractions);
2005     systemInfo.haveInterDomainMultiBodyBondeds =
2006             (systemInfo.haveInterDomainBondeds && multi_body_bondeds_count(mtop) > 0);
2007
2008     if (systemInfo.useUpdateGroups)
2009     {
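        /* With update groups, constraints and SETTLEs are internal to an
         * update group and can therefore never be split over domains.
         */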
2010         systemInfo.mayHaveSplitConstraints = false;
2011         systemInfo.mayHaveSplitSettles     = false;
2012     }
2013     else
2014     {
2015         systemInfo.mayHaveSplitConstraints = (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0
2016                                               || gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0);
2017         systemInfo.mayHaveSplitSettles     = (gmx_mtop_ftype_count(mtop, F_SETTLE) > 0);
2018     }
2019
2020     if (ir.rlist == 0)
2021     {
2022         /* Set the cut-off to some very large value,
2023          * so we don't need if statements everywhere in the code.
2024          * We use sqrt, since the cut-off is squared in some places.
2025          */
2026         systemInfo.cutoff = GMX_CUTOFF_INF;
2027     }
2028     else
2029     {
2030         systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir.rlist);
2031     }
2032     systemInfo.minCutoffForMultiBody = 0;
2033
2034     /* Determine the minimum cell size limit, affected by many factors */
2035     systemInfo.cellsizeLimit             = 0;
2036     systemInfo.filterBondedCommunication = false;
2037
2038     /* We do not allow home atoms to move beyond the neighboring domain
2039      * between domain decomposition steps, which limits the cell size.
2040      * Get an estimate of cell size limit due to atom displacement.
2041      * In most cases this is a large overestimate, because it assumes
2042      * non-interacting atoms.
2043      * We set the chance to 1 in a trillion steps.
2044      */
2045     constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2046     const real     limitForAtomDisplacement          = minCellSizeForAtomDisplacement(
2047             mtop, ir, systemInfo.updateGroupingsPerMoleculeType, c_chanceThatAtomMovesBeyondDomain);
2048     GMX_LOG(mdlog.info).appendTextFormatted("Minimum cell size due to atom displacement: %.3f nm", limitForAtomDisplacement);
2049
2050     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, limitForAtomDisplacement);
2051
2052     /* TODO: PME decomposition currently requires atoms not to be more than
2053      *       2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2054      *       In nearly all cases, limitForAtomDisplacement will be smaller
2055      *       than 2/3*rlist, so the PME requirement is satisfied.
2056      *       But it would be better for both correctness and performance
2057      *       to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2058      *       Note that we would need to improve the pairlist buffer case.
2059      */
2060
2061     if (systemInfo.haveInterDomainBondeds)
2062     {
2063         if (options.minimumCommunicationRange > 0)
2064         {
2065             systemInfo.minCutoffForMultiBody =
2066                     atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2067             if (options.useBondedCommunication)
2068             {
2069                 systemInfo.filterBondedCommunication =
2070                         (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2071             }
2072             else
2073             {
2074                 systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
2075             }
2076         }
2077         else if (ir.bPeriodicMols)
2078         {
2079             /* Can not easily determine the required cut-off */
2080             GMX_LOG(mdlog.warning)
2081                     .appendText(
2082                             "NOTE: Periodic molecules are present in this system. Because of this, "
2083                             "the domain decomposition algorithm cannot easily determine the "
2084                             "minimum cell size that it requires for treating bonded interactions. "
2085                             "Instead, domain decomposition will assume that half the non-bonded "
2086                             "cut-off will be a suitable lower bound.");
2087             systemInfo.minCutoffForMultiBody = systemInfo.cutoff / 2;
2088         }
2089         else
2090         {
2091             real r_2b = 0;
2092             real r_mb = 0;
2093
2094             if (ddRole == DDRole::Master)
2095             {
2096                 dd_bonded_cg_distance(mdlog, mtop, ir, xGlobal, box, options.ddBondedChecking, &r_2b, &r_mb);
2097             }
2098             gmx_bcast(sizeof(r_2b), &r_2b, communicator);
2099             gmx_bcast(sizeof(r_mb), &r_mb, communicator);
2100
2101             /* We use an initial margin of 10% for the minimum cell size,
2102              * except when we are just below the non-bonded cut-off.
2103              */
2104             if (options.useBondedCommunication)
2105             {
2106                 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2107                 {
2108                     const real r_bonded              = std::max(r_2b, r_mb);
2109                     systemInfo.minCutoffForMultiBody = tenPercentMargin * r_bonded;
2110                     /* This is the (only) place where we turn on the filtering */
2111                     systemInfo.filterBondedCommunication = true;
2112                 }
2113                 else
2114                 {
2115                     const real r_bonded = r_mb;
2116                     systemInfo.minCutoffForMultiBody =
2117                             std::min(tenPercentMargin * r_bonded, systemInfo.cutoff);
2118                 }
2119                 /* We determine cutoff_mbody later */
2120                 systemInfo.increaseMultiBodyCutoff = true;
2121             }
2122             else
2123             {
2124                 /* No special bonded communication,
2125                  * simply increase the DD cut-off.
2126                  */
2127                 systemInfo.minCutoffForMultiBody = tenPercentMargin * std::max(r_2b, r_mb);
2128                 systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
2129             }
2130         }
2131         GMX_LOG(mdlog.info)
2132                 .appendTextFormatted("Minimum cell size due to bonded interactions: %.3f nm",
2133                                      systemInfo.minCutoffForMultiBody);
2134
2135         systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.minCutoffForMultiBody);
2136     }
2137
2138     systemInfo.constraintCommunicationRange = 0;
2139     if (systemInfo.mayHaveSplitConstraints && options.constraintCommunicationRange <= 0)
2140     {
2141         /* There is a cell size limit due to the constraints (P-LINCS) */
2142         systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, &mtop, &ir);
2143         GMX_LOG(mdlog.info)
2144                 .appendTextFormatted("Estimated maximum distance required for P-LINCS: %.3f nm",
2145                                      systemInfo.constraintCommunicationRange);
2146         if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2147         {
2148             GMX_LOG(mdlog.info)
2149                     .appendText(
2150                             "This distance will limit the DD cell size, you can override this with "
2151                             "-rcon");
2152         }
2153     }
2154     else if (options.constraintCommunicationRange > 0)
2155     {
2156         /* Here we do not check for systemInfo.mayHaveSplitConstraints,
2157          * because one can also set a cell size limit for virtual sites only,
2158          * and at this point we don't know yet if there are inter-cg v-sites.
2159          */
2160         GMX_LOG(mdlog.info)
2161                 .appendTextFormatted("User supplied maximum distance required for P-LINCS: %.3f nm",
2162                                      options.constraintCommunicationRange);
2163         systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2164     }
2165     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.constraintCommunicationRange);
2166
2167     return systemInfo;
2168 }
2169
2170 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
2171  * implemented. */
2172 static void checkDDGridSetup(const DDGridSetup&   ddGridSetup,
2173                              DDRole               ddRole,
2174                              MPI_Comm             communicator,
2175                              int                  numNodes,
2176                              const DomdecOptions& options,
2177                              const DDSettings&    ddSettings,
2178                              const DDSystemInfo&  systemInfo,
2179                              const real           cellsizeLimit,
2180                              const gmx_ddbox_t&   ddbox)
2181 {
2182     if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
2183     {
2184         const bool  bC = (systemInfo.mayHaveSplitConstraints
2185                          && systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2186         std::string message =
2187                 gmx::formatString("Change the number of ranks or mdrun option %s%s%s",
2188                                   !bC ? "-rdd" : "-rcon",
2189                                   ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2190                                   bC ? " or your LINCS settings" : "");
2191
2192         gmx_fatal_collective(FARGS,
2193                              communicator,
2194                              ddRole == DDRole::Master,
2195                              "There is no domain decomposition for %d ranks that is compatible "
2196                              "with the given box and a minimum cell size of %g nm\n"
2197                              "%s\n"
2198                              "Look in the log file for details on the domain decomposition",
2199                              numNodes - ddGridSetup.numPmeOnlyRanks,
2200                              cellsizeLimit,
2201                              message.c_str());
2202     }
2203
2204     const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
2205     if (acs < cellsizeLimit)
2206     {
2207         if (options.numCells[XX] <= 0)
2208         {
2209             GMX_RELEASE_ASSERT(
2210                     false,
2211                     "dd_choose_grid() should return a grid that satisfies the cell size limits");
2212         }
2213         else
2214         {
2215             gmx_fatal_collective(
2216                     FARGS,
2217                     communicator,
2218                     ddRole == DDRole::Master,
2219                     "The initial cell size (%f) is smaller than the cell size limit (%f), change "
2220                     "options -dd, -rdd or -rcon, see the log file for details",
2221                     acs,
2222                     cellsizeLimit);
2223         }
2224     }
2225
2226     const int numPPRanks =
2227             ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2228     if (numNodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2229     {
2230         gmx_fatal_collective(FARGS,
2231                              communicator,
2232                              ddRole == DDRole::Master,
2233                              "The size of the domain decomposition grid (%d) does not match the "
2234                              "number of PP ranks (%d). The total number of ranks is %d",
2235                              numPPRanks,
2236                              numNodes - ddGridSetup.numPmeOnlyRanks,
2237                              numNodes);
2238     }
2239     if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2240     {
2241         gmx_fatal_collective(FARGS,
2242                              communicator,
2243                              ddRole == DDRole::Master,
2244                              "The number of separate PME ranks (%d) is larger than the number of "
2245                              "PP ranks (%d), this is not supported.",
2246                              ddGridSetup.numPmeOnlyRanks,
2247                              numPPRanks);
2248     }
2249 }
2250
2251 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2252 static DDRankSetup getDDRankSetup(const gmx::MDLogger& mdlog,
2253                                   int                  numNodes,
2254                                   const DDGridSetup&   ddGridSetup,
2255                                   const t_inputrec&    ir)
2256 {
2257     GMX_LOG(mdlog.info)
2258             .appendTextFormatted("Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2259                                  ddGridSetup.numDomains[XX],
2260                                  ddGridSetup.numDomains[YY],
2261                                  ddGridSetup.numDomains[ZZ],
2262                                  ddGridSetup.numPmeOnlyRanks);
2263
2264     DDRankSetup ddRankSetup;
2265
2266     ddRankSetup.numPPRanks = numNodes - ddGridSetup.numPmeOnlyRanks;
2267     copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2268
2269     ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2270     if (ddRankSetup.usePmeOnlyRanks)
2271     {
2272         ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2273     }
2274     else
2275     {
2276         ddRankSetup.numRanksDoingPme =
2277                 ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2278     }
2279
2280     if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2281     {
2282         /* The following choices should match those
2283          * in comm_cost_est in domdec_setup.c.
2284          * Note that here the checks have to take into account
2285          * that the decomposition might occur in a different order than xyz
2286          * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2287          * in which case they will not match those in comm_cost_est,
2288          * but since that is mainly for testing purposes that's fine.
2289          */
2290         if (ddGridSetup.numDDDimensions >= 2 && ddGridSetup.ddDimensions[0] == XX
2291             && ddGridSetup.ddDimensions[1] == YY
2292             && ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX]
2293             && ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0
2294             && getenv("GMX_PMEONEDD") == nullptr)
2295         {
2296             ddRankSetup.npmedecompdim = 2;
2297             ddRankSetup.npmenodes_x   = ddGridSetup.numDomains[XX];
2298             ddRankSetup.npmenodes_y   = ddRankSetup.numRanksDoingPme / ddRankSetup.npmenodes_x;
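            /* Example: an 8x4x2 PP domain grid with 16 PME-only ranks and the
             * default xyz decomposition order gives an 8x2 PME decomposition here.
             */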
2299         }
2300         else
2301         {
2302             /* In case nc is 1 in both x and y we could still choose to
2303              * decompose pme in y instead of x, but we use x for simplicity.
2304              */
2305             ddRankSetup.npmedecompdim = 1;
2306             if (ddGridSetup.ddDimensions[0] == YY)
2307             {
2308                 ddRankSetup.npmenodes_x = 1;
2309                 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2310             }
2311             else
2312             {
2313                 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2314                 ddRankSetup.npmenodes_y = 1;
2315             }
2316         }
2317         GMX_LOG(mdlog.info)
2318                 .appendTextFormatted("PME domain decomposition: %d x %d x %d",
2319                                      ddRankSetup.npmenodes_x,
2320                                      ddRankSetup.npmenodes_y,
2321                                      1);
2322     }
2323     else
2324     {
2325         ddRankSetup.npmedecompdim = 0;
2326         ddRankSetup.npmenodes_x   = 0;
2327         ddRankSetup.npmenodes_y   = 0;
2328     }
2329
2330     return ddRankSetup;
2331 }
2332
2333 /*! \brief Set the cell size and interaction limits */
2334 static void set_dd_limits(const gmx::MDLogger& mdlog,
2335                           DDRole               ddRole,
2336                           gmx_domdec_t*        dd,
2337                           const DomdecOptions& options,
2338                           const DDSettings&    ddSettings,
2339                           const DDSystemInfo&  systemInfo,
2340                           const DDGridSetup&   ddGridSetup,
2341                           const int            numPPRanks,
2342                           const gmx_mtop_t&    mtop,
2343                           const t_inputrec&    ir,
2344                           const gmx_ddbox_t&   ddbox)
2345 {
2346     gmx_domdec_comm_t* comm = dd->comm;
2347     comm->ddSettings        = ddSettings;
2348
2349     /* Initialize the GPU share count to 0, it might change later */
2350     comm->nrank_gpu_shared = 0;
2351
2352     comm->dlbState = comm->ddSettings.initialDlbState;
2353     dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2354     /* To consider turning DLB on after 2*nstlist steps we need to check
2355      * at partitioning count 3. Thus we need to increase the first count by 2.
2356      */
2357     comm->ddPartioningCountFirstDlbOff += 2;
2358
2359     comm->bPMELoadBalDLBLimits = FALSE;
2360
2361     /* Allocate the charge group/atom sorting struct */
2362     comm->sort = std::make_unique<gmx_domdec_sort_t>();
2363
2364     comm->systemInfo = systemInfo;
2365
2366     if (systemInfo.useUpdateGroups)
2367     {
2368         /* Note: We would like to use dd->nnodes for the atom count estimate,
2369          *       but that is not yet available here. This anyhow only
2370          *       affects performance up to the second dd_partition_system call.
2371          */
2372         const int homeAtomCountEstimate = mtop.natoms / numPPRanks;
2373         comm->updateGroupsCog           = std::make_unique<gmx::UpdateGroupsCog>(
2374                 mtop, systemInfo.updateGroupingsPerMoleculeType, maxReferenceTemperature(ir), homeAtomCountEstimate);
2375     }
2376
2377     /* Set the DD setup given by ddGridSetup */
2378     copy_ivec(ddGridSetup.numDomains, dd->numCells);
2379     dd->ndim = ddGridSetup.numDDDimensions;
2380     copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2381
2382     dd->nnodes = dd->numCells[XX] * dd->numCells[YY] * dd->numCells[ZZ];
2383
2384     snew(comm->slb_frac, DIM);
2385     if (isDlbDisabled(comm))
2386     {
2387         comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->numCells[XX], options.cellSizeX);
2388         comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->numCells[YY], options.cellSizeY);
2389         comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->numCells[ZZ], options.cellSizeZ);
2390     }
2391
2392     /* Set the multi-body cut-off and cellsize limit for DLB */
2393     comm->cutoff_mbody   = systemInfo.minCutoffForMultiBody;
2394     comm->cellsize_limit = systemInfo.cellsizeLimit;
2395     if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2396     {
2397         if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2398         {
2399             /* Set the bonded communication distance to halfway between
2400              * the minimum and the maximum,
2401              * since the extra communication cost is nearly zero.
2402              */
2403             real acs           = average_cellsize_min(ddbox, dd->numCells);
2404             comm->cutoff_mbody = 0.5 * (systemInfo.minCutoffForMultiBody + acs);
2405             if (!isDlbDisabled(comm))
2406             {
2407                 /* Check if this does not limit the scaling */
2408                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, options.dlbScaling * acs);
2409             }
2410             if (!systemInfo.filterBondedCommunication)
2411             {
2412                 /* Without bBondComm do not go beyond the n.b. cut-off */
2413                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2414                 if (comm->cellsize_limit >= systemInfo.cutoff)
2415                 {
2416                     /* We don't lose a lot of efficiency
2417                      * when increasing it to the n.b. cut-off.
2418                      * It can even be slightly faster, because we need
2419                      * fewer checks for the communication setup.
2420                      */
2421                     comm->cutoff_mbody = systemInfo.cutoff;
2422                 }
2423             }
2424             /* Check if we did not end up below our original limit */
2425             comm->cutoff_mbody = std::max(comm->cutoff_mbody, systemInfo.minCutoffForMultiBody);
2426
2427             if (comm->cutoff_mbody > comm->cellsize_limit)
2428             {
2429                 comm->cellsize_limit = comm->cutoff_mbody;
2430             }
2431         }
2432         /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2433     }
2434
2435     if (debug)
2436     {
2437         fprintf(debug,
2438                 "Bonded atom communication beyond the cut-off: %s\n"
2439                 "cellsize limit %f\n",
2440                 gmx::boolToString(systemInfo.filterBondedCommunication),
2441                 comm->cellsize_limit);
2442     }
2443
2444     if (ddRole == DDRole::Master)
2445     {
2446         check_dd_restrictions(dd, ir, mdlog);
2447     }
2448 }
2449
2450 static void writeSettings(gmx::TextWriter*   log,
2451                           gmx_domdec_t*      dd,
2452                           const gmx_mtop_t&  mtop,
2453                           const t_inputrec&  ir,
2454                           gmx_bool           bDynLoadBal,
2455                           real               dlb_scale,
2456                           const gmx_ddbox_t* ddbox)
2457 {
2458     gmx_domdec_comm_t* comm = dd->comm;
2459
2460     if (bDynLoadBal)
2461     {
2462         log->writeString("The maximum number of communication pulses is:");
2463         for (int d = 0; d < dd->ndim; d++)
2464         {
2465             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2466         }
2467         log->ensureLineBreak();
2468         log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm",
2469                                 comm->cellsize_limit);
2470         log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2471         log->writeString("The allowed shrink of domain decomposition cells is:");
2472         for (int d = 0; d < DIM; d++)
2473         {
2474             if (dd->numCells[d] > 1)
2475             {
2476                 const real shrink =
2477                         (d >= ddbox->npbcdim && dd->numCells[d] == 2)
2478                                 ? 0
2479                                 : comm->cellsize_min_dlb[d]
2480                                           / (ddbox->box_size[d] * ddbox->skew_fac[d] / dd->numCells[d]);
2481                 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2482             }
2483         }
2484         log->ensureLineBreak();
2485     }
2486     else
2487     {
2488         ivec np;
2489         set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2490         log->writeString("The initial number of communication pulses is:");
2491         for (int d = 0; d < dd->ndim; d++)
2492         {
2493             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2494         }
2495         log->ensureLineBreak();
2496         log->writeString("The initial domain decomposition cell size is:");
2497         for (int d = 0; d < DIM; d++)
2498         {
2499             if (dd->numCells[d] > 1)
2500             {
2501                 log->writeStringFormatted(" %c %.2f nm", dim2char(d), dd->comm->cellsize_min[d]);
2502             }
2503         }
2504         log->ensureLineBreak();
2505         log->writeLine();
2506     }
2507
2508     const bool haveInterDomainVsites =
2509             (countInterUpdategroupVsites(mtop, comm->systemInfo.updateGroupingsPerMoleculeType) != 0);
2510
2511     if (comm->systemInfo.haveInterDomainBondeds || haveInterDomainVsites
2512         || comm->systemInfo.mayHaveSplitConstraints || comm->systemInfo.mayHaveSplitSettles)
2513     {
2514         std::string decompUnits;
2515         if (comm->systemInfo.useUpdateGroups)
2516         {
2517             decompUnits = "atom groups";
2518         }
2519         else
2520         {
2521             decompUnits = "atoms";
2522         }
2523
2524         log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:",
2525                                 decompUnits.c_str());
2526         log->writeLineFormatted(
2527                 "%40s  %-7s %6.3f nm", "non-bonded interactions", "", comm->systemInfo.cutoff);
2528
2529         real limit = 0;
2530         if (bDynLoadBal)
2531         {
2532             limit = dd->comm->cellsize_limit;
2533         }
2534         else
2535         {
2536             if (dd->unitCellInfo.ddBoxIsDynamic)
2537             {
2538                 log->writeLine(
2539                         "(the following are initial values, they could change due to box "
2540                         "deformation)");
2541             }
2542             limit = dd->comm->cellsize_min[XX];
2543             for (int d = 1; d < DIM; d++)
2544             {
2545                 limit = std::min(limit, dd->comm->cellsize_min[d]);
2546             }
2547         }
2548
2549         if (comm->systemInfo.haveInterDomainBondeds)
2550         {
2551             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2552                                     "two-body bonded interactions",
2553                                     "(-rdd)",
2554                                     std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2555             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2556                                     "multi-body bonded interactions",
2557                                     "(-rdd)",
2558                                     (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm))
2559                                             ? comm->cutoff_mbody
2560                                             : std::min(comm->systemInfo.cutoff, limit));
2561         }
2562         if (haveInterDomainVsites)
2563         {
2564             log->writeLineFormatted("%40s  %-7s %6.3f nm", "virtual site constructions", "(-rcon)", limit);
2565         }
2566         if (comm->systemInfo.mayHaveSplitConstraints || comm->systemInfo.mayHaveSplitSettles)
2567         {
2568             std::string separation =
2569                     gmx::formatString("atoms separated by up to %d constraints", 1 + ir.nProjOrder);
2570             log->writeLineFormatted("%40s  %-7s %6.3f nm\n", separation.c_str(), "(-rcon)", limit);
2571         }
2572         log->ensureLineBreak();
2573     }
2574 }
2575
2576 static void logSettings(const gmx::MDLogger& mdlog,
2577                         gmx_domdec_t*        dd,
2578                         const gmx_mtop_t&    mtop,
2579                         const t_inputrec&    ir,
2580                         real                 dlb_scale,
2581                         const gmx_ddbox_t*   ddbox)
2582 {
2583     gmx::StringOutputStream stream;
2584     gmx::TextWriter         log(&stream);
2585     writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2586     if (dd->comm->dlbState == DlbState::offCanTurnOn)
2587     {
2588         {
2589             log.ensureEmptyLine();
2590             log.writeLine(
2591                     "When dynamic load balancing gets turned on, these settings will change to:");
2592         }
2593         writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2594     }
2595     GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2596 }
2597
2598 static void set_cell_limits_dlb(const gmx::MDLogger& mdlog,
2599                                 gmx_domdec_t*        dd,
2600                                 real                 dlb_scale,
2601                                 const t_inputrec&    inputrec,
2602                                 const gmx_ddbox_t*   ddbox)
2603 {
2604     int npulse       = 0;
2605     int npulse_d_max = 0;
2606     int npulse_d     = 0;
2607
2608     gmx_domdec_comm_t* comm = dd->comm;
2609
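    /* A zero rvdw or rcoulomb means that no cut-off is used */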
2610     bool bNoCutOff = (inputrec.rvdw == 0 || inputrec.rcoulomb == 0);
2611
2612     /* Determine the maximum number of comm. pulses in one dimension */
2613
2614     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2615
2616     /* Determine the maximum required number of grid pulses */
2617     if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2618     {
2619         /* Only a single pulse is required */
2620         npulse = 1;
2621     }
2622     else if (!bNoCutOff && comm->cellsize_limit > 0)
2623     {
2624         /* We round down slightly here to avoid overhead due to the latency
2625          * of extra communication calls when the cut-off
2626          * would be only slightly longer than the cell size.
2627          * Later cellsize_limit is redetermined,
2628          * so we can not miss interactions due to this rounding.
2629          */
2630         npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff / comm->cellsize_limit);
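        /* E.g. a cut-off of 1.00 nm with cellsize_limit 0.97 nm gives
         * int(0.96 + 1.00/0.97) = 1 pulse, where a strict ceiling would give 2.
         */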
2631     }
2632     else
2633     {
2634         /* There is no cell size limit */
2635         npulse = std::max(dd->numCells[XX] - 1, std::max(dd->numCells[YY] - 1, dd->numCells[ZZ] - 1));
2636     }
2637
2638     if (!bNoCutOff && npulse > 1)
2639     {
2640         /* See if we can do with less pulses, based on dlb_scale */
2641         npulse_d_max = 0;
2642         for (int d = 0; d < dd->ndim; d++)
2643         {
2644             int dim  = dd->dim[d];
2645             npulse_d = static_cast<int>(
2646                     1
2647                     + dd->numCells[dim] * comm->systemInfo.cutoff
2648                               / (ddbox->box_size[dim] * ddbox->skew_fac[dim] * dlb_scale));
2649             npulse_d_max = std::max(npulse_d_max, npulse_d);
2650         }
2651         npulse = std::min(npulse, npulse_d_max);
2652     }
2653
2654     /* This env var can override npulse */
2655     const int ddPulseEnv = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2656     if (ddPulseEnv > 0)
2657     {
2658         npulse = ddPulseEnv;
2659     }
2660
2661     comm->maxpulse       = 1;
2662     comm->bVacDLBNoLimit = (inputrec.pbcType == PbcType::No);
2663     for (int d = 0; d < dd->ndim; d++)
2664     {
2665         comm->cd[d].np_dlb = std::min(npulse, dd->numCells[dd->dim[d]] - 1);
2666         comm->maxpulse     = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2667         if (comm->cd[d].np_dlb < dd->numCells[dd->dim[d]] - 1)
2668         {
2669             comm->bVacDLBNoLimit = FALSE;
2670         }
2671     }
2672
2673     /* cellsize_limit is set for LINCS in init_domain_decomposition */
2674     if (!comm->bVacDLBNoLimit)
2675     {
2676         comm->cellsize_limit = std::max(comm->cellsize_limit, comm->systemInfo.cutoff / comm->maxpulse);
2677     }
2678     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2679     /* Set the minimum cell size for each DD dimension */
2680     for (int d = 0; d < dd->ndim; d++)
2681     {
2682         if (comm->bVacDLBNoLimit || comm->cd[d].np_dlb * comm->cellsize_limit >= comm->systemInfo.cutoff)
2683         {
2684             comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2685         }
2686         else
2687         {
2688             comm->cellsize_min_dlb[dd->dim[d]] = comm->systemInfo.cutoff / comm->cd[d].np_dlb;
2689         }
2690     }
2691     if (comm->cutoff_mbody <= 0)
2692     {
2693         comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2694     }
2695     if (isDlbOn(comm))
2696     {
2697         set_dlb_limits(dd);
2698     }
2699 }
2700
2701 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t& dd)
2702 {
2703     return dd.comm->systemInfo.moleculesAreAlwaysWhole;
2704 }
2705
2706 bool dd_bonded_molpbc(const gmx_domdec_t& dd, PbcType pbcType)
2707 {
2708     /* If each molecule is a single charge group
2709      * or we use domain decomposition for each periodic dimension,
2710      * we do not need to take pbc into account for the bonded interactions.
2711      */
2712     return (pbcType != PbcType::No && dd.comm->systemInfo.haveInterDomainBondeds
2713             && !(dd.numCells[XX] > 1 && dd.numCells[YY] > 1
2714                  && (dd.numCells[ZZ] > 1 || pbcType == PbcType::XY)));
2715 }
2716
2717 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2718 static void set_ddgrid_parameters(const gmx::MDLogger& mdlog,
2719                                   gmx_domdec_t*        dd,
2720                                   real                 dlb_scale,
2721                                   const gmx_mtop_t&    mtop,
2722                                   const t_inputrec&    inputrec,
2723                                   const gmx_ddbox_t*   ddbox)
2724 {
2725     gmx_domdec_comm_t* comm        = dd->comm;
2726     DDRankSetup&       ddRankSetup = comm->ddRankSetup;
2727
2728     if (EEL_PME(inputrec.coulombtype) || EVDW_PME(inputrec.vdwtype))
2729     {
2730         init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2731         if (ddRankSetup.npmedecompdim >= 2)
2732         {
2733             init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2734         }
2735     }
2736     else
2737     {
2738         ddRankSetup.numRanksDoingPme = 0;
2739         if (dd->pme_nodeid >= 0)
2740         {
2741             gmx_fatal_collective(FARGS,
2742                                  dd->mpi_comm_all,
2743                                  DDMASTER(dd),
2744                                  "Can not have separate PME ranks without PME electrostatics");
2745         }
2746     }
2747
2748     if (debug)
2749     {
2750         fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2751     }
2752     if (!isDlbDisabled(comm))
2753     {
2754         set_cell_limits_dlb(mdlog, dd, dlb_scale, inputrec, ddbox);
2755     }
2756
2757     logSettings(mdlog, dd, mtop, inputrec, dlb_scale, ddbox);
2758
2759     const real vol_frac = (inputrec.pbcType == PbcType::No)
2760                                   ? (1 - 1 / static_cast<double>(dd->nnodes))
2761                                   : ((1 + comm_box_frac(dd->numCells, comm->systemInfo.cutoff, *ddbox))
2762                                      / static_cast<double>(dd->nnodes));
2763     if (debug)
2764     {
2765         fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2766     }
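    /* Size the global-to-local atom lookup for the expected number of atoms
     * in this rank's DD zones: the local volume fraction times the total count.
     */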
2767     int natoms_tot = mtop.natoms;
2768
2769     dd->ga2la = new gmx_ga2la_t(natoms_tot, static_cast<int>(vol_frac * natoms_tot));
2770 }
2771
2772 /*! \brief Get some important DD parameters which can be modified by env.vars */
2773 static DDSettings getDDSettings(const gmx::MDLogger&     mdlog,
2774                                 const DomdecOptions&     options,
2775                                 const gmx::MdrunOptions& mdrunOptions,
2776                                 const t_inputrec&        ir)
2777 {
2778     DDSettings ddSettings;
2779
2780     ddSettings.useSendRecv2        = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2781     ddSettings.dlb_scale_lim       = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2782     ddSettings.useDDOrderZYX       = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
2783     ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
2784     ddSettings.eFlop               = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2785     const int recload              = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2786     ddSettings.nstDDDump           = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2787     ddSettings.nstDDDumpGrid       = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2788     ddSettings.DD_debug            = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
2789
2790     if (ddSettings.useSendRecv2)
2791     {
2792         GMX_LOG(mdlog.info)
2793                 .appendText(
2794                         "Will use two sequential MPI_Sendrecv calls instead of two simultaneous "
2795                         "non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite "
2796                         "communication");
2797     }
2798
2799     if (ddSettings.eFlop)
2800     {
2801         GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2802         ddSettings.recordLoad = true;
2803     }
2804     else
2805     {
2806         ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
2807     }
2808
2809     ddSettings.initialDlbState =
2810             determineInitialDlbState(mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, ir);
2811     GMX_LOG(mdlog.info)
2812             .appendTextFormatted("Dynamic load balancing: %s",
2813                                  enumValueToString(ddSettings.initialDlbState));
2814
2815     return ddSettings;
2816 }
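/* Usage note (illustrative, not part of this file): the environment variables read
 * above are meant for debugging and tuning and would typically be set when launching
 * mdrun, e.g.
 *
 *   GMX_DD_DEBUG=1 GMX_DD_NST_DUMP=100 gmx mdrun -s topol.tpr
 *
 * which switches on the extra DD consistency checks and requests periodic dumps of
 * the local state; the exact invocation is only an example, and the effect of each
 * variable is defined by how the corresponding DDSettings field is used elsewhere.
 */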
2817
2818 gmx_domdec_t::gmx_domdec_t(const t_inputrec& ir) : unitCellInfo(ir) {}
2819
2820 gmx_domdec_t::~gmx_domdec_t() = default;
2821
2822 namespace gmx
2823 {
2824
2825 // TODO once the functionality stabilizes, move this class and
2826 // supporting functionality into builder.cpp
2827 /*! \brief Impl class for DD builder */
2828 class DomainDecompositionBuilder::Impl
2829 {
2830 public:
2831     //! Constructor
2832     Impl(const MDLogger&                   mdlog,
2833          t_commrec*                        cr,
2834          const DomdecOptions&              options,
2835          const MdrunOptions&               mdrunOptions,
2836          const gmx_mtop_t&                 mtop,
2837          const t_inputrec&                 ir,
2838          const matrix                      box,
2839          ArrayRef<const RangePartitioning> updateGroupingPerMoleculeType,
2840          bool                              useUpdateGroups,
2841          real                              maxUpdateGroupRadius,
2842          ArrayRef<const RVec>              xGlobal);
2843
2844     //! Build the resulting DD manager
2845     gmx_domdec_t* build(LocalAtomSetManager* atomSets);
2846
2847     //! Objects used in constructing and configuring DD
2848     //! \{
2849     //! Logging object
2850     const MDLogger& mdlog_;
2851     //! Communication object
2852     t_commrec* cr_;
2853     //! User-supplied options configuring DD behavior
2854     const DomdecOptions options_;
2855     //! Global system topology
2856     const gmx_mtop_t& mtop_;
2857     //! User input values from the tpr file
2858     const t_inputrec& ir_;
2859     //! \}
2860
2861     //! Internal objects used in constructing DD
2862     //! \{
2863     //! Settings combined from the user input
2864     DDSettings ddSettings_;
2865     //! Information derived from the simulation system
2866     DDSystemInfo systemInfo_;
2867     //! Box structure
2868     gmx_ddbox_t ddbox_ = { 0 };
2869     //! Organization of the DD grids
2870     DDGridSetup ddGridSetup_;
2871     //! Organization of the DD ranks
2872     DDRankSetup ddRankSetup_;
2873     //! Number of DD cells in each dimension
2874     ivec ddCellIndex_ = { 0, 0, 0 };
2875     //! IDs of PME-only ranks
2876     std::vector<int> pmeRanks_;
2877     //! Contains a valid Cartesian-communicator-based setup, or defaults.
2878     CartesianRankSetup cartSetup_;
2879     //! \}
2880 };
2881
2882 DomainDecompositionBuilder::Impl::Impl(const MDLogger&                   mdlog,
2883                                        t_commrec*                        cr,
2884                                        const DomdecOptions&              options,
2885                                        const MdrunOptions&               mdrunOptions,
2886                                        const gmx_mtop_t&                 mtop,
2887                                        const t_inputrec&                 ir,
2888                                        const matrix                      box,
2889                                        ArrayRef<const RangePartitioning> updateGroupingPerMoleculeType,
2890                                        const bool                        useUpdateGroups,
2891                                        const real                        maxUpdateGroupRadius,
2892                                        ArrayRef<const RVec>              xGlobal) :
2893     mdlog_(mdlog), cr_(cr), options_(options), mtop_(mtop), ir_(ir)
2894 {
2895     GMX_LOG(mdlog_.info).appendTextFormatted("\nInitializing Domain Decomposition on %d ranks", cr_->sizeOfDefaultCommunicator);
2896
2897     ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_);
2898
2899     if (ddSettings_.eFlop > 1)
2900     {
2901         /* Ensure that we have different random flop counts on different ranks */
2902         srand(1 + cr_->rankInDefaultCommunicator);
2903     }
2904
2905     systemInfo_ = getSystemInfo(mdlog_,
2906                                 MASTER(cr_) ? DDRole::Master : DDRole::Agent,
2907                                 cr->mpiDefaultCommunicator,
2908                                 options_,
2909                                 mtop_,
2910                                 ir_,
2911                                 box,
2912                                 updateGroupingPerMoleculeType,
2913                                 useUpdateGroups,
2914                                 maxUpdateGroupRadius,
2915                                 xGlobal);
2916
2917     const int  numRanksRequested         = cr_->sizeOfDefaultCommunicator;
2918     const bool checkForLargePrimeFactors = (options_.numCells[0] <= 0);
2919     checkForValidRankCountRequests(
2920             numRanksRequested, EEL_PME(ir_.coulombtype), options_.numPmeRanks, checkForLargePrimeFactors);
2921
2922     // DD grid setup uses a different cell size limit for
2923     // automated setup than the one in systemInfo_. The latter is used
2924     // in set_dd_limits() to configure DLB, for example.
2925     const real gridSetupCellsizeLimit =
2926             getDDGridSetupCellSizeLimit(mdlog_,
2927                                         !isDlbDisabled(ddSettings_.initialDlbState),
2928                                         options_.dlbScaling,
2929                                         ir_,
2930                                         systemInfo_.cellsizeLimit);
2931     ddGridSetup_ = getDDGridSetup(mdlog_,
2932                                   MASTER(cr_) ? DDRole::Master : DDRole::Agent,
2933                                   cr->mpiDefaultCommunicator,
2934                                   numRanksRequested,
2935                                   options_,
2936                                   ddSettings_,
2937                                   systemInfo_,
2938                                   gridSetupCellsizeLimit,
2939                                   mtop_,
2940                                   ir_,
2941                                   box,
2942                                   xGlobal,
2943                                   &ddbox_);
2944     checkDDGridSetup(ddGridSetup_,
2945                      MASTER(cr_) ? DDRole::Master : DDRole::Agent,
2946                      cr->mpiDefaultCommunicator,
2947                      cr->sizeOfDefaultCommunicator,
2948                      options_,
2949                      ddSettings_,
2950                      systemInfo_,
2951                      gridSetupCellsizeLimit,
2952                      ddbox_);
2953
2954     cr_->npmenodes = ddGridSetup_.numPmeOnlyRanks;
2955
2956     ddRankSetup_ = getDDRankSetup(mdlog_, cr_->sizeOfDefaultCommunicator, ddGridSetup_, ir_);
2957
2958     /* Generate the group communicator, also decides the duty of each rank */
2959     cartSetup_ = makeGroupCommunicators(
2960             mdlog_, ddSettings_, options_.rankOrder, ddRankSetup_, cr_, ddCellIndex_, &pmeRanks_);
2961 }
2962
2963 gmx_domdec_t* DomainDecompositionBuilder::Impl::build(LocalAtomSetManager* atomSets)
2964 {
2965     gmx_domdec_t* dd = new gmx_domdec_t(ir_);
2966
2967     copy_ivec(ddCellIndex_, dd->ci);
2968
2969     dd->comm = init_dd_comm();
2970
2971     dd->comm->ddRankSetup        = ddRankSetup_;
2972     dd->comm->cartesianRankSetup = cartSetup_;
2973
2974     set_dd_limits(mdlog_,
2975                   MASTER(cr_) ? DDRole::Master : DDRole::Agent,
2976                   dd,
2977                   options_,
2978                   ddSettings_,
2979                   systemInfo_,
2980                   ddGridSetup_,
2981                   ddRankSetup_.numPPRanks,
2982                   mtop_,
2983                   ir_,
2984                   ddbox_);
2985
2986     setupGroupCommunication(mdlog_, ddSettings_, pmeRanks_, cr_, mtop_.natoms, dd);
2987
2988     if (thisRankHasDuty(cr_, DUTY_PP))
2989     {
2990         set_ddgrid_parameters(mdlog_, dd, options_.dlbScaling, mtop_, ir_, &ddbox_);
2991
2992         setup_neighbor_relations(dd);
2993     }
2994
2995     /* Set overallocation to avoid frequent reallocation of arrays */
2996     set_over_alloc_dd(true);
2997
2998     dd->atomSets = atomSets;
2999
3000     dd->localTopologyChecker = std::make_unique<LocalTopologyChecker>();
3001
3002     return dd;
3003 }
3004
3005 DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger&      mdlog,
3006                                                        t_commrec*           cr,
3007                                                        const DomdecOptions& options,
3008                                                        const MdrunOptions&  mdrunOptions,
3009                                                        const gmx_mtop_t&    mtop,
3010                                                        const t_inputrec&    ir,
3011                                                        const matrix         box,
3012                                                        ArrayRef<const RangePartitioning> updateGroupingPerMoleculeType,
3013                                                        const bool           useUpdateGroups,
3014                                                        const real           maxUpdateGroupRadius,
3015                                                        ArrayRef<const RVec> xGlobal) :
3016     impl_(new Impl(mdlog, cr, options, mdrunOptions, mtop, ir, box, updateGroupingPerMoleculeType, useUpdateGroups, maxUpdateGroupRadius, xGlobal))
3017 {
3018 }
3019
3020 gmx_domdec_t* DomainDecompositionBuilder::build(LocalAtomSetManager* atomSets)
3021 {
3022     return impl_->build(atomSets);
3023 }
3024
3025 DomainDecompositionBuilder::~DomainDecompositionBuilder() = default;
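/* Minimal usage sketch (hypothetical caller, for illustration only; in GROMACS the
 * builder is driven from the mdrun setup code). All arguments are assumed to be
 * already constructed:
 *
 *   gmx::DomainDecompositionBuilder ddBuilder(mdlog, cr, options, mdrunOptions, mtop,
 *                                             ir, box, updateGroupingPerMoleculeType,
 *                                             useUpdateGroups, maxUpdateGroupRadius,
 *                                             xGlobal);
 *   gmx_domdec_t* dd = ddBuilder.build(&localAtomSetManager);
 *
 * The construct-then-build split mirrors the Impl class above: the constructor does
 * the collective grid and rank setup, while build() creates the gmx_domdec_t that
 * the rest of the run uses.
 */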
3026
3027 } // namespace gmx
3028
3029 static bool test_dd_cutoff(const t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3030 {
3031     gmx_ddbox_t ddbox;
3032     int         LocallyLimited = 0;
3033
3034     const auto* dd = cr->dd;
3035
3036     set_ddbox(*dd, false, box, true, x, &ddbox);
3037
3040     for (int d = 0; d < dd->ndim; d++)
3041     {
3042         const int dim = dd->dim[d];
3043
3044         real inv_cell_size = DD_CELL_MARGIN * dd->numCells[dim] / ddbox.box_size[dim];
3045         if (dd->unitCellInfo.ddBoxIsDynamic)
3046         {
3047             inv_cell_size *= DD_PRES_SCALE_MARGIN;
3048         }
3049
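        /* Number of cells (communication pulses) the requested cut-off spans along
         * this dimension, given the margin-scaled cell size and the box skew */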
3050         const int np = 1 + static_cast<int>(cutoffRequested * inv_cell_size * ddbox.skew_fac[dim]);
3051
3052         if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3053         {
3054             if (np > dd->comm->cd[d].np_dlb)
3055             {
3056                 return false;
3057             }
3058
3059             /* If the current local cell size is smaller than the requested
3060              * cut-off, we could still fix it, but this gets very complicated.
3061              * Without fixing it here, we might actually need more checks.
3062              */
3063             real cellSizeAlongDim =
3064                     (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim]) * ddbox.skew_fac[dim];
3065             if (cellSizeAlongDim * dd->comm->cd[d].np_dlb < cutoffRequested)
3066             {
3067                 LocallyLimited = 1;
3068             }
3069         }
3070     }
3071
3072     if (!isDlbDisabled(dd->comm))
3073     {
3074         /* If DLB is not active yet, we don't need to check the grid jumps.
3075          * In fact we must not, because the grid jump data is not set then.
3076          */
3077         if (isDlbOn(dd->comm) && gmx::check_grid_jump(0, dd, cutoffRequested, &ddbox, false))
3078         {
3079             LocallyLimited = 1;
3080         }
3081
3082         gmx_sumi(1, &LocallyLimited, cr);
3083
3084         if (LocallyLimited > 0)
3085         {
3086             return false;
3087         }
3088     }
3089
3090     return true;
3091 }
3092
3093 bool change_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3094 {
3095     bool bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3096
3097     if (bCutoffAllowed)
3098     {
3099         cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3100     }
3101
3102     return bCutoffAllowed;
3103 }
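/* Illustrative call pattern (hypothetical, for documentation only): a caller that
 * wants to increase the pair-list cut-off first checks whether the current
 * decomposition can support it and keeps the old value otherwise:
 *
 *   if (!change_dd_cutoff(cr, box, x, newCutoff))
 *   {
 *       // newCutoff is too large for the current DD grid; keep the previous cut-off
 *   }
 */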
3104
3105 void constructGpuHaloExchange(const gmx::MDLogger&            mdlog,
3106                               const t_commrec&                cr,
3107                               const gmx::DeviceStreamManager& deviceStreamManager,
3108                               gmx_wallcycle*                  wcycle)
3109 {
3110     GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedLocal),
3111                        "Local non-bonded stream should be valid when using "
3112                        "GPU halo exchange.");
3113     GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedNonLocal),
3114                        "Non-local non-bonded stream should be valid when using "
3115                        "GPU halo exchange.");
3116
3117     if (cr.dd->gpuHaloExchange[0].empty())
3118     {
3119         GMX_LOG(mdlog.warning)
3120                 .asParagraph()
3121                 .appendTextFormatted(
3122                         "NOTE: Activating the 'GPU halo exchange' feature, enabled "
3123                         "by the "
3124                         "GMX_GPU_DD_COMMS environment variable.");
3125     }
3126
3127     for (int d = 0; d < cr.dd->ndim; d++)
3128     {
3129         for (int pulse = cr.dd->gpuHaloExchange[d].size(); pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
3130         {
3131             cr.dd->gpuHaloExchange[d].push_back(std::make_unique<gmx::GpuHaloExchange>(
3132                     cr.dd,
3133                     d,
3134                     cr.mpi_comm_mygroup,
3135                     deviceStreamManager.context(),
3136                     deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedLocal),
3137                     deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedNonLocal),
3138                     pulse,
3139                     wcycle));
3140         }
3141     }
3142 }
3143
3144 void reinitGpuHaloExchange(const t_commrec&              cr,
3145                            const DeviceBuffer<gmx::RVec> d_coordinatesBuffer,
3146                            const DeviceBuffer<gmx::RVec> d_forcesBuffer)
3147 {
3148     for (int d = 0; d < cr.dd->ndim; d++)
3149     {
3150         for (int pulse = 0; pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
3151         {
3152             cr.dd->gpuHaloExchange[d][pulse]->reinitHalo(d_coordinatesBuffer, d_forcesBuffer);
3153         }
3154     }
3155 }
3156
3157 void communicateGpuHaloCoordinates(const t_commrec&      cr,
3158                                    const matrix          box,
3159                                    GpuEventSynchronizer* coordinatesReadyOnDeviceEvent)
3160 {
3161     for (int d = 0; d < cr.dd->ndim; d++)
3162     {
3163         for (int pulse = 0; pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
3164         {
3165             cr.dd->gpuHaloExchange[d][pulse]->communicateHaloCoordinates(box, coordinatesReadyOnDeviceEvent);
3166         }
3167     }
3168 }
3169
3170 void communicateGpuHaloForces(const t_commrec& cr, bool accumulateForces)
3171 {
3172     for (int d = cr.dd->ndim - 1; d >= 0; d--)
3173     {
3174         for (int pulse = cr.dd->comm->cd[d].numPulses() - 1; pulse >= 0; pulse--)
3175         {
3176             cr.dd->gpuHaloExchange[d][pulse]->communicateHaloForces(accumulateForces);
3177         }
3178     }
3179 }
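/* Illustrative call sequence for the GPU halo exchange API above (hypothetical
 * caller, assuming GPU direct halo communication is enabled): exchange objects are
 * (re)constructed after partitioning, re-initialized with the current device
 * buffers, and then used every MD step:
 *
 *   constructGpuHaloExchange(mdlog, *cr, deviceStreamManager, wcycle); // after DD partitioning
 *   reinitGpuHaloExchange(*cr, d_coordinates, d_forces);               // buffers may have been reallocated
 *   // every step:
 *   communicateGpuHaloCoordinates(*cr, box, coordinatesReadyOnDeviceEvent); // before non-local forces
 *   communicateGpuHaloForces(*cr, accumulateForces);                        // after force computation
 */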