1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2005,2006,2007,2008,2009 by the GROMACS development team.
5  * Copyright (c) 2010,2011,2012,2013,2014 by the GROMACS development team.
6  * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team.
7  * Copyright (c) 2020, by the GROMACS development team, led by
8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9  * and including many others, as listed in the AUTHORS file in the
10  * top-level source directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38
39 #include "gmxpre.h"
40
41 #include "domdec.h"
42
43 #include "config.h"
44
45 #include <cassert>
46 #include <cinttypes>
47 #include <climits>
48 #include <cmath>
49 #include <cstdio>
50 #include <cstdlib>
51 #include <cstring>
52
53 #include <algorithm>
54 #include <memory>
55
56 #include "gromacs/domdec/builder.h"
57 #include "gromacs/domdec/collect.h"
58 #include "gromacs/domdec/dlb.h"
59 #include "gromacs/domdec/dlbtiming.h"
60 #include "gromacs/domdec/domdec_network.h"
61 #include "gromacs/domdec/ga2la.h"
62 #include "gromacs/domdec/gpuhaloexchange.h"
63 #include "gromacs/domdec/options.h"
64 #include "gromacs/domdec/partition.h"
65 #include "gromacs/gmxlib/network.h"
66 #include "gromacs/gmxlib/nrnb.h"
67 #include "gromacs/gpu_utils/device_stream_manager.h"
68 #include "gromacs/gpu_utils/gpu_utils.h"
69 #include "gromacs/hardware/hw_info.h"
70 #include "gromacs/math/vec.h"
71 #include "gromacs/math/vectypes.h"
72 #include "gromacs/mdlib/calc_verletbuf.h"
73 #include "gromacs/mdlib/constr.h"
74 #include "gromacs/mdlib/constraintrange.h"
75 #include "gromacs/mdlib/updategroups.h"
76 #include "gromacs/mdlib/vsite.h"
77 #include "gromacs/mdtypes/commrec.h"
78 #include "gromacs/mdtypes/forceoutput.h"
79 #include "gromacs/mdtypes/inputrec.h"
80 #include "gromacs/mdtypes/mdrunoptions.h"
81 #include "gromacs/mdtypes/state.h"
82 #include "gromacs/pbcutil/ishift.h"
83 #include "gromacs/pbcutil/pbc.h"
84 #include "gromacs/pulling/pull.h"
85 #include "gromacs/timing/wallcycle.h"
86 #include "gromacs/topology/block.h"
87 #include "gromacs/topology/idef.h"
88 #include "gromacs/topology/ifunc.h"
89 #include "gromacs/topology/mtop_lookup.h"
90 #include "gromacs/topology/mtop_util.h"
91 #include "gromacs/topology/topology.h"
92 #include "gromacs/utility/basedefinitions.h"
93 #include "gromacs/utility/basenetwork.h"
94 #include "gromacs/utility/cstringutil.h"
95 #include "gromacs/utility/exceptions.h"
96 #include "gromacs/utility/fatalerror.h"
97 #include "gromacs/utility/gmxmpi.h"
98 #include "gromacs/utility/logger.h"
99 #include "gromacs/utility/real.h"
100 #include "gromacs/utility/smalloc.h"
101 #include "gromacs/utility/strconvert.h"
102 #include "gromacs/utility/stringstream.h"
103 #include "gromacs/utility/stringutil.h"
104 #include "gromacs/utility/textwriter.h"
105
106 #include "atomdistribution.h"
107 #include "box.h"
108 #include "cellsizes.h"
109 #include "distribute.h"
110 #include "domdec_constraints.h"
111 #include "domdec_internal.h"
112 #include "domdec_setup.h"
113 #include "domdec_vsite.h"
114 #include "redistribute.h"
115 #include "utility.h"
116
117 // TODO remove this when moving domdec into gmx namespace
118 using gmx::DdRankOrder;
119 using gmx::DlbOption;
120 using gmx::DomdecOptions;
121
122 static const char* edlbs_names[int(DlbState::nr)] = { "off", "off", "auto", "locked", "on", "on" };
123
124 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
125 #define DD_CGIBS 2
126
127 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
128 #define DD_FLAG_NRCG 65535
129 #define DD_FLAG_FW(d) (1 << (16 + (d)*2))
130 #define DD_FLAG_BW(d) (1 << (16 + (d)*2 + 1))
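/* Illustrative note (not part of the original source): the lower 16 bits of a
 * cggl_flag entry are reserved for a count and are selected with DD_FLAG_NRCG,
 * while DD_FLAG_FW(d) and DD_FLAG_BW(d) select single bits above them per
 * decomposition dimension d. For example, for d = 1 DD_FLAG_FW(1) is bit 18 and
 * DD_FLAG_BW(1) is bit 19.
 */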
131
132 /* The DD zone order */
133 static const ivec dd_zo[DD_MAXZONE] = { { 0, 0, 0 }, { 1, 0, 0 }, { 1, 1, 0 }, { 0, 1, 0 },
134                                         { 0, 1, 1 }, { 0, 0, 1 }, { 1, 0, 1 }, { 1, 1, 1 } };
135
136 /* The non-bonded zone-pair setup for domain decomposition
137  * The first number is the i-zone, the second number the first j-zone seen by
138  * this i-zone, the third number the last+1 j-zone seen by this i-zone.
139  * As is, this is for 3D decomposition, where there are 4 i-zones.
140  * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
141  * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
142  */
143 static const int ddNonbondedZonePairRanges[DD_MAXIZONE][3] = { { 0, 0, 8 },
144                                                                { 1, 3, 6 },
145                                                                { 2, 5, 6 },
146                                                                { 3, 5, 7 } };
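/* Illustrative example (not part of the original source): with 2D decomposition
 * there are nzone = 4 zones and the ranges above get clamped to nzone where they
 * are used, so i-zone 0 interacts with j-zones 0-3 while i-zone 1, whose range
 * { 1, 3, 6 } becomes [3, 4) after clamping, interacts with j-zone 3 only.
 */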
147
148 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
149 {
150     xyz[XX] = ind / (nc[YY] * nc[ZZ]);
151     xyz[YY] = (ind / nc[ZZ]) % nc[YY];
152     xyz[ZZ] = ind % nc[ZZ];
153 }
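/* Illustrative example (not part of the original source): ddindex2xyz unpacks a
 * row-major rank index with x varying slowest and z fastest. For nc = {2, 3, 4}
 * and ind = 17 it yields xyz = {1, 1, 1}, since 17 = 1*(3*4) + 1*4 + 1;
 * dd_index() performs the inverse mapping.
 */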
154
155 static int ddcoord2ddnodeid(gmx_domdec_t* dd, ivec c)
156 {
157     int ddnodeid = -1;
158
159     const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
160     const int                 ddindex   = dd_index(dd->numCells, c);
161     if (cartSetup.bCartesianPP_PME)
162     {
163         ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
164     }
165     else if (cartSetup.bCartesianPP)
166     {
167 #if GMX_MPI
168         MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
169 #endif
170     }
171     else
172     {
173         ddnodeid = ddindex;
174     }
175
176     return ddnodeid;
177 }
178
179 int ddglatnr(const gmx_domdec_t* dd, int i)
180 {
181     int atnr;
182
183     if (dd == nullptr)
184     {
185         atnr = i + 1;
186     }
187     else
188     {
189         if (i >= dd->comm->atomRanges.numAtomsTotal())
190         {
191             gmx_fatal(FARGS,
192                       "glatnr called with %d, which is larger than the local number of atoms (%d)",
193                       i,
194                       dd->comm->atomRanges.numAtomsTotal());
195         }
196         atnr = dd->globalAtomIndices[i] + 1;
197     }
198
199     return atnr;
200 }
201
202 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t& dd)
203 {
204     GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
205     return dd.comm->systemInfo.updateGroupingPerMoleculetype;
206 }
207
208 void dd_store_state(gmx_domdec_t* dd, t_state* state)
209 {
210     int i;
211
212     if (state->ddp_count != dd->ddp_count)
213     {
214         gmx_incons("The MD state does not match the domain decomposition state");
215     }
216
217     state->cg_gl.resize(dd->ncg_home);
218     for (i = 0; i < dd->ncg_home; i++)
219     {
220         state->cg_gl[i] = dd->globalAtomGroupIndices[i];
221     }
222
223     state->ddp_count_cg_gl = dd->ddp_count;
224 }
225
226 gmx_domdec_zones_t* domdec_zones(gmx_domdec_t* dd)
227 {
228     return &dd->comm->zones;
229 }
230
231 int dd_numAtomsZones(const gmx_domdec_t& dd)
232 {
233     return dd.comm->atomRanges.end(DDAtomRanges::Type::Zones);
234 }
235
236 int dd_numHomeAtoms(const gmx_domdec_t& dd)
237 {
238     return dd.comm->atomRanges.numHomeAtoms();
239 }
240
241 int dd_natoms_mdatoms(const gmx_domdec_t* dd)
242 {
243     /* We currently set mdatoms entries for all atoms:
244      * local + non-local + communicated for vsite + constraints
245      */
246
247     return dd->comm->atomRanges.numAtomsTotal();
248 }
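/* Illustrative note (an assumption about the DDAtomRanges layout, not stated in
 * this file): the ranges are nested as home <= zones <= vsites <= constraints,
 * so numHomeAtoms() <= end(Zones) <= end(Vsites) <= end(Constraints), and the
 * last of these equals numAtomsTotal().
 */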
249
250 int dd_natoms_vsite(const gmx_domdec_t* dd)
251 {
252     return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
253 }
254
255 void dd_get_constraint_range(const gmx_domdec_t* dd, int* at_start, int* at_end)
256 {
257     *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
258     *at_end   = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
259 }
260
261 void dd_move_x(gmx_domdec_t* dd, const matrix box, gmx::ArrayRef<gmx::RVec> x, gmx_wallcycle* wcycle)
262 {
263     wallcycle_start(wcycle, ewcMOVEX);
264
265     int                    nzone, nat_tot;
266     gmx_domdec_comm_t*     comm;
267     gmx_domdec_comm_dim_t* cd;
268     rvec                   shift = { 0, 0, 0 };
269     gmx_bool               bPBC, bScrew;
270
271     comm = dd->comm;
272
273     nzone   = 1;
274     nat_tot = comm->atomRanges.numHomeAtoms();
275     for (int d = 0; d < dd->ndim; d++)
276     {
277         bPBC   = (dd->ci[dd->dim[d]] == 0);
278         bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
279         if (bPBC)
280         {
281             copy_rvec(box[dd->dim[d]], shift);
282         }
283         cd = &comm->cd[d];
284         for (const gmx_domdec_ind_t& ind : cd->ind)
285         {
286             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
287             gmx::ArrayRef<gmx::RVec>& sendBuffer = sendBufferAccess.buffer;
288             int                       n          = 0;
289             if (!bPBC)
290             {
291                 for (int j : ind.index)
292                 {
293                     sendBuffer[n] = x[j];
294                     n++;
295                 }
296             }
297             else if (!bScrew)
298             {
299                 for (int j : ind.index)
300                 {
301                     /* We need to shift the coordinates */
302                     for (int d = 0; d < DIM; d++)
303                     {
304                         sendBuffer[n][d] = x[j][d] + shift[d];
305                     }
306                     n++;
307                 }
308             }
309             else
310             {
311                 for (int j : ind.index)
312                 {
313                     /* Shift x */
314                     sendBuffer[n][XX] = x[j][XX] + shift[XX];
315                     /* Rotate y and z.
316                      * This operation requires a special shift force
317                      * treatment, which is performed in calc_vir.
318                      */
319                     sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
320                     sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
321                     n++;
322                 }
323             }
324
325             DDBufferAccess<gmx::RVec> receiveBufferAccess(
326                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
327
328             gmx::ArrayRef<gmx::RVec> receiveBuffer;
329             if (cd->receiveInPlace)
330             {
331                 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
332             }
333             else
334             {
335                 receiveBuffer = receiveBufferAccess.buffer;
336             }
337             /* Send and receive the coordinates */
338             ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
339
340             if (!cd->receiveInPlace)
341             {
342                 int j = 0;
343                 for (int zone = 0; zone < nzone; zone++)
344                 {
345                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
346                     {
347                         x[i] = receiveBuffer[j++];
348                     }
349                 }
350             }
351             nat_tot += ind.nrecv[nzone + 1];
352         }
353         nzone += nzone;
354     }
355
356     wallcycle_stop(wcycle, ewcMOVEX);
357 }
358
359 void dd_move_f(gmx_domdec_t* dd, gmx::ForceWithShiftForces* forceWithShiftForces, gmx_wallcycle* wcycle)
360 {
361     wallcycle_start(wcycle, ewcMOVEF);
362
363     gmx::ArrayRef<gmx::RVec> f      = forceWithShiftForces->force();
364     gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();
365
366     gmx_domdec_comm_t& comm    = *dd->comm;
367     int                nzone   = comm.zones.n / 2;
368     int                nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
369     for (int d = dd->ndim - 1; d >= 0; d--)
370     {
371         /* Only forces in domains near the PBC boundaries need to
372            consider PBC in the treatment of fshift */
373         const bool shiftForcesNeedPbc =
374                 (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
375         const bool applyScrewPbc =
376                 (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
377         /* Determine which shift vector we need */
378         ivec vis        = { 0, 0, 0 };
379         vis[dd->dim[d]] = 1;
380         const int is    = IVEC2IS(vis);
381
382         /* Loop over the pulses */
383         const gmx_domdec_comm_dim_t& cd = comm.cd[d];
384         for (int p = cd.numPulses() - 1; p >= 0; p--)
385         {
386             const gmx_domdec_ind_t&   ind = cd.ind[p];
387             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
388             gmx::ArrayRef<gmx::RVec>& receiveBuffer = receiveBufferAccess.buffer;
389
390             nat_tot -= ind.nrecv[nzone + 1];
391
392             DDBufferAccess<gmx::RVec> sendBufferAccess(
393                     comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
394
395             gmx::ArrayRef<gmx::RVec> sendBuffer;
396             if (cd.receiveInPlace)
397             {
398                 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
399             }
400             else
401             {
402                 sendBuffer = sendBufferAccess.buffer;
403                 int j      = 0;
404                 for (int zone = 0; zone < nzone; zone++)
405                 {
406                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
407                     {
408                         sendBuffer[j++] = f[i];
409                     }
410                 }
411             }
412             /* Communicate the forces */
413             ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
414             /* Add the received forces */
415             int n = 0;
416             if (!shiftForcesNeedPbc)
417             {
418                 for (int j : ind.index)
419                 {
420                     for (int d = 0; d < DIM; d++)
421                     {
422                         f[j][d] += receiveBuffer[n][d];
423                     }
424                     n++;
425                 }
426             }
427             else if (!applyScrewPbc)
428             {
429                 for (int j : ind.index)
430                 {
431                     for (int d = 0; d < DIM; d++)
432                     {
433                         f[j][d] += receiveBuffer[n][d];
434                     }
435                     /* Add this force to the shift force */
436                     for (int d = 0; d < DIM; d++)
437                     {
438                         fshift[is][d] += receiveBuffer[n][d];
439                     }
440                     n++;
441                 }
442             }
443             else
444             {
445                 for (int j : ind.index)
446                 {
447                     /* Rotate the force */
448                     f[j][XX] += receiveBuffer[n][XX];
449                     f[j][YY] -= receiveBuffer[n][YY];
450                     f[j][ZZ] -= receiveBuffer[n][ZZ];
451                     if (shiftForcesNeedPbc)
452                     {
453                         /* Add this force to the shift force */
454                         for (int d = 0; d < DIM; d++)
455                         {
456                             fshift[is][d] += receiveBuffer[n][d];
457                         }
458                     }
459                     n++;
460                 }
461             }
462         }
463         nzone /= 2;
464     }
465     wallcycle_stop(wcycle, ewcMOVEF);
466 }
467
468 /* Convenience function for extracting a real buffer from an rvec buffer
469  *
470  * To reduce the number of temporary communication buffers and avoid
471  * cache pollution, we reuse gmx::RVec buffers for storing reals.
472  * This function returns a real buffer reference with the same number
473  * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
474  */
475 static gmx::ArrayRef<real> realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
476 {
477     return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0], arrayRef.size());
478 }
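/* Illustrative example (not part of the original source): an ArrayRef<gmx::RVec>
 * with 8 elements is backed by 24 reals; the view returned above has 8 real
 * elements and thus covers only the first third of that storage, which is all
 * the callers below need.
 */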
479
480 void dd_atom_spread_real(gmx_domdec_t* dd, real v[])
481 {
482     int                    nzone, nat_tot;
483     gmx_domdec_comm_t*     comm;
484     gmx_domdec_comm_dim_t* cd;
485
486     comm = dd->comm;
487
488     nzone   = 1;
489     nat_tot = comm->atomRanges.numHomeAtoms();
490     for (int d = 0; d < dd->ndim; d++)
491     {
492         cd = &comm->cd[d];
493         for (const gmx_domdec_ind_t& ind : cd->ind)
494         {
495             /* Note: We provision for RVec instead of real, so a factor of 3
496              * more than needed. The buffer actually already has this size
497              * and we pass a plain pointer below, so this does not matter.
498              */
499             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
500             gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
501             int                 n          = 0;
502             for (int j : ind.index)
503             {
504                 sendBuffer[n++] = v[j];
505             }
506
507             DDBufferAccess<gmx::RVec> receiveBufferAccess(
508                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
509
510             gmx::ArrayRef<real> receiveBuffer;
511             if (cd->receiveInPlace)
512             {
513                 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
514             }
515             else
516             {
517                 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
518             }
519             /* Send and receive the data */
520             ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
521             if (!cd->receiveInPlace)
522             {
523                 int j = 0;
524                 for (int zone = 0; zone < nzone; zone++)
525                 {
526                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
527                     {
528                         v[i] = receiveBuffer[j++];
529                     }
530                 }
531             }
532             nat_tot += ind.nrecv[nzone + 1];
533         }
534         nzone += nzone;
535     }
536 }
537
538 void dd_atom_sum_real(gmx_domdec_t* dd, real v[])
539 {
540     int                    nzone, nat_tot;
541     gmx_domdec_comm_t*     comm;
542     gmx_domdec_comm_dim_t* cd;
543
544     comm = dd->comm;
545
546     nzone   = comm->zones.n / 2;
547     nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
548     for (int d = dd->ndim - 1; d >= 0; d--)
549     {
550         cd = &comm->cd[d];
551         for (int p = cd->numPulses() - 1; p >= 0; p--)
552         {
553             const gmx_domdec_ind_t& ind = cd->ind[p];
554
555             /* Note: We provision for RVec instead of real, so a factor of 3
556              * more than needed. The buffer actually already has this size
557              * and we typecast, so this works as intended.
558              */
559             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
560             gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
561             nat_tot -= ind.nrecv[nzone + 1];
562
563             DDBufferAccess<gmx::RVec> sendBufferAccess(
564                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
565
566             gmx::ArrayRef<real> sendBuffer;
567             if (cd->receiveInPlace)
568             {
569                 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
570             }
571             else
572             {
573                 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
574                 int j      = 0;
575                 for (int zone = 0; zone < nzone; zone++)
576                 {
577                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
578                     {
579                         sendBuffer[j++] = v[i];
580                     }
581                 }
582             }
583             /* Communicate the data */
584             ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
585             /* Add the received values */
586             int n = 0;
587             for (int j : ind.index)
588             {
589                 v[j] += receiveBuffer[n];
590                 n++;
591             }
592         }
593         nzone /= 2;
594     }
595 }
596
597 real dd_cutoff_multibody(const gmx_domdec_t* dd)
598 {
599     const gmx_domdec_comm_t& comm       = *dd->comm;
600     const DDSystemInfo&      systemInfo = comm.systemInfo;
601
602     real r = -1;
603     if (systemInfo.haveInterDomainMultiBodyBondeds)
604     {
605         if (comm.cutoff_mbody > 0)
606         {
607             r = comm.cutoff_mbody;
608         }
609         else
610         {
611             /* cutoff_mbody=0 means we do not have DLB */
612             r = comm.cellsize_min[dd->dim[0]];
613             for (int di = 1; di < dd->ndim; di++)
614             {
615                 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
616             }
617             if (comm.systemInfo.filterBondedCommunication)
618             {
619                 r = std::max(r, comm.cutoff_mbody);
620             }
621             else
622             {
623                 r = std::min(r, systemInfo.cutoff);
624             }
625         }
626     }
627
628     return r;
629 }
630
631 real dd_cutoff_twobody(const gmx_domdec_t* dd)
632 {
633     real r_mb;
634
635     r_mb = dd_cutoff_multibody(dd);
636
637     return std::max(dd->comm->systemInfo.cutoff, r_mb);
638 }
639
640
641 static void dd_cart_coord2pmecoord(const DDRankSetup&        ddRankSetup,
642                                    const CartesianRankSetup& cartSetup,
643                                    const ivec                coord,
644                                    ivec                      coord_pme)
645 {
646     const int nc   = ddRankSetup.numPPCells[cartSetup.cartpmedim];
647     const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
648     copy_ivec(coord, coord_pme);
649     coord_pme[cartSetup.cartpmedim] =
650             nc + (coord[cartSetup.cartpmedim] * (ntot - nc) + (ntot - nc) / 2) / nc;
651 }
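/* Illustrative example (not part of the original source): with nc = 4 PP cells
 * and ntot = 6 Cartesian slots along cartpmedim (2 PME-only slots stacked after
 * the PP cells), the formula above evaluates to 4 + (coord*2 + 1)/4, so PP
 * coordinates 0 and 1 map to PME coordinate 4 and PP coordinates 2 and 3 map to
 * PME coordinate 5.
 */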
652
653 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
654 static int ddindex2pmeindex(const DDRankSetup& ddRankSetup, const int ddCellIndex)
655 {
656     const int npp  = ddRankSetup.numPPRanks;
657     const int npme = ddRankSetup.numRanksDoingPme;
658
659     /* Here we assign a PME node to communicate with this DD node
660      * by assuming that the major index of both is x.
661      * We add npme/2 to obtain an even distribution.
662      */
663     return (ddCellIndex * npme + npme / 2) / npp;
664 }
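/* Illustrative example (not part of the original source): for npp = 6 and
 * npme = 2 the expression (ddCellIndex*npme + npme/2)/npp maps DD indices 0-2
 * to PME index 0 and DD indices 3-5 to PME index 1, an even 3:3 split.
 */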
665
666 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup& ddRankSetup)
667 {
668     std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
669
670     int n = 0;
671     for (int i = 0; i < ddRankSetup.numPPRanks; i++)
672     {
673         const int p0 = ddindex2pmeindex(ddRankSetup, i);
674         const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
675         if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
676         {
677             if (debug)
678             {
679                 fprintf(debug, "pme_rank[%d] = %d\n", n, i + 1 + n);
680             }
681             pmeRanks[n] = i + 1 + n;
682             n++;
683         }
684     }
685
686     return pmeRanks;
687 }
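/* Illustrative example (not part of the original source): continuing the case of
 * 6 PP ranks and 2 PME-only ranks, a PME rank is inserted after each block of PP
 * ranks that maps to the same PME index, giving pmeRanks = {3, 7}: simulation
 * ranks 0-2 and 4-6 do PP work, ranks 3 and 7 do PME work.
 */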
688
689 static int gmx_ddcoord2pmeindex(const t_commrec* cr, int x, int y, int z)
690 {
691     gmx_domdec_t* dd;
692     ivec          coords;
693     int           slab;
694
695     dd         = cr->dd;
696     coords[XX] = x;
697     coords[YY] = y;
698     coords[ZZ] = z;
699     slab       = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->numCells, coords));
700
701     return slab;
702 }
703
704 static int ddcoord2simnodeid(const t_commrec* cr, int x, int y, int z)
705 {
706     const CartesianRankSetup& cartSetup = cr->dd->comm->cartesianRankSetup;
707     ivec                      coords    = { x, y, z };
708     int                       nodeid    = -1;
709
710     if (cartSetup.bCartesianPP_PME)
711     {
712 #if GMX_MPI
713         MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
714 #endif
715     }
716     else
717     {
718         int ddindex = dd_index(cr->dd->numCells, coords);
719         if (cartSetup.bCartesianPP)
720         {
721             nodeid = cartSetup.ddindex2simnodeid[ddindex];
722         }
723         else
724         {
725             if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
726             {
727                 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
728             }
729             else
730             {
731                 nodeid = ddindex;
732             }
733         }
734     }
735
736     return nodeid;
737 }
738
739 static int dd_simnode2pmenode(const DDRankSetup&        ddRankSetup,
740                               const CartesianRankSetup& cartSetup,
741                               gmx::ArrayRef<const int>  pmeRanks,
742                               const t_commrec gmx_unused* cr,
743                               const int                   sim_nodeid)
744 {
745     int pmenode = -1;
746
747     /* This assumes a uniform x domain decomposition grid cell size */
748     if (cartSetup.bCartesianPP_PME)
749     {
750 #if GMX_MPI
751         ivec coord, coord_pme;
752         MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
753         if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
754         {
755             /* This is a PP rank */
756             dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
757             MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
758         }
759 #endif
760     }
761     else if (cartSetup.bCartesianPP)
762     {
763         if (sim_nodeid < ddRankSetup.numPPRanks)
764         {
765             pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
766         }
767     }
768     else
769     {
770         /* This assumes DD cells with identical x coordinates
771          * are numbered sequentially.
772          */
773         if (pmeRanks.empty())
774         {
775             if (sim_nodeid < ddRankSetup.numPPRanks)
776             {
777                 /* The DD index equals the nodeid */
778                 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
779             }
780         }
781         else
782         {
783             int i = 0;
784             while (sim_nodeid > pmeRanks[i])
785             {
786                 i++;
787             }
788             if (sim_nodeid < pmeRanks[i])
789             {
790                 pmenode = pmeRanks[i];
791             }
792         }
793     }
794
795     return pmenode;
796 }
797
798 NumPmeDomains getNumPmeDomains(const gmx_domdec_t* dd)
799 {
800     if (dd != nullptr)
801     {
802         return { dd->comm->ddRankSetup.npmenodes_x, dd->comm->ddRankSetup.npmenodes_y };
803     }
804     else
805     {
806         return { 1, 1 };
807     }
808 }
809
810 std::vector<int> get_pme_ddranks(const t_commrec* cr, const int pmenodeid)
811 {
812     const DDRankSetup&        ddRankSetup = cr->dd->comm->ddRankSetup;
813     const CartesianRankSetup& cartSetup   = cr->dd->comm->cartesianRankSetup;
814     GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks,
815                        "This function should only be called when PME-only ranks are in use");
816     std::vector<int> ddranks;
817     ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1) / ddRankSetup.numRanksDoingPme);
818
819     for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
820     {
821         for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
822         {
823             for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
824             {
825                 if (cartSetup.bCartesianPP_PME)
826                 {
827                     ivec coord = { x, y, z };
828                     ivec coord_pme;
829                     dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
830                     if (cr->dd->ci[XX] == coord_pme[XX] && cr->dd->ci[YY] == coord_pme[YY]
831                         && cr->dd->ci[ZZ] == coord_pme[ZZ])
832                     {
833                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
834                     }
835                 }
836                 else
837                 {
838                     /* The slab corresponds to the nodeid in the PME group */
839                     if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
840                     {
841                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
842                     }
843                 }
844             }
845         }
846     }
847     return ddranks;
848 }
849
850 static gmx_bool receive_vir_ener(const gmx_domdec_t* dd, gmx::ArrayRef<const int> pmeRanks, const t_commrec* cr)
851 {
852     gmx_bool bReceive = TRUE;
853
854     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
855     if (ddRankSetup.usePmeOnlyRanks)
856     {
857         const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
858         if (cartSetup.bCartesianPP_PME)
859         {
860 #if GMX_MPI
861             int  pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
862             ivec coords;
863             MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
864             coords[cartSetup.cartpmedim]++;
865             if (coords[cartSetup.cartpmedim] < dd->numCells[cartSetup.cartpmedim])
866             {
867                 int rank;
868                 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
869                 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
870                 {
871                     /* This is not the last PP node for pmenode */
872                     bReceive = FALSE;
873                 }
874             }
875 #else
876             GMX_RELEASE_ASSERT(
877                     false,
878                     "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
879 #endif
880         }
881         else
882         {
883             int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
884             if (cr->sim_nodeid + 1 < cr->nnodes
885                 && dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
886             {
887                 /* This is not the last PP node for pmenode */
888                 bReceive = FALSE;
889             }
890         }
891     }
892
893     return bReceive;
894 }
895
896 static void set_slb_pme_dim_f(gmx_domdec_t* dd, int dim, real** dim_f)
897 {
898     gmx_domdec_comm_t* comm;
899     int                i;
900
901     comm = dd->comm;
902
903     snew(*dim_f, dd->numCells[dim] + 1);
904     (*dim_f)[0] = 0;
905     for (i = 1; i < dd->numCells[dim]; i++)
906     {
907         if (comm->slb_frac[dim])
908         {
909             (*dim_f)[i] = (*dim_f)[i - 1] + comm->slb_frac[dim][i - 1];
910         }
911         else
912         {
913             (*dim_f)[i] = static_cast<real>(i) / static_cast<real>(dd->numCells[dim]);
914         }
915     }
916     (*dim_f)[dd->numCells[dim]] = 1;
917 }
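/* Illustrative example (not part of the original source): for 3 cells along dim
 * with slb_frac = {0.3, 0.5, 0.2}, the cumulative boundaries above become
 * dim_f = {0, 0.3, 0.8, 1}; without slb_frac the spacing is uniform,
 * i.e. {0, 1/3, 2/3, 1}.
 */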
918
919 static void init_ddpme(gmx_domdec_t* dd, gmx_ddpme_t* ddpme, int dimind)
920 {
921     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
922
923     if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
924     {
925         ddpme->dim = YY;
926     }
927     else
928     {
929         ddpme->dim = dimind;
930     }
931     ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
932
933     ddpme->nslab = (ddpme->dim == 0 ? ddRankSetup.npmenodes_x : ddRankSetup.npmenodes_y);
934
935     if (ddpme->nslab <= 1)
936     {
937         return;
938     }
939
940     const int nso = ddRankSetup.numRanksDoingPme / ddpme->nslab;
941     /* Determine for each PME slab the PP location range for dimension dim */
942     snew(ddpme->pp_min, ddpme->nslab);
943     snew(ddpme->pp_max, ddpme->nslab);
944     for (int slab = 0; slab < ddpme->nslab; slab++)
945     {
946         ddpme->pp_min[slab] = dd->numCells[dd->dim[dimind]] - 1;
947         ddpme->pp_max[slab] = 0;
948     }
949     for (int i = 0; i < dd->nnodes; i++)
950     {
951         ivec xyz;
952         ddindex2xyz(dd->numCells, i, xyz);
953         /* For y only use our y/z slab.
954          * This assumes that the PME x grid size matches the DD grid size.
955          */
956         if (dimind == 0 || xyz[XX] == dd->ci[XX])
957         {
958             const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
959             int       slab;
960             if (dimind == 0)
961             {
962                 slab = pmeindex / nso;
963             }
964             else
965             {
966                 slab = pmeindex % ddpme->nslab;
967             }
968             ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
969             ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
970         }
971     }
972
973     set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
974 }
975
976 int dd_pme_maxshift_x(const gmx_domdec_t* dd)
977 {
978     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
979
980     if (ddRankSetup.ddpme[0].dim == XX)
981     {
982         return ddRankSetup.ddpme[0].maxshift;
983     }
984     else
985     {
986         return 0;
987     }
988 }
989
990 int dd_pme_maxshift_y(const gmx_domdec_t* dd)
991 {
992     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
993
994     if (ddRankSetup.ddpme[0].dim == YY)
995     {
996         return ddRankSetup.ddpme[0].maxshift;
997     }
998     else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
999     {
1000         return ddRankSetup.ddpme[1].maxshift;
1001     }
1002     else
1003     {
1004         return 0;
1005     }
1006 }
1007
1008 bool ddHaveSplitConstraints(const gmx_domdec_t& dd)
1009 {
1010     return dd.comm->systemInfo.haveSplitConstraints;
1011 }
1012
1013 bool ddUsesUpdateGroups(const gmx_domdec_t& dd)
1014 {
1015     return dd.comm->systemInfo.useUpdateGroups;
1016 }
1017
1018 void dd_cycles_add(const gmx_domdec_t* dd, float cycles, int ddCycl)
1019 {
1020     /* Note that the cycles value can be incorrect, either 0 or some
1021      * extremely large value, when our thread migrated to another core
1022      * with an unsynchronized cycle counter. If this happens less often
1023      * than once per nstlist steps, this will not cause issues, since
1024      * we later subtract the maximum value from the sum over nstlist steps.
1025      * A zero count will slightly lower the total, but that's a small effect.
1026      * Note that the main purpose of the subtraction of the maximum value
1027      * is to avoid throwing off the load balancing when stalls occur due
1028      * e.g. system activity or network congestion.
1029      */
1030     dd->comm->cycl[ddCycl] += cycles;
1031     dd->comm->cycl_n[ddCycl]++;
1032     if (cycles > dd->comm->cycl_max[ddCycl])
1033     {
1034         dd->comm->cycl_max[ddCycl] = cycles;
1035     }
1036 }
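/* Illustrative example (not part of the original source): with nstlist = 10,
 * nine normal counts of ~100 cycles and one bogus count of ~10^6 cycles from a
 * core migration, the raw sum is dominated by the outlier; subtracting the
 * stored maximum leaves ~900 cycles, close to the true load, so a single bad
 * sample does not throw off the load balancing.
 */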
1037
1038 #if GMX_MPI
1039 static void make_load_communicator(gmx_domdec_t* dd, int dim_ind, ivec loc)
1040 {
1041     MPI_Comm c_row;
1042     int      dim, i, rank;
1043     ivec     loc_c;
1044     gmx_bool bPartOfGroup = FALSE;
1045
1046     dim = dd->dim[dim_ind];
1047     copy_ivec(loc, loc_c);
1048     for (i = 0; i < dd->numCells[dim]; i++)
1049     {
1050         loc_c[dim] = i;
1051         rank       = dd_index(dd->numCells, loc_c);
1052         if (rank == dd->rank)
1053         {
1054             /* This process is part of the group */
1055             bPartOfGroup = TRUE;
1056         }
1057     }
1058     MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank, &c_row);
1059     if (bPartOfGroup)
1060     {
1061         dd->comm->mpi_comm_load[dim_ind] = c_row;
1062         if (!isDlbDisabled(dd->comm))
1063         {
1064             DDCellsizesWithDlb& cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1065
1066             if (dd->ci[dim] == dd->master_ci[dim])
1067             {
1068                 /* This is the root process of this row */
1069                 cellsizes.rowMaster = std::make_unique<RowMaster>();
1070
1071                 RowMaster& rowMaster = *cellsizes.rowMaster;
1072                 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1073                 rowMaster.oldCellFrac.resize(dd->numCells[dim] + 1);
1074                 rowMaster.isCellMin.resize(dd->numCells[dim]);
1075                 if (dim_ind > 0)
1076                 {
1077                     rowMaster.bounds.resize(dd->numCells[dim]);
1078                 }
1079                 rowMaster.buf_ncd.resize(dd->numCells[dim]);
1080             }
1081             else
1082             {
1083                 /* This is not a root process, we only need to receive cell_f */
1084                 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1085             }
1086         }
1087         if (dd->ci[dim] == dd->master_ci[dim])
1088         {
1089             snew(dd->comm->load[dim_ind].load, dd->numCells[dim] * DD_NLOAD_MAX);
1090         }
1091     }
1092 }
1093 #endif
1094
1095 void dd_setup_dlb_resource_sharing(const t_commrec* cr, int gpu_id)
1096 {
1097 #if GMX_MPI
1098     int           physicalnode_id_hash;
1099     gmx_domdec_t* dd;
1100     MPI_Comm      mpi_comm_pp_physicalnode;
1101
1102     if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1103     {
1104         /* Only ranks with short-ranged tasks (currently) use GPUs.
1105          * If we don't have GPUs assigned, there are no resources to share.
1106          */
1107         return;
1108     }
1109
1110     physicalnode_id_hash = gmx_physicalnode_id_hash();
1111
1112     dd = cr->dd;
1113
1114     if (debug)
1115     {
1116         fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1117         fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n", dd->rank, physicalnode_id_hash, gpu_id);
1118     }
1119     /* Split the PP communicator over the physical nodes */
1120     /* TODO: See if we should store this (before), as it's also used
1121      * for the nodecomm summation.
1122      */
1123     // TODO PhysicalNodeCommunicator could be extended/used to handle
1124     // the need for per-node per-group communicators.
1125     MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank, &mpi_comm_pp_physicalnode);
1126     MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank, &dd->comm->mpi_comm_gpu_shared);
1127     MPI_Comm_free(&mpi_comm_pp_physicalnode);
1128     MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1129
1130     if (debug)
1131     {
1132         fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1133     }
1134
1135     /* Note that some ranks could share a GPU, while others don't */
1136
1137     if (dd->comm->nrank_gpu_shared == 1)
1138     {
1139         MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1140     }
1141 #else
1142     GMX_UNUSED_VALUE(cr);
1143     GMX_UNUSED_VALUE(gpu_id);
1144 #endif
1145 }
1146
1147 static void make_load_communicators(gmx_domdec_t gmx_unused* dd)
1148 {
1149 #if GMX_MPI
1150     int  dim0, dim1, i, j;
1151     ivec loc;
1152
1153     if (debug)
1154     {
1155         fprintf(debug, "Making load communicators\n");
1156     }
1157
1158     dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1159     snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1160
1161     if (dd->ndim == 0)
1162     {
1163         return;
1164     }
1165
1166     clear_ivec(loc);
1167     make_load_communicator(dd, 0, loc);
1168     if (dd->ndim > 1)
1169     {
1170         dim0 = dd->dim[0];
1171         for (i = 0; i < dd->numCells[dim0]; i++)
1172         {
1173             loc[dim0] = i;
1174             make_load_communicator(dd, 1, loc);
1175         }
1176     }
1177     if (dd->ndim > 2)
1178     {
1179         dim0 = dd->dim[0];
1180         for (i = 0; i < dd->numCells[dim0]; i++)
1181         {
1182             loc[dim0] = i;
1183             dim1      = dd->dim[1];
1184             for (j = 0; j < dd->numCells[dim1]; j++)
1185             {
1186                 loc[dim1] = j;
1187                 make_load_communicator(dd, 2, loc);
1188             }
1189         }
1190     }
1191
1192     if (debug)
1193     {
1194         fprintf(debug, "Finished making load communicators\n");
1195     }
1196 #endif
1197 }
1198
1199 /*! \brief Sets up the relation between neighboring domains and zones */
1200 static void setup_neighbor_relations(gmx_domdec_t* dd)
1201 {
1202     int                 d, dim, m;
1203     ivec                tmp, s;
1204     gmx_domdec_zones_t* zones;
1205     GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
1206
1207     for (d = 0; d < dd->ndim; d++)
1208     {
1209         dim = dd->dim[d];
1210         copy_ivec(dd->ci, tmp);
1211         tmp[dim]           = (tmp[dim] + 1) % dd->numCells[dim];
1212         dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1213         copy_ivec(dd->ci, tmp);
1214         tmp[dim]           = (tmp[dim] - 1 + dd->numCells[dim]) % dd->numCells[dim];
1215         dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1216         if (debug)
1217         {
1218             fprintf(debug,
1219                     "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
1220                     dd->rank,
1221                     dim,
1222                     dd->neighbor[d][0],
1223                     dd->neighbor[d][1]);
1224         }
1225     }
1226
1227     int nzone  = (1 << dd->ndim);
1228     int nizone = (1 << std::max(dd->ndim - 1, 0));
1229     assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1230
1231     zones = &dd->comm->zones;
1232
1233     for (int i = 0; i < nzone; i++)
1234     {
1235         m = 0;
1236         clear_ivec(zones->shift[i]);
1237         for (d = 0; d < dd->ndim; d++)
1238         {
1239             zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1240         }
1241     }
1242
1243     zones->n = nzone;
1244     for (int i = 0; i < nzone; i++)
1245     {
1246         for (d = 0; d < DIM; d++)
1247         {
1248             s[d] = dd->ci[d] - zones->shift[i][d];
1249             if (s[d] < 0)
1250             {
1251                 s[d] += dd->numCells[d];
1252             }
1253             else if (s[d] >= dd->numCells[d])
1254             {
1255                 s[d] -= dd->numCells[d];
1256             }
1257         }
1258     }
1259     for (int iZoneIndex = 0; iZoneIndex < nizone; iZoneIndex++)
1260     {
1261         GMX_RELEASE_ASSERT(
1262                 ddNonbondedZonePairRanges[iZoneIndex][0] == iZoneIndex,
1263                 "The first element for each ddNonbondedZonePairRanges should match its index");
1264
1265         DDPairInteractionRanges iZone;
1266         iZone.iZoneIndex = iZoneIndex;
1267         /* The zone-pair ranges are set up for 3D decomposition; for fewer
1268          * dimensions use only j-zones up to nzone.
1269          */
1270         iZone.jZoneRange = gmx::Range<int>(std::min(ddNonbondedZonePairRanges[iZoneIndex][1], nzone),
1271                                            std::min(ddNonbondedZonePairRanges[iZoneIndex][2], nzone));
1272         for (dim = 0; dim < DIM; dim++)
1273         {
1274             if (dd->numCells[dim] == 1)
1275             {
1276                 /* All shifts should be allowed */
1277                 iZone.shift0[dim] = -1;
1278                 iZone.shift1[dim] = 1;
1279             }
1280             else
1281             {
1282                 /* Determine the min/max j-zone shift wrt the i-zone */
1283                 iZone.shift0[dim] = 1;
1284                 iZone.shift1[dim] = -1;
1285                 for (int jZone : iZone.jZoneRange)
1286                 {
1287                     int shift_diff = zones->shift[jZone][dim] - zones->shift[iZoneIndex][dim];
1288                     if (shift_diff < iZone.shift0[dim])
1289                     {
1290                         iZone.shift0[dim] = shift_diff;
1291                     }
1292                     if (shift_diff > iZone.shift1[dim])
1293                     {
1294                         iZone.shift1[dim] = shift_diff;
1295                     }
1296                 }
1297             }
1298         }
1299
1300         zones->iZones.push_back(iZone);
1301     }
1302
1303     if (!isDlbDisabled(dd->comm))
1304     {
1305         dd->comm->cellsizesWithDlb.resize(dd->ndim);
1306     }
1307
1308     if (dd->comm->ddSettings.recordLoad)
1309     {
1310         make_load_communicators(dd);
1311     }
1312 }
1313
1314 static void make_pp_communicator(const gmx::MDLogger& mdlog,
1315                                  gmx_domdec_t*        dd,
1316                                  t_commrec gmx_unused* cr,
1317                                  bool gmx_unused reorder)
1318 {
1319 #if GMX_MPI
1320     gmx_domdec_comm_t*  comm      = dd->comm;
1321     CartesianRankSetup& cartSetup = comm->cartesianRankSetup;
1322
1323     if (cartSetup.bCartesianPP)
1324     {
1325         /* Set up cartesian communication for the particle-particle part */
1326         GMX_LOG(mdlog.info)
1327                 .appendTextFormatted("Will use a Cartesian communicator: %d x %d x %d",
1328                                      dd->numCells[XX],
1329                                      dd->numCells[YY],
1330                                      dd->numCells[ZZ]);
1331
1332         ivec periods;
1333         for (int i = 0; i < DIM; i++)
1334         {
1335             periods[i] = TRUE;
1336         }
1337         MPI_Comm comm_cart;
1338         MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->numCells, periods, static_cast<int>(reorder), &comm_cart);
1339         /* We overwrite the old communicator with the new cartesian one */
1340         cr->mpi_comm_mygroup = comm_cart;
1341     }
1342
1343     dd->mpi_comm_all = cr->mpi_comm_mygroup;
1344     MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1345
1346     if (cartSetup.bCartesianPP_PME)
1347     {
1348         /* Since we want to use the original cartesian setup for sim,
1349          * and not the one after split, we need to make an index.
1350          */
1351         cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1352         cartSetup.ddindex2ddnodeid[dd_index(dd->numCells, dd->ci)] = dd->rank;
1353         gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1354         /* Get the rank of the DD master;
1355          * above we made sure that the master node is a PP node.
1356          */
1357         int rank;
1358         if (MASTER(cr))
1359         {
1360             rank = dd->rank;
1361         }
1362         else
1363         {
1364             rank = 0;
1365         }
1366         MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1367     }
1368     else if (cartSetup.bCartesianPP)
1369     {
1370         if (!comm->ddRankSetup.usePmeOnlyRanks)
1371         {
1372             /* The PP communicator is also
1373              * the communicator for this simulation
1374              */
1375             cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1376         }
1377         cr->nodeid = dd->rank;
1378
1379         MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1380
1381         /* We need to make an index to go from the coordinates
1382          * to the nodeid of this simulation.
1383          */
1384         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1385         std::vector<int> buf(dd->nnodes);
1386         if (thisRankHasDuty(cr, DUTY_PP))
1387         {
1388             buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
1389         }
1390         /* Communicate the ddindex to simulation nodeid index */
1391         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM, cr->mpi_comm_mysim);
1392
1393         /* Determine the master coordinates and rank.
1394          * The DD master should be the same node as the master of this sim.
1395          */
1396         for (int i = 0; i < dd->nnodes; i++)
1397         {
1398             if (cartSetup.ddindex2simnodeid[i] == 0)
1399             {
1400                 ddindex2xyz(dd->numCells, i, dd->master_ci);
1401                 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1402             }
1403         }
1404         if (debug)
1405         {
1406             fprintf(debug, "The master rank is %d\n", dd->masterrank);
1407         }
1408     }
1409     else
1410     {
1411         /* No Cartesian communicators */
1412         /* We use the rank in dd->comm->all as DD index */
1413         ddindex2xyz(dd->numCells, dd->rank, dd->ci);
1414         /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1415         dd->masterrank = 0;
1416         clear_ivec(dd->master_ci);
1417     }
1418 #endif
1419
1420     GMX_LOG(mdlog.info)
1421             .appendTextFormatted("Domain decomposition rank %d, coordinates %d %d %d\n",
1422                                  dd->rank,
1423                                  dd->ci[XX],
1424                                  dd->ci[YY],
1425                                  dd->ci[ZZ]);
1426     if (debug)
1427     {
1428         fprintf(debug,
1429                 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
1430                 dd->rank,
1431                 dd->ci[XX],
1432                 dd->ci[YY],
1433                 dd->ci[ZZ]);
1434     }
1435 }
1436
1437 static void receive_ddindex2simnodeid(gmx_domdec_t* dd, t_commrec* cr)
1438 {
1439 #if GMX_MPI
1440     CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
1441
1442     if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1443     {
1444         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1445         std::vector<int> buf(dd->nnodes);
1446         if (thisRankHasDuty(cr, DUTY_PP))
1447         {
1448             buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
1449         }
1450         /* Communicate the ddindex to simulation nodeid index */
1451         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM, cr->mpi_comm_mysim);
1452     }
1453 #else
1454     GMX_UNUSED_VALUE(dd);
1455     GMX_UNUSED_VALUE(cr);
1456 #endif
1457 }
1458
1459 static CartesianRankSetup split_communicator(const gmx::MDLogger& mdlog,
1460                                              t_commrec*           cr,
1461                                              const DdRankOrder    ddRankOrder,
1462                                              bool gmx_unused    reorder,
1463                                              const DDRankSetup& ddRankSetup,
1464                                              ivec               ddCellIndex,
1465                                              std::vector<int>*  pmeRanks)
1466 {
1467     CartesianRankSetup cartSetup;
1468
1469     cartSetup.bCartesianPP     = (ddRankOrder == DdRankOrder::cartesian);
1470     cartSetup.bCartesianPP_PME = false;
1471
1472     const ivec& numDDCells = ddRankSetup.numPPCells;
1473     /* Initially we set ntot to the number of PP cells */
1474     copy_ivec(numDDCells, cartSetup.ntot);
1475
1476     if (cartSetup.bCartesianPP)
1477     {
1478         const int numDDCellsTot = ddRankSetup.numPPRanks;
1479         bool      bDiv[DIM];
1480         for (int i = 1; i < DIM; i++)
1481         {
1482             bDiv[i] = ((ddRankSetup.numRanksDoingPme * numDDCells[i]) % numDDCellsTot == 0);
1483         }
1484         if (bDiv[YY] || bDiv[ZZ])
1485         {
1486             cartSetup.bCartesianPP_PME = TRUE;
1487             /* If we have 2D PME decomposition, which is always in x+y,
1488              * we stack the PME only nodes in z.
1489              * Otherwise we choose the direction that provides the thinnest slab
1490              * of PME only nodes as this will have the least effect
1491              * on the PP communication.
1492              * But for the PME communication the opposite might be better.
1493              */
1494             if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 || !bDiv[YY] || numDDCells[YY] > numDDCells[ZZ]))
1495             {
1496                 cartSetup.cartpmedim = ZZ;
1497             }
1498             else
1499             {
1500                 cartSetup.cartpmedim = YY;
1501             }
1502             cartSetup.ntot[cartSetup.cartpmedim] +=
1503                     (ddRankSetup.numRanksDoingPme * numDDCells[cartSetup.cartpmedim]) / numDDCellsTot;
1504         }
1505         else
1506         {
1507             GMX_LOG(mdlog.info)
1508                     .appendTextFormatted(
1509                             "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or "
1510                             "nx*nz (%d*%d)",
1511                             ddRankSetup.numRanksDoingPme,
1512                             numDDCells[XX],
1513                             numDDCells[YY],
1514                             numDDCells[XX],
1515                             numDDCells[ZZ]);
1516             GMX_LOG(mdlog.info)
1517                     .appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1518         }
1519     }
1520
1521     if (cartSetup.bCartesianPP_PME)
1522     {
1523 #if GMX_MPI
1524         int  rank;
1525         ivec periods;
1526
1527         GMX_LOG(mdlog.info)
1528                 .appendTextFormatted(
1529                         "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1530                         cartSetup.ntot[XX],
1531                         cartSetup.ntot[YY],
1532                         cartSetup.ntot[ZZ]);
1533
1534         for (int i = 0; i < DIM; i++)
1535         {
1536             periods[i] = TRUE;
1537         }
1538         MPI_Comm comm_cart;
1539         MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder), &comm_cart);
1540         MPI_Comm_rank(comm_cart, &rank);
1541         if (MASTER(cr) && rank != 0)
1542         {
1543             gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1544         }
1545
1546         /* With this assignment we lose the link to the original communicator,
1547          * which will usually be MPI_COMM_WORLD, unless we have multisim.
1548          */
1549         cr->mpi_comm_mysim = comm_cart;
1550         cr->sim_nodeid     = rank;
1551
1552         MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1553
1554         GMX_LOG(mdlog.info)
1555                 .appendTextFormatted("Cartesian rank %d, coordinates %d %d %d\n",
1556                                      cr->sim_nodeid,
1557                                      ddCellIndex[XX],
1558                                      ddCellIndex[YY],
1559                                      ddCellIndex[ZZ]);
1560
1561         if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1562         {
1563             cr->duty = DUTY_PP;
1564         }
1565         if (!ddRankSetup.usePmeOnlyRanks
1566             || ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1567         {
1568             cr->duty = DUTY_PME;
1569         }
1570
1571         /* Split the sim communicator into PP and PME-only ranks */
1572         MPI_Comm_split(cr->mpi_comm_mysim,
1573                        getThisRankDuties(cr),
1574                        dd_index(cartSetup.ntot, ddCellIndex),
1575                        &cr->mpi_comm_mygroup);
1576 #else
1577         GMX_UNUSED_VALUE(ddCellIndex);
1578 #endif
1579     }
1580     else
1581     {
1582         switch (ddRankOrder)
1583         {
1584             case DdRankOrder::pp_pme:
1585                 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1586                 break;
1587             case DdRankOrder::interleave:
1588                 /* Interleave the PP-only and PME-only ranks */
1589                 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1590                 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1591                 break;
1592             case DdRankOrder::cartesian: break;
1593             default: gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1594         }
1595
1596         if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1597         {
1598             cr->duty = DUTY_PME;
1599         }
1600         else
1601         {
1602             cr->duty = DUTY_PP;
1603         }
1604 #if GMX_MPI
1605         /* Split the sim communicator into PP and PME-only ranks */
1606         MPI_Comm_split(cr->mpi_comm_mysim, getThisRankDuties(cr), cr->nodeid, &cr->mpi_comm_mygroup);
1607         MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1608 #endif
1609     }
1610
1611     GMX_LOG(mdlog.info)
1612             .appendTextFormatted("This rank does only %s work.\n",
1613                                  thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1614
1615     return cartSetup;
1616 }
1617
1618 /*! \brief Makes the PP communicator and the PME communicator, when needed
1619  *
1620  * Returns the Cartesian rank setup.
1621  * Sets \p cr->mpi_comm_mygroup.
1622  * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1623  * With separate PME ranks in interleaved order, sets the PME ranks in \p pmeRanks.
1624  */
1625 static CartesianRankSetup makeGroupCommunicators(const gmx::MDLogger& mdlog,
1626                                                  const DDSettings&    ddSettings,
1627                                                  const DdRankOrder    ddRankOrder,
1628                                                  const DDRankSetup&   ddRankSetup,
1629                                                  t_commrec*           cr,
1630                                                  ivec                 ddCellIndex,
1631                                                  std::vector<int>*    pmeRanks)
1632 {
1633     CartesianRankSetup cartSetup;
1634
1635     // As a default, both group and sim communicators are equal to the default communicator
1636     cr->mpi_comm_mygroup = cr->mpiDefaultCommunicator;
1637     cr->mpi_comm_mysim   = cr->mpiDefaultCommunicator;
1638     cr->nnodes           = cr->sizeOfDefaultCommunicator;
1639     cr->nodeid           = cr->rankInDefaultCommunicator;
1640     cr->sim_nodeid       = cr->rankInDefaultCommunicator;
1641
1642     if (ddRankSetup.usePmeOnlyRanks)
1643     {
1644         /* Split the communicator into a PP and PME part */
1645         cartSetup = split_communicator(
1646                 mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder, ddRankSetup, ddCellIndex, pmeRanks);
1647     }
1648     else
1649     {
1650         /* All nodes do PP and PME */
1651         /* We do not require separate communicators */
1652         cartSetup.bCartesianPP     = false;
1653         cartSetup.bCartesianPP_PME = false;
1654     }
1655
1656     return cartSetup;
1657 }
1658
1659 /*! \brief For PP ranks, sets or makes the communicator
1660  *
1661  * For PME-only ranks, receives the DD index to simulation-rank mapping.
1662  * For PP ranks that use separate PME ranks, sets the PME-only rank to communicate with.
1663  */
1664 static void setupGroupCommunication(const gmx::MDLogger&     mdlog,
1665                                     const DDSettings&        ddSettings,
1666                                     gmx::ArrayRef<const int> pmeRanks,
1667                                     t_commrec*               cr,
1668                                     const int                numAtomsInSystem,
1669                                     gmx_domdec_t*            dd)
1670 {
1671     const DDRankSetup&        ddRankSetup = dd->comm->ddRankSetup;
1672     const CartesianRankSetup& cartSetup   = dd->comm->cartesianRankSetup;
1673
1674     if (thisRankHasDuty(cr, DUTY_PP))
1675     {
1676         /* Copy or make a new PP communicator */
1677
1678         /* We (possibly) reordered the nodes in split_communicator,
1679          * so it is no longer required in make_pp_communicator.
1680          */
1681         const bool useCartesianReorder = (ddSettings.useCartesianReorder && !cartSetup.bCartesianPP_PME);
1682
1683         make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1684     }
1685     else
1686     {
1687         receive_ddindex2simnodeid(dd, cr);
1688     }
1689
1690     if (!thisRankHasDuty(cr, DUTY_PME))
1691     {
1692         /* Set up the communication to our PME node */
1693         dd->pme_nodeid = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1694         dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1695         if (debug)
1696         {
1697             fprintf(debug,
1698                     "My pme_nodeid %d receive ener %s\n",
1699                     dd->pme_nodeid,
1700                     gmx::boolToString(dd->pme_receive_vir_ener));
1701         }
1702     }
1703     else
1704     {
1705         dd->pme_nodeid = -1;
1706     }
1707
1708     /* We cannot use DDMASTER(dd), because dd->masterrank is set later */
1709     if (MASTER(cr))
1710     {
1711         dd->ma = std::make_unique<AtomDistribution>(dd->numCells, numAtomsInSystem, numAtomsInSystem);
1712     }
1713 }
1714
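/*! \brief Parses the static load balancing cell size string for one dimension
 *
 * Returns the normalized relative cell sizes for the \p nc cells along \p dir,
 * or nullptr when only one cell is used or no size string was given.
 * For example, with nc=3 and size_string "0.5 1.0 1.5" the returned
 * fractions would be roughly 0.167, 0.333 and 0.500.
 */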
1715 static real* get_slb_frac(const gmx::MDLogger& mdlog, const char* dir, int nc, const char* size_string)
1716 {
1717     real * slb_frac, tot;
1718     int    i, n;
1719     double dbl;
1720
1721     slb_frac = nullptr;
1722     if (nc > 1 && size_string != nullptr)
1723     {
1724         GMX_LOG(mdlog.info).appendTextFormatted("Using static load balancing for the %s direction", dir);
1725         snew(slb_frac, nc);
1726         tot = 0;
1727         for (i = 0; i < nc; i++)
1728         {
1729             dbl = 0;
1730             sscanf(size_string, "%20lf%n", &dbl, &n);
1731             if (dbl == 0)
1732             {
1733                 gmx_fatal(FARGS,
1734                           "Incorrect or not enough DD cell size entries for direction %s: '%s'",
1735                           dir,
1736                           size_string);
1737             }
1738             slb_frac[i] = dbl;
1739             size_string += n;
1740             tot += slb_frac[i];
1741         }
1742         /* Normalize */
1743         std::string relativeCellSizes = "Relative cell sizes:";
1744         for (i = 0; i < nc; i++)
1745         {
1746             slb_frac[i] /= tot;
1747             relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1748         }
1749         GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1750     }
1751
1752     return slb_frac;
1753 }
1754
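/*! \brief Returns the number of bonded interactions in \p mtop that involve more than two atoms */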
1755 static int multi_body_bondeds_count(const gmx_mtop_t* mtop)
1756 {
1757     int                  n     = 0;
1758     gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1759     int                  nmol;
1760     while (const InteractionLists* ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1761     {
1762         for (auto& ilist : extractILists(*ilists, IF_BOND))
1763         {
1764             if (NRAL(ilist.functionType) > 2)
1765             {
1766                 n += nmol * (ilist.iatoms.size() / ilistStride(ilist));
1767             }
1768         }
1769     }
1770
1771     return n;
1772 }
1773
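/*! \brief Returns the integer value of environment variable \p env_var, or \p def when it is not set
 *
 * An unparsable value is treated as 1 and the chosen value is logged.
 * For example, with GMX_DD_DEBUG=1 set, dd_getenv(mdlog, "GMX_DD_DEBUG", 0) returns 1.
 */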
1774 static int dd_getenv(const gmx::MDLogger& mdlog, const char* env_var, int def)
1775 {
1776     char* val;
1777     int   nst;
1778
1779     nst = def;
1780     val = getenv(env_var);
1781     if (val)
1782     {
1783         if (sscanf(val, "%20d", &nst) <= 0)
1784         {
1785             nst = 1;
1786         }
1787         GMX_LOG(mdlog.info).appendTextFormatted("Found env.var. %s = %s, using value %d", env_var, val, nst);
1788     }
1789
1790     return nst;
1791 }
1792
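/*! \brief Checks restrictions of the run input on domain decomposition
 *
 * Exits with a fatal error for unsupported setups (screw PBC not decomposed only
 * along x, nstlist=0) and warns about comm-mode angular with PBC.
 */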
1793 static void check_dd_restrictions(const gmx_domdec_t* dd, const t_inputrec* ir, const gmx::MDLogger& mdlog)
1794 {
1795     if (ir->pbcType == PbcType::Screw
1796         && (dd->numCells[XX] == 1 || dd->numCells[YY] > 1 || dd->numCells[ZZ] > 1))
1797     {
1798         gmx_fatal(FARGS,
1799                   "With pbc=%s can only do domain decomposition in the x-direction",
1800                   c_pbcTypeNames[ir->pbcType].c_str());
1801     }
1802
1803     if (ir->nstlist == 0)
1804     {
1805         gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1806     }
1807
1808     if (ir->comm_mode == ecmANGULAR && ir->pbcType != PbcType::No)
1809     {
1810         GMX_LOG(mdlog.warning)
1811                 .appendText(
1812                         "comm-mode angular will give incorrect results when the comm group "
1813                         "partially crosses a periodic boundary");
1814     }
1815 }
1816
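/*! \brief Returns an estimate of the smallest initial average cell size over the decomposed dimensions */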
1817 static real average_cellsize_min(const gmx_ddbox_t& ddbox, const ivec numDomains)
1818 {
1819     real r = ddbox.box_size[XX];
1820     for (int d = 0; d < DIM; d++)
1821     {
1822         if (numDomains[d] > 1)
1823         {
1824             /* Check using the initial average cell size */
1825             r = std::min(r, ddbox.box_size[d] * ddbox.skew_fac[d] / numDomains[d]);
1826         }
1827     }
1828
1829     return r;
1830 }
1831
1832 /*! \brief Returns the DLB switched-off state, or exits with a fatal error when the user explicitly requested DLB.
1833  */
1834 static DlbState forceDlbOffOrBail(DlbState             cmdlineDlbState,
1835                                   const std::string&   reasonStr,
1836                                   const gmx::MDLogger& mdlog)
1837 {
1838     std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
1839     std::string dlbDisableNote     = "NOTE: disabling dynamic load balancing as ";
1840
1841     if (cmdlineDlbState == DlbState::onUser)
1842     {
1843         gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1844     }
1845     else if (cmdlineDlbState == DlbState::offCanTurnOn)
1846     {
1847         GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1848     }
1849     return DlbState::offForever;
1850 }
1851
1852 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1853  *
1854  * This function parses the "-dlb" command-line option and sets the
1855  * corresponding state value. It then checks the consistency of the determined
1856  * state with other run parameters and settings. As a result, the initial state
1857  * may be altered, or an error may be thrown when incompatible options are detected.
1858  *
1859  * \param [in] mdlog       Logger.
1860  * \param [in] dlbOption   Enum value for the DLB option.
1861  * \param [in] bRecordLoad True if the load balancer is recording load information.
1862  * \param [in] mdrunOptions  Options for mdrun.
1863  * \param [in] ir          Pointer to the mdrun input parameters.
1864  * \returns                DLB initial/startup state.
1865  */
1866 static DlbState determineInitialDlbState(const gmx::MDLogger&     mdlog,
1867                                          DlbOption                dlbOption,
1868                                          gmx_bool                 bRecordLoad,
1869                                          const gmx::MdrunOptions& mdrunOptions,
1870                                          const t_inputrec*        ir)
1871 {
1872     DlbState dlbState = DlbState::offCanTurnOn;
1873
1874     switch (dlbOption)
1875     {
1876         case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1877         case DlbOption::no: dlbState = DlbState::offUser; break;
1878         case DlbOption::yes: dlbState = DlbState::onUser; break;
1879         default: gmx_incons("Invalid dlbOption enum value");
1880     }
1881
1882     /* Reruns don't support DLB: bail or override auto mode */
1883     if (mdrunOptions.rerun)
1884     {
1885         std::string reasonStr = "it is not supported in reruns.";
1886         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1887     }
1888
1889     /* Unsupported integrators */
1890     if (!EI_DYNAMICS(ir->eI))
1891     {
1892         auto reasonStr = gmx::formatString(
1893                 "it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1894         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1895     }
1896
1897     /* Without cycle counters we can't time work to balance on */
1898     if (!bRecordLoad)
1899     {
1900         std::string reasonStr =
1901                 "cycle counters unsupported or not enabled in the operating system kernel.";
1902         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1903     }
1904
1905     if (mdrunOptions.reproducible)
1906     {
1907         std::string reasonStr = "you started a reproducible run.";
1908         switch (dlbState)
1909         {
1910             case DlbState::offUser: break;
1911             case DlbState::offForever:
1912                 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1913                 break;
1914             case DlbState::offCanTurnOn: return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1915             case DlbState::onCanTurnOff:
1916                 GMX_RELEASE_ASSERT(false, "DlbState::onCanTurnOff is not a valid initial state");
1917                 break;
1918             case DlbState::onUser:
1919                 return forceDlbOffOrBail(
1920                         dlbState,
1921                         reasonStr
1922                                 + " In load balanced runs binary reproducibility cannot be "
1923                                   "ensured.",
1924                         mdlog);
1925             default:
1926                 gmx_fatal(FARGS,
1927                           "Death horror: undefined case (%d) for load balancing choice",
1928                           static_cast<int>(dlbState));
1929         }
1930     }
1931
1932     return dlbState;
1933 }
1934
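/*! \brief Allocates a new DD communication struct with all load counters and measures initialized to zero */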
1935 static gmx_domdec_comm_t* init_dd_comm()
1936 {
1937     gmx_domdec_comm_t* comm = new gmx_domdec_comm_t;
1938
1939     comm->n_load_have    = 0;
1940     comm->n_load_collect = 0;
1941
1942     comm->haveTurnedOffDlb = false;
1943
1944     for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
1945     {
1946         comm->sum_nat[i] = 0;
1947     }
1948     comm->ndecomp   = 0;
1949     comm->nload     = 0;
1950     comm->load_step = 0;
1951     comm->load_sum  = 0;
1952     comm->load_max  = 0;
1953     clear_ivec(comm->load_lim);
1954     comm->load_mdf = 0;
1955     comm->load_pme = 0;
1956
1957     /* This should be replaced by a unique pointer */
1958     comm->balanceRegion = ddBalanceRegionAllocate();
1959
1960     return comm;
1961 }
1962
1963 /* Returns whether mtop contains constraints and/or vsites */
1964 static bool systemHasConstraintsOrVsites(const gmx_mtop_t& mtop)
1965 {
1966     auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
1967     int  nmol;
1968     while (const InteractionLists* ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
1969     {
1970         if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
1971         {
1972             return true;
1973         }
1974     }
1975
1976     return false;
1977 }
1978
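/*! \brief Decides whether update groups can be used and, if so, stores the grouping and maximum radius in \p systemInfo
 *
 * Update groups are only used when the system has constraints and/or vsites,
 * a grouping exists for all molecule types, GMX_NO_UPDATEGROUPS is not set,
 * and the resulting increase of the domain-to-domain cutoff fits within \p cutoffMargin.
 */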
1979 static void setupUpdateGroups(const gmx::MDLogger& mdlog,
1980                               const gmx_mtop_t&    mtop,
1981                               const t_inputrec&    inputrec,
1982                               const real           cutoffMargin,
1983                               DDSystemInfo*        systemInfo)
1984 {
1985     /* When we have constraints and/or vsites, it is beneficial to use
1986      * update groups (when possible) to allow independent update of groups.
1987      */
1988     if (!systemHasConstraintsOrVsites(mtop))
1989     {
1990         /* No constraints or vsites, atoms can be updated independently */
1991         return;
1992     }
1993
1994     systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
1995     systemInfo->useUpdateGroups               = (!systemInfo->updateGroupingPerMoleculetype.empty()
1996                                    && getenv("GMX_NO_UPDATEGROUPS") == nullptr);
1997
1998     if (systemInfo->useUpdateGroups)
1999     {
2000         int numUpdateGroups = 0;
2001         for (const auto& molblock : mtop.molblock)
2002         {
2003             numUpdateGroups += molblock.nmol
2004                                * systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
2005         }
2006
2007         systemInfo->maxUpdateGroupRadius = computeMaxUpdateGroupRadius(
2008                 mtop, systemInfo->updateGroupingPerMoleculetype, maxReferenceTemperature(inputrec));
2009
2010         /* To use update groups, the larger domain-to-domain cutoff distance
2011          * should still be compatible with the box size.
2012          */
2013         systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
2014
2015         if (systemInfo->useUpdateGroups)
2016         {
2017             GMX_LOG(mdlog.info)
2018                     .appendTextFormatted(
2019                             "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f "
2020                             "nm\n",
2021                             numUpdateGroups,
2022                             mtop.natoms / static_cast<double>(numUpdateGroups),
2023                             systemInfo->maxUpdateGroupRadius);
2024         }
2025         else
2026         {
2027             GMX_LOG(mdlog.info)
2028                     .appendTextFormatted(
2029                             "The combination of rlist and box size prohibits the use of update "
2030                             "groups\n");
2031             systemInfo->updateGroupingPerMoleculetype.clear();
2032         }
2033     }
2034 }
2035
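/*! \brief Constructs the unit-cell information from the PBC and box settings in \p ir */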
2036 UnitCellInfo::UnitCellInfo(const t_inputrec& ir) :
2037     npbcdim(numPbcDimensions(ir.pbcType)),
2038     numBoundedDimensions(inputrec2nboundeddim(&ir)),
2039     ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
2040     haveScrewPBC(ir.pbcType == PbcType::Screw)
2041 {
2042 }
2043
2044 /* Returns whether molecules are always whole, i.e. not broken by PBC */
2045 static bool moleculesAreAlwaysWhole(const gmx_mtop_t&                           mtop,
2046                                     const bool                                  useUpdateGroups,
2047                                     gmx::ArrayRef<const gmx::RangePartitioning> updateGroupingPerMoleculetype)
2048 {
2049     if (useUpdateGroups)
2050     {
2051         GMX_RELEASE_ASSERT(updateGroupingPerMoleculetype.size() == mtop.moltype.size(),
2052                            "Need one grouping per moltype");
2053         for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
2054         {
2055             if (updateGroupingPerMoleculetype[mol].numBlocks() > 1)
2056             {
2057                 return false;
2058             }
2059         }
2060     }
2061     else
2062     {
2063         for (const auto& moltype : mtop.moltype)
2064         {
2065             if (moltype.atoms.nr > 1)
2066             {
2067                 return false;
2068             }
2069         }
2070     }
2071
2072     return true;
2073 }
2074
2075 /*! \brief Generate the simulation system information */
2076 static DDSystemInfo getSystemInfo(const gmx::MDLogger&           mdlog,
2077                                   DDRole                         ddRole,
2078                                   MPI_Comm                       communicator,
2079                                   const DomdecOptions&           options,
2080                                   const gmx_mtop_t&              mtop,
2081                                   const t_inputrec&              ir,
2082                                   const matrix                   box,
2083                                   gmx::ArrayRef<const gmx::RVec> xGlobal)
2084 {
2085     const real tenPercentMargin = 1.1;
2086
2087     DDSystemInfo systemInfo;
2088
2089     /* We need to decide on update groups early, as this affects communication distances */
2090     systemInfo.useUpdateGroups = false;
2091     if (ir.cutoff_scheme == ecutsVERLET)
2092     {
2093         real cutoffMargin = std::sqrt(max_cutoff2(ir.pbcType, box)) - ir.rlist;
2094         setupUpdateGroups(mdlog, mtop, ir, cutoffMargin, &systemInfo);
2095     }
2096
2097     systemInfo.moleculesAreAlwaysWhole = moleculesAreAlwaysWhole(
2098             mtop, systemInfo.useUpdateGroups, systemInfo.updateGroupingPerMoleculetype);
2099     systemInfo.haveInterDomainBondeds =
2100             (!systemInfo.moleculesAreAlwaysWhole || mtop.bIntermolecularInteractions);
2101     systemInfo.haveInterDomainMultiBodyBondeds =
2102             (systemInfo.haveInterDomainBondeds && multi_body_bondeds_count(&mtop) > 0);
2103
2104     if (systemInfo.useUpdateGroups)
2105     {
2106         systemInfo.haveSplitConstraints = false;
2107         systemInfo.haveSplitSettles     = false;
2108     }
2109     else
2110     {
2111         systemInfo.haveSplitConstraints = (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0
2112                                            || gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0);
2113         systemInfo.haveSplitSettles     = (gmx_mtop_ftype_count(mtop, F_SETTLE) > 0);
2114     }
2115
2116     if (ir.rlist == 0)
2117     {
2118         /* Set the cut-off to some very large value,
2119          * so we don't need if statements everywhere in the code.
2120          * We use sqrt, since the cut-off is squared in some places.
2121          */
2122         systemInfo.cutoff = GMX_CUTOFF_INF;
2123     }
2124     else
2125     {
2126         systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir.rlist);
2127     }
2128     systemInfo.minCutoffForMultiBody = 0;
2129
2130     /* Determine the minimum cell size limit, affected by many factors */
2131     systemInfo.cellsizeLimit             = 0;
2132     systemInfo.filterBondedCommunication = false;
2133
2134     /* We do not allow home atoms to move beyond the neighboring domain
2135      * between domain decomposition steps, which limits the cell size.
2136      * Get an estimate of cell size limit due to atom displacement.
2137      * In most cases this is a large overestimate, because it assumes
2138      * non-interacting atoms.
2139      * We set the chance to 1 in a trillion steps.
2140      */
2141     constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2142     const real     limitForAtomDisplacement          = minCellSizeForAtomDisplacement(
2143             mtop, ir, systemInfo.updateGroupingPerMoleculetype, c_chanceThatAtomMovesBeyondDomain);
2144     GMX_LOG(mdlog.info).appendTextFormatted("Minimum cell size due to atom displacement: %.3f nm", limitForAtomDisplacement);
2145
2146     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, limitForAtomDisplacement);
2147
2148     /* TODO: PME decomposition currently requires atoms not to be more than
2149      *       2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2150      *       In nearly all cases, limitForAtomDisplacement will be smaller
2151      *       than 2/3*rlist, so the PME requirement is satisfied.
2152      *       But it would be better for both correctness and performance
2153      *       to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2154      *       Note that we would need to improve the pairlist buffer case.
2155      */
2156
2157     if (systemInfo.haveInterDomainBondeds)
2158     {
2159         if (options.minimumCommunicationRange > 0)
2160         {
2161             systemInfo.minCutoffForMultiBody =
2162                     atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2163             if (options.useBondedCommunication)
2164             {
2165                 systemInfo.filterBondedCommunication =
2166                         (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2167             }
2168             else
2169             {
2170                 systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
2171             }
2172         }
2173         else if (ir.bPeriodicMols)
2174         {
2175             /* Cannot easily determine the required cut-off */
2176             GMX_LOG(mdlog.warning)
2177                     .appendText(
2178                             "NOTE: Periodic molecules are present in this system. Because of this, "
2179                             "the domain decomposition algorithm cannot easily determine the "
2180                             "minimum cell size that it requires for treating bonded interactions. "
2181                             "Instead, domain decomposition will assume that half the non-bonded "
2182                             "cut-off will be a suitable lower bound.");
2183             systemInfo.minCutoffForMultiBody = systemInfo.cutoff / 2;
2184         }
2185         else
2186         {
2187             real r_2b, r_mb;
2188
2189             if (ddRole == DDRole::Master)
2190             {
2191                 dd_bonded_cg_distance(
2192                         mdlog, &mtop, &ir, xGlobal, box, options.checkBondedInteractions, &r_2b, &r_mb);
2193             }
2194             gmx_bcast(sizeof(r_2b), &r_2b, communicator);
2195             gmx_bcast(sizeof(r_mb), &r_mb, communicator);
2196
2197             /* We use an initial margin of 10% for the minimum cell size,
2198              * except when we are just below the non-bonded cut-off.
2199              */
2200             if (options.useBondedCommunication)
2201             {
2202                 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2203                 {
2204                     const real r_bonded              = std::max(r_2b, r_mb);
2205                     systemInfo.minCutoffForMultiBody = tenPercentMargin * r_bonded;
2206                     /* This is the (only) place where we turn on the filtering */
2207                     systemInfo.filterBondedCommunication = true;
2208                 }
2209                 else
2210                 {
2211                     const real r_bonded = r_mb;
2212                     systemInfo.minCutoffForMultiBody =
2213                             std::min(tenPercentMargin * r_bonded, systemInfo.cutoff);
2214                 }
2215                 /* We determine cutoff_mbody later */
2216                 systemInfo.increaseMultiBodyCutoff = true;
2217             }
2218             else
2219             {
2220                 /* No special bonded communication,
2221                  * simply increase the DD cut-off.
2222                  */
2223                 systemInfo.minCutoffForMultiBody = tenPercentMargin * std::max(r_2b, r_mb);
2224                 systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
2225             }
2226         }
2227         GMX_LOG(mdlog.info)
2228                 .appendTextFormatted("Minimum cell size due to bonded interactions: %.3f nm",
2229                                      systemInfo.minCutoffForMultiBody);
2230
2231         systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.minCutoffForMultiBody);
2232     }
2233
2234     systemInfo.constraintCommunicationRange = 0;
2235     if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
2236     {
2237         /* There is a cell size limit due to the constraints (P-LINCS) */
2238         systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, &mtop, &ir);
2239         GMX_LOG(mdlog.info)
2240                 .appendTextFormatted("Estimated maximum distance required for P-LINCS: %.3f nm",
2241                                      systemInfo.constraintCommunicationRange);
2242         if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2243         {
2244             GMX_LOG(mdlog.info)
2245                     .appendText(
2246                             "This distance will limit the DD cell size, you can override this with "
2247                             "-rcon");
2248         }
2249     }
2250     else if (options.constraintCommunicationRange > 0)
2251     {
2252         /* Here we do not check for dd->splitConstraints,
2253          * because one can also set a cell size limit for virtual sites only,
2254          * and at this point we don't know yet if there are intercg v-sites.
2255          */
2256         GMX_LOG(mdlog.info)
2257                 .appendTextFormatted("User supplied maximum distance required for P-LINCS: %.3f nm",
2258                                      options.constraintCommunicationRange);
2259         systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2260     }
2261     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.constraintCommunicationRange);
2262
2263     return systemInfo;
2264 }
2265
2266 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
2267  * implemented. */
2268 static void checkDDGridSetup(const DDGridSetup&   ddGridSetup,
2269                              DDRole               ddRole,
2270                              MPI_Comm             communicator,
2271                              int                  numNodes,
2272                              const DomdecOptions& options,
2273                              const DDSettings&    ddSettings,
2274                              const DDSystemInfo&  systemInfo,
2275                              const real           cellsizeLimit,
2276                              const gmx_ddbox_t&   ddbox)
2277 {
2278     if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
2279     {
2280         char     buf[STRLEN];
2281         gmx_bool bC = (systemInfo.haveSplitConstraints
2282                        && systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2283         sprintf(buf,
2284                 "Change the number of ranks or mdrun option %s%s%s",
2285                 !bC ? "-rdd" : "-rcon",
2286                 ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2287                 bC ? " or your LINCS settings" : "");
2288
2289         gmx_fatal_collective(FARGS,
2290                              communicator,
2291                              ddRole == DDRole::Master,
2292                              "There is no domain decomposition for %d ranks that is compatible "
2293                              "with the given box and a minimum cell size of %g nm\n"
2294                              "%s\n"
2295                              "Look in the log file for details on the domain decomposition",
2296                              numNodes - ddGridSetup.numPmeOnlyRanks,
2297                              cellsizeLimit,
2298                              buf);
2299     }
2300
2301     const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
2302     if (acs < cellsizeLimit)
2303     {
2304         if (options.numCells[XX] <= 0)
2305         {
2306             GMX_RELEASE_ASSERT(
2307                     false,
2308                     "dd_choose_grid() should return a grid that satisfies the cell size limits");
2309         }
2310         else
2311         {
2312             gmx_fatal_collective(
2313                     FARGS,
2314                     communicator,
2315                     ddRole == DDRole::Master,
2316                     "The initial cell size (%f) is smaller than the cell size limit (%f), change "
2317                     "options -dd, -rdd or -rcon, see the log file for details",
2318                     acs,
2319                     cellsizeLimit);
2320         }
2321     }
2322
2323     const int numPPRanks =
2324             ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2325     if (numNodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2326     {
2327         gmx_fatal_collective(FARGS,
2328                              communicator,
2329                              ddRole == DDRole::Master,
2330                              "The size of the domain decomposition grid (%d) does not match the "
2331                              "number of PP ranks (%d). The total number of ranks is %d",
2332                              numPPRanks,
2333                              numNodes - ddGridSetup.numPmeOnlyRanks,
2334                              numNodes);
2335     }
2336     if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2337     {
2338         gmx_fatal_collective(FARGS,
2339                              communicator,
2340                              ddRole == DDRole::Master,
2341                              "The number of separate PME ranks (%d) is larger than the number of "
2342                              "PP ranks (%d), this is not supported.",
2343                              ddGridSetup.numPmeOnlyRanks,
2344                              numPPRanks);
2345     }
2346 }
2347
2348 /*! \brief Creates the DD rank setup, i.e. the numbers of PP and PME ranks and the PME grid decomposition */
2349 static DDRankSetup getDDRankSetup(const gmx::MDLogger& mdlog,
2350                                   int                  numNodes,
2351                                   const DDGridSetup&   ddGridSetup,
2352                                   const t_inputrec&    ir)
2353 {
2354     GMX_LOG(mdlog.info)
2355             .appendTextFormatted("Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2356                                  ddGridSetup.numDomains[XX],
2357                                  ddGridSetup.numDomains[YY],
2358                                  ddGridSetup.numDomains[ZZ],
2359                                  ddGridSetup.numPmeOnlyRanks);
2360
2361     DDRankSetup ddRankSetup;
2362
2363     ddRankSetup.numPPRanks = numNodes - ddGridSetup.numPmeOnlyRanks;
2364     copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2365
2366     ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2367     if (ddRankSetup.usePmeOnlyRanks)
2368     {
2369         ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2370     }
2371     else
2372     {
2373         ddRankSetup.numRanksDoingPme =
2374                 ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2375     }
2376
2377     if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2378     {
2379         /* The following choices should match those
2380          * in comm_cost_est in domdec_setup.cpp.
2381          * Note that here the checks have to take into account
2382          * that the decomposition might occur in a different order than xyz
2383          * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2384          * in which case they will not match those in comm_cost_est,
2385          * but since that is mainly for testing purposes that's fine.
2386          */
2387         if (ddGridSetup.numDDDimensions >= 2 && ddGridSetup.ddDimensions[0] == XX
2388             && ddGridSetup.ddDimensions[1] == YY
2389             && ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX]
2390             && ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0
2391             && getenv("GMX_PMEONEDD") == nullptr)
2392         {
2393             ddRankSetup.npmedecompdim = 2;
2394             ddRankSetup.npmenodes_x   = ddGridSetup.numDomains[XX];
2395             ddRankSetup.npmenodes_y   = ddRankSetup.numRanksDoingPme / ddRankSetup.npmenodes_x;
2396         }
2397         else
2398         {
2399             /* In case nc is 1 in both x and y we could still choose to
2400              * decompose pme in y instead of x, but we use x for simplicity.
2401              */
2402             ddRankSetup.npmedecompdim = 1;
2403             if (ddGridSetup.ddDimensions[0] == YY)
2404             {
2405                 ddRankSetup.npmenodes_x = 1;
2406                 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2407             }
2408             else
2409             {
2410                 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2411                 ddRankSetup.npmenodes_y = 1;
2412             }
2413         }
2414         GMX_LOG(mdlog.info)
2415                 .appendTextFormatted("PME domain decomposition: %d x %d x %d",
2416                                      ddRankSetup.npmenodes_x,
2417                                      ddRankSetup.npmenodes_y,
2418                                      1);
2419     }
2420     else
2421     {
2422         ddRankSetup.npmedecompdim = 0;
2423         ddRankSetup.npmenodes_x   = 0;
2424         ddRankSetup.npmenodes_y   = 0;
2425     }
2426
2427     return ddRankSetup;
2428 }
2429
2430 /*! \brief Set the cell size and interaction limits */
2431 static void set_dd_limits(const gmx::MDLogger& mdlog,
2432                           DDRole               ddRole,
2433                           gmx_domdec_t*        dd,
2434                           const DomdecOptions& options,
2435                           const DDSettings&    ddSettings,
2436                           const DDSystemInfo&  systemInfo,
2437                           const DDGridSetup&   ddGridSetup,
2438                           const int            numPPRanks,
2439                           const gmx_mtop_t*    mtop,
2440                           const t_inputrec*    ir,
2441                           const gmx_ddbox_t&   ddbox)
2442 {
2443     gmx_domdec_comm_t* comm = dd->comm;
2444     comm->ddSettings        = ddSettings;
2445
2446     /* Initialize the GPU share count to 0, it might change later */
2447     comm->nrank_gpu_shared = 0;
2448
2449     comm->dlbState = comm->ddSettings.initialDlbState;
2450     dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2451     /* To consider turning DLB on after 2*nstlist steps we need to check
2452      * at partitioning count 3. Thus we need to increase the first count by 2.
2453      */
2454     comm->ddPartioningCountFirstDlbOff += 2;
2455
2456     comm->bPMELoadBalDLBLimits = FALSE;
2457
2458     /* Allocate the charge group/atom sorting struct */
2459     comm->sort = std::make_unique<gmx_domdec_sort_t>();
2460
2461     comm->systemInfo = systemInfo;
2462
2463     if (systemInfo.useUpdateGroups)
2464     {
2465         /* Note: We would like to use dd->nnodes for the atom count estimate,
2466          *       but that is not yet available here. This anyhow only
2467          *       affects performance up to the second dd_partition_system call.
2468          */
2469         const int homeAtomCountEstimate = mtop->natoms / numPPRanks;
2470         comm->updateGroupsCog           = std::make_unique<gmx::UpdateGroupsCog>(
2471                 *mtop, systemInfo.updateGroupingPerMoleculetype, maxReferenceTemperature(*ir), homeAtomCountEstimate);
2472     }
2473
2474     /* Set the DD setup given by ddGridSetup */
2475     copy_ivec(ddGridSetup.numDomains, dd->numCells);
2476     dd->ndim = ddGridSetup.numDDDimensions;
2477     copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2478
2479     dd->nnodes = dd->numCells[XX] * dd->numCells[YY] * dd->numCells[ZZ];
2480
2481     snew(comm->slb_frac, DIM);
2482     if (isDlbDisabled(comm))
2483     {
2484         comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->numCells[XX], options.cellSizeX);
2485         comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->numCells[YY], options.cellSizeY);
2486         comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->numCells[ZZ], options.cellSizeZ);
2487     }
2488
2489     /* Set the multi-body cut-off and cellsize limit for DLB */
2490     comm->cutoff_mbody   = systemInfo.minCutoffForMultiBody;
2491     comm->cellsize_limit = systemInfo.cellsizeLimit;
2492     if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2493     {
2494         if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2495         {
2496             /* Set the bonded communication distance to halfway
2497              * the minimum and the maximum,
2498              * since the extra communication cost is nearly zero.
2499              */
2500             real acs           = average_cellsize_min(ddbox, dd->numCells);
2501             comm->cutoff_mbody = 0.5 * (systemInfo.minCutoffForMultiBody + acs);
2502             if (!isDlbDisabled(comm))
2503             {
2504                 /* Check if this does not limit the scaling */
2505                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, options.dlbScaling * acs);
2506             }
2507             if (!systemInfo.filterBondedCommunication)
2508             {
2509                 /* Without bBondComm do not go beyond the n.b. cut-off */
2510                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2511                 if (comm->cellsize_limit >= systemInfo.cutoff)
2512                 {
2513                     /* We don't lose a lot of efficiency
2514                      * when increasing it to the n.b. cut-off.
2515                      * It can even be slightly faster, because we need
2516                      * fewer checks for the communication setup.
2517                      */
2518                     comm->cutoff_mbody = systemInfo.cutoff;
2519                 }
2520             }
2521             /* Check if we did not end up below our original limit */
2522             comm->cutoff_mbody = std::max(comm->cutoff_mbody, systemInfo.minCutoffForMultiBody);
2523
2524             if (comm->cutoff_mbody > comm->cellsize_limit)
2525             {
2526                 comm->cellsize_limit = comm->cutoff_mbody;
2527             }
2528         }
2529         /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2530     }
2531
2532     if (debug)
2533     {
2534         fprintf(debug,
2535                 "Bonded atom communication beyond the cut-off: %s\n"
2536                 "cellsize limit %f\n",
2537                 gmx::boolToString(systemInfo.filterBondedCommunication),
2538                 comm->cellsize_limit);
2539     }
2540
2541     if (ddRole == DDRole::Master)
2542     {
2543         check_dd_restrictions(dd, ir, mdlog);
2544     }
2545 }
2546
2547 void dd_init_bondeds(FILE*                           fplog,
2548                      gmx_domdec_t*                   dd,
2549                      const gmx_mtop_t&               mtop,
2550                      const gmx::VirtualSitesHandler* vsite,
2551                      const t_inputrec*               ir,
2552                      gmx_bool                        bBCheck,
2553                      gmx::ArrayRef<cginfo_mb_t>      cginfo_mb)
2554 {
2555     gmx_domdec_comm_t* comm;
2556
2557     dd_make_reverse_top(fplog, dd, &mtop, vsite, ir, bBCheck);
2558
2559     comm = dd->comm;
2560
2561     if (comm->systemInfo.filterBondedCommunication)
2562     {
2563         /* Communicate atoms beyond the cut-off for bonded interactions */
2564         comm->bondedLinks = makeBondedLinks(mtop, cginfo_mb);
2565     }
2566     else
2567     {
2568         /* Only communicate atoms based on cut-off */
2569         comm->bondedLinks = nullptr;
2570     }
2571 }
2572
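/*! \brief Writes the DD cell size and interaction limits to \p log
 *
 * Reports the (maximum or initial) number of communication pulses, the cell size
 * limits and the maximum allowed distances for the different interaction types.
 */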
2573 static void writeSettings(gmx::TextWriter*   log,
2574                           gmx_domdec_t*      dd,
2575                           const gmx_mtop_t*  mtop,
2576                           const t_inputrec*  ir,
2577                           gmx_bool           bDynLoadBal,
2578                           real               dlb_scale,
2579                           const gmx_ddbox_t* ddbox)
2580 {
2581     gmx_domdec_comm_t* comm;
2582     int                d;
2583     ivec               np;
2584     real               limit, shrink;
2585
2586     comm = dd->comm;
2587
2588     if (bDynLoadBal)
2589     {
2590         log->writeString("The maximum number of communication pulses is:");
2591         for (d = 0; d < dd->ndim; d++)
2592         {
2593             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2594         }
2595         log->ensureLineBreak();
2596         log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm",
2597                                 comm->cellsize_limit);
2598         log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2599         log->writeString("The allowed shrink of domain decomposition cells is:");
2600         for (d = 0; d < DIM; d++)
2601         {
2602             if (dd->numCells[d] > 1)
2603             {
2604                 if (d >= ddbox->npbcdim && dd->numCells[d] == 2)
2605                 {
2606                     shrink = 0;
2607                 }
2608                 else
2609                 {
2610                     shrink = comm->cellsize_min_dlb[d]
2611                              / (ddbox->box_size[d] * ddbox->skew_fac[d] / dd->numCells[d]);
2612                 }
2613                 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2614             }
2615         }
2616         log->ensureLineBreak();
2617     }
2618     else
2619     {
2620         set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2621         log->writeString("The initial number of communication pulses is:");
2622         for (d = 0; d < dd->ndim; d++)
2623         {
2624             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2625         }
2626         log->ensureLineBreak();
2627         log->writeString("The initial domain decomposition cell size is:");
2628         for (d = 0; d < DIM; d++)
2629         {
2630             if (dd->numCells[d] > 1)
2631             {
2632                 log->writeStringFormatted(" %c %.2f nm", dim2char(d), dd->comm->cellsize_min[d]);
2633             }
2634         }
2635         log->ensureLineBreak();
2636         log->writeLine();
2637     }
2638
2639     const bool haveInterDomainVsites =
2640             (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
2641
2642     if (comm->systemInfo.haveInterDomainBondeds || haveInterDomainVsites
2643         || comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2644     {
2645         std::string decompUnits;
2646         if (comm->systemInfo.useUpdateGroups)
2647         {
2648             decompUnits = "atom groups";
2649         }
2650         else
2651         {
2652             decompUnits = "atoms";
2653         }
2654
2655         log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:",
2656                                 decompUnits.c_str());
2657         log->writeLineFormatted(
2658                 "%40s  %-7s %6.3f nm", "non-bonded interactions", "", comm->systemInfo.cutoff);
2659
2660         if (bDynLoadBal)
2661         {
2662             limit = dd->comm->cellsize_limit;
2663         }
2664         else
2665         {
2666             if (dd->unitCellInfo.ddBoxIsDynamic)
2667             {
2668                 log->writeLine(
2669                         "(the following are initial values, they could change due to box "
2670                         "deformation)");
2671             }
2672             limit = dd->comm->cellsize_min[XX];
2673             for (d = 1; d < DIM; d++)
2674             {
2675                 limit = std::min(limit, dd->comm->cellsize_min[d]);
2676             }
2677         }
2678
2679         if (comm->systemInfo.haveInterDomainBondeds)
2680         {
2681             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2682                                     "two-body bonded interactions",
2683                                     "(-rdd)",
2684                                     std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2685             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2686                                     "multi-body bonded interactions",
2687                                     "(-rdd)",
2688                                     (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm))
2689                                             ? comm->cutoff_mbody
2690                                             : std::min(comm->systemInfo.cutoff, limit));
2691         }
2692         if (haveInterDomainVsites)
2693         {
2694             log->writeLineFormatted("%40s  %-7s %6.3f nm", "virtual site constructions", "(-rcon)", limit);
2695         }
2696         if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2697         {
2698             std::string separation =
2699                     gmx::formatString("atoms separated by up to %d constraints", 1 + ir->nProjOrder);
2700             log->writeLineFormatted("%40s  %-7s %6.3f nm\n", separation.c_str(), "(-rcon)", limit);
2701         }
2702         log->ensureLineBreak();
2703     }
2704 }
2705
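/*! \brief Logs the DD settings for the current DLB state and, when DLB can still be turned on, also the settings that would then apply */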
2706 static void logSettings(const gmx::MDLogger& mdlog,
2707                         gmx_domdec_t*        dd,
2708                         const gmx_mtop_t*    mtop,
2709                         const t_inputrec*    ir,
2710                         real                 dlb_scale,
2711                         const gmx_ddbox_t*   ddbox)
2712 {
2713     gmx::StringOutputStream stream;
2714     gmx::TextWriter         log(&stream);
2715     writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2716     if (dd->comm->dlbState == DlbState::offCanTurnOn)
2717     {
2718         {
2719             log.ensureEmptyLine();
2720             log.writeLine(
2721                     "When dynamic load balancing gets turned on, these settings will change to:");
2722         }
2723         writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2724     }
2725     GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2726 }
2727
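/*! \brief Sets the DLB cell size limits and the number of communication pulses for each decomposed dimension */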
2728 static void set_cell_limits_dlb(const gmx::MDLogger& mdlog,
2729                                 gmx_domdec_t*        dd,
2730                                 real                 dlb_scale,
2731                                 const t_inputrec*    ir,
2732                                 const gmx_ddbox_t*   ddbox)
2733 {
2734     gmx_domdec_comm_t* comm;
2735     int                d, dim, npulse, npulse_d_max, npulse_d;
2736     gmx_bool           bNoCutOff;
2737
2738     comm = dd->comm;
2739
2740     bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2741
2742     /* Determine the maximum number of comm. pulses in one dimension */
2743
2744     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2745
2746     /* Determine the maximum required number of grid pulses */
2747     if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2748     {
2749         /* Only a single pulse is required */
2750         npulse = 1;
2751     }
2752     else if (!bNoCutOff && comm->cellsize_limit > 0)
2753     {
2754         /* We round down slightly here to avoid overhead due to the latency
2755          * of extra communication calls when the cut-off
2756          * would be only slightly longer than the cell size.
2757          * Later cellsize_limit is redetermined,
2758          * so we cannot miss interactions due to this rounding.
2759          */
2760         npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff / comm->cellsize_limit);
2761     }
2762     else
2763     {
2764         /* There is no cell size limit */
2765         npulse = std::max(dd->numCells[XX] - 1, std::max(dd->numCells[YY] - 1, dd->numCells[ZZ] - 1));
2766     }
2767
2768     if (!bNoCutOff && npulse > 1)
2769     {
2770         /* See if we can do with less pulses, based on dlb_scale */
2771         npulse_d_max = 0;
2772         for (d = 0; d < dd->ndim; d++)
2773         {
2774             dim      = dd->dim[d];
2775             npulse_d = static_cast<int>(
2776                     1
2777                     + dd->numCells[dim] * comm->systemInfo.cutoff
2778                               / (ddbox->box_size[dim] * ddbox->skew_fac[dim] * dlb_scale));
2779             npulse_d_max = std::max(npulse_d_max, npulse_d);
2780         }
2781         npulse = std::min(npulse, npulse_d_max);
2782     }
2783
2784     /* This env var can override npulse */
2785     d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2786     if (d > 0)
2787     {
2788         npulse = d;
2789     }
2790
2791     comm->maxpulse       = 1;
2792     comm->bVacDLBNoLimit = (ir->pbcType == PbcType::No);
2793     for (d = 0; d < dd->ndim; d++)
2794     {
2795         comm->cd[d].np_dlb = std::min(npulse, dd->numCells[dd->dim[d]] - 1);
2796         comm->maxpulse     = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2797         if (comm->cd[d].np_dlb < dd->numCells[dd->dim[d]] - 1)
2798         {
2799             comm->bVacDLBNoLimit = FALSE;
2800         }
2801     }
2802
2803     /* cellsize_limit is set for LINCS in init_domain_decomposition */
2804     if (!comm->bVacDLBNoLimit)
2805     {
2806         comm->cellsize_limit = std::max(comm->cellsize_limit, comm->systemInfo.cutoff / comm->maxpulse);
2807     }
2808     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2809     /* Set the minimum cell size for each DD dimension */
2810     for (d = 0; d < dd->ndim; d++)
2811     {
2812         if (comm->bVacDLBNoLimit || comm->cd[d].np_dlb * comm->cellsize_limit >= comm->systemInfo.cutoff)
2813         {
2814             comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2815         }
2816         else
2817         {
2818             comm->cellsize_min_dlb[dd->dim[d]] = comm->systemInfo.cutoff / comm->cd[d].np_dlb;
2819         }
2820     }
2821     if (comm->cutoff_mbody <= 0)
2822     {
2823         comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2824     }
2825     if (isDlbOn(comm))
2826     {
2827         set_dlb_limits(dd);
2828     }
2829 }
2830
2831 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t& dd)
2832 {
2833     return dd.comm->systemInfo.moleculesAreAlwaysWhole;
2834 }
2835
2836 gmx_bool dd_bonded_molpbc(const gmx_domdec_t* dd, PbcType pbcType)
2837 {
2838     /* If each molecule is a single charge group
2839      * or we use domain decomposition for each periodic dimension,
2840      * we do not need to take pbc into account for the bonded interactions.
2841      */
2842     return (pbcType != PbcType::No && dd->comm->systemInfo.haveInterDomainBondeds
2843             && !(dd->numCells[XX] > 1 && dd->numCells[YY] > 1
2844                  && (dd->numCells[ZZ] > 1 || pbcType == PbcType::XY)));
2845 }
2846
2847 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2848 static void set_ddgrid_parameters(const gmx::MDLogger& mdlog,
2849                                   gmx_domdec_t*        dd,
2850                                   real                 dlb_scale,
2851                                   const gmx_mtop_t*    mtop,
2852                                   const t_inputrec*    ir,
2853                                   const gmx_ddbox_t*   ddbox)
2854 {
2855     gmx_domdec_comm_t* comm        = dd->comm;
2856     DDRankSetup&       ddRankSetup = comm->ddRankSetup;
2857
2858     if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2859     {
2860         init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2861         if (ddRankSetup.npmedecompdim >= 2)
2862         {
2863             init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2864         }
2865     }
2866     else
2867     {
2868         ddRankSetup.numRanksDoingPme = 0;
2869         if (dd->pme_nodeid >= 0)
2870         {
2871             gmx_fatal_collective(FARGS,
2872                                  dd->mpi_comm_all,
2873                                  DDMASTER(dd),
2874                                  "Can not have separate PME ranks without PME electrostatics");
2875         }
2876     }
2877
2878     if (debug)
2879     {
2880         fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2881     }
2882     if (!isDlbDisabled(comm))
2883     {
2884         set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2885     }
2886
2887     logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
2888
2889     real vol_frac;
2890     if (ir->pbcType == PbcType::No)
2891     {
2892         vol_frac = 1 - 1 / static_cast<double>(dd->nnodes);
2893     }
2894     else
2895     {
2896         vol_frac = (1 + comm_box_frac(dd->numCells, comm->systemInfo.cutoff, *ddbox))
2897                    / static_cast<double>(dd->nnodes);
2898     }
2899     if (debug)
2900     {
2901         fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2902     }
2903     int natoms_tot = mtop->natoms;
2904
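    /* Construct the global-to-local atom lookup; the expected number of local
     * atoms is estimated from the volume fraction of the DD zones.
     */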
2905     dd->ga2la = new gmx_ga2la_t(natoms_tot, static_cast<int>(vol_frac * natoms_tot));
2906 }
2907
2908 /*! \brief Gets some important DD parameters which can be modified by environment variables */
2909 static DDSettings getDDSettings(const gmx::MDLogger&     mdlog,
2910                                 const DomdecOptions&     options,
2911                                 const gmx::MdrunOptions& mdrunOptions,
2912                                 const t_inputrec&        ir)
2913 {
2914     DDSettings ddSettings;
2915
2916     ddSettings.useSendRecv2        = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2917     ddSettings.dlb_scale_lim       = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2918     ddSettings.useDDOrderZYX       = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
2919     ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
2920     ddSettings.eFlop               = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2921     const int recload              = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2922     ddSettings.nstDDDump           = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2923     ddSettings.nstDDDumpGrid       = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2924     ddSettings.DD_debug            = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
2925
2926     if (ddSettings.useSendRecv2)
2927     {
2928         GMX_LOG(mdlog.info)
2929                 .appendText(
2930                         "Will use two sequential MPI_Sendrecv calls instead of two simultaneous "
2931                         "non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite "
2932                         "communication");
2933     }
2934
2935     if (ddSettings.eFlop)
2936     {
2937         GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2938         ddSettings.recordLoad = true;
2939     }
2940     else
2941     {
2942         ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
2943     }
2944
2945     ddSettings.initialDlbState = determineInitialDlbState(
2946             mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, &ir);
2947     GMX_LOG(mdlog.info)
2948             .appendTextFormatted("Dynamic load balancing: %s",
2949                                  edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);
2950
2951     return ddSettings;
2952 }
2953
2954 gmx_domdec_t::gmx_domdec_t(const t_inputrec& ir) : unitCellInfo(ir) {}
2955
2956 namespace gmx
2957 {
2958
2959 // TODO once the functionality stabilizes, move this class and
2960 // supporting functionality into builder.cpp
2961 /*! \brief Impl class for DD builder */
2962 class DomainDecompositionBuilder::Impl
2963 {
2964 public:
2965     //! Constructor
2966     Impl(const MDLogger&      mdlog,
2967          t_commrec*           cr,
2968          const DomdecOptions& options,
2969          const MdrunOptions&  mdrunOptions,
2970          const gmx_mtop_t&    mtop,
2971          const t_inputrec&    ir,
2972          const matrix         box,
2973          ArrayRef<const RVec> xGlobal);
2974
2975     //! Build the resulting DD manager
2976     gmx_domdec_t* build(LocalAtomSetManager* atomSets);
2977
2978     //! Objects used in constructing and configuring DD
2979     //! {
2980     //! Logging object
2981     const MDLogger& mdlog_;
2982     //! Communication object
2983     t_commrec* cr_;
2984     //! User-supplied options configuring DD behavior
2985     const DomdecOptions options_;
2986     //! Global system topology
2987     const gmx_mtop_t& mtop_;
2988     //! User input values from the tpr file
2989     const t_inputrec& ir_;
2990     //! }
2991
2992     //! Internal objects used in constructing DD
2993     //! {
2994     //! Settings combined from the user input
2995     DDSettings ddSettings_;
2996     //! Information derived from the simulation system
2997     DDSystemInfo systemInfo_;
2998     //! Box structure
2999     gmx_ddbox_t ddbox_ = { 0 };
3000     //! Organization of the DD grids
3001     DDGridSetup ddGridSetup_;
3002     //! Organization of the DD ranks
3003     DDRankSetup ddRankSetup_;
3004     //! Number of DD cells in each dimension
3005     ivec ddCellIndex_ = { 0, 0, 0 };
3006     //! IDs of PME-only ranks
3007     std::vector<int> pmeRanks_;
3008     //! Contains a valid Cartesian-communicator-based setup, or defaults.
3009     CartesianRankSetup cartSetup_;
3010     //! }
3011 };
3012
3013 DomainDecompositionBuilder::Impl::Impl(const MDLogger&      mdlog,
3014                                        t_commrec*           cr,
3015                                        const DomdecOptions& options,
3016                                        const MdrunOptions&  mdrunOptions,
3017                                        const gmx_mtop_t&    mtop,
3018                                        const t_inputrec&    ir,
3019                                        const matrix         box,
3020                                        ArrayRef<const RVec> xGlobal) :
3021     mdlog_(mdlog),
3022     cr_(cr),
3023     options_(options),
3024     mtop_(mtop),
3025     ir_(ir)
3026 {
3027     GMX_LOG(mdlog_.info).appendTextFormatted("\nInitializing Domain Decomposition on %d ranks", cr_->sizeOfDefaultCommunicator);
3028
3029     ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_);
3030
3031     if (ddSettings_.eFlop > 1)
3032     {
3033         /* Ensure that we have different random flop counts on different ranks */
3034         srand(1 + cr_->rankInDefaultCommunicator);
3035     }
3036
3037     systemInfo_ = getSystemInfo(mdlog_,
3038                                 MASTER(cr_) ? DDRole::Master : DDRole::Agent,
3039                                 cr->mpiDefaultCommunicator,
3040                                 options_,
3041                                 mtop_,
3042                                 ir_,
3043                                 box,
3044                                 xGlobal);
3045
3046     const int  numRanksRequested         = cr_->sizeOfDefaultCommunicator;
3047     const bool checkForLargePrimeFactors = (options_.numCells[0] <= 0);
3048     checkForValidRankCountRequests(
3049             numRanksRequested, EEL_PME(ir_.coulombtype), options_.numPmeRanks, checkForLargePrimeFactors);
3050
3051     // DD grid setup uses a different cell size limit for
3052     // automated setup than the one in systemInfo_. The latter is used
3053     // in set_dd_limits() to configure DLB, for example.
3054     const real gridSetupCellsizeLimit =
3055             getDDGridSetupCellSizeLimit(mdlog_,
3056                                         !isDlbDisabled(ddSettings_.initialDlbState),
3057                                         options_.dlbScaling,
3058                                         ir_,
3059                                         systemInfo_.cellsizeLimit);
3060     ddGridSetup_ = getDDGridSetup(mdlog_,
3061                                   MASTER(cr_) ? DDRole::Master : DDRole::Agent,
3062                                   cr->mpiDefaultCommunicator,
3063                                   numRanksRequested,
3064                                   options_,
3065                                   ddSettings_,
3066                                   systemInfo_,
3067                                   gridSetupCellsizeLimit,
3068                                   mtop_,
3069                                   ir_,
3070                                   box,
3071                                   xGlobal,
3072                                   &ddbox_);
3073     checkDDGridSetup(ddGridSetup_,
3074                      MASTER(cr_) ? DDRole::Master : DDRole::Agent,
3075                      cr->mpiDefaultCommunicator,
3076                      cr->sizeOfDefaultCommunicator,
3077                      options_,
3078                      ddSettings_,
3079                      systemInfo_,
3080                      gridSetupCellsizeLimit,
3081                      ddbox_);
3082
3083     cr_->npmenodes = ddGridSetup_.numPmeOnlyRanks;
3084
3085     ddRankSetup_ = getDDRankSetup(mdlog_, cr_->sizeOfDefaultCommunicator, ddGridSetup_, ir_);
3086
3087     /* Generate the group communicator, also decides the duty of each rank */
3088     cartSetup_ = makeGroupCommunicators(
3089             mdlog_, ddSettings_, options_.rankOrder, ddRankSetup_, cr_, ddCellIndex_, &pmeRanks_);
3090 }
3091
3092 gmx_domdec_t* DomainDecompositionBuilder::Impl::build(LocalAtomSetManager* atomSets)
3093 {
3094     gmx_domdec_t* dd = new gmx_domdec_t(ir_);
3095
3096     copy_ivec(ddCellIndex_, dd->ci);
3097
3098     dd->comm = init_dd_comm();
3099
3100     dd->comm->ddRankSetup        = ddRankSetup_;
3101     dd->comm->cartesianRankSetup = cartSetup_;
3102
3103     set_dd_limits(mdlog_,
3104                   MASTER(cr_) ? DDRole::Master : DDRole::Agent,
3105                   dd,
3106                   options_,
3107                   ddSettings_,
3108                   systemInfo_,
3109                   ddGridSetup_,
3110                   ddRankSetup_.numPPRanks,
3111                   &mtop_,
3112                   &ir_,
3113                   ddbox_);
3114
3115     setupGroupCommunication(mdlog_, ddSettings_, pmeRanks_, cr_, mtop_.natoms, dd);
3116
3117     if (thisRankHasDuty(cr_, DUTY_PP))
3118     {
3119         set_ddgrid_parameters(mdlog_, dd, options_.dlbScaling, &mtop_, &ir_, &ddbox_);
3120
3121         setup_neighbor_relations(dd);
3122     }
3123
3124     /* Set overallocation to avoid frequent reallocation of arrays */
3125     set_over_alloc_dd(TRUE);
3126
3127     dd->atomSets = atomSets;
3128
3129     return dd;
3130 }
3131
3132 DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger&      mdlog,
3133                                                        t_commrec*           cr,
3134                                                        const DomdecOptions& options,
3135                                                        const MdrunOptions&  mdrunOptions,
3136                                                        const gmx_mtop_t&    mtop,
3137                                                        const t_inputrec&    ir,
3138                                                        const matrix         box,
3139                                                        ArrayRef<const RVec> xGlobal) :
3140     impl_(new Impl(mdlog, cr, options, mdrunOptions, mtop, ir, box, xGlobal))
3141 {
3142 }
3143
3144 gmx_domdec_t* DomainDecompositionBuilder::build(LocalAtomSetManager* atomSets)
3145 {
3146     return impl_->build(atomSets);
3147 }
3148
3149 DomainDecompositionBuilder::~DomainDecompositionBuilder() = default;
3150
3151 } // namespace gmx
3152
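/*! \brief Checks whether the domain decomposition in \p cr can handle a cut-off
 * of \p cutoffRequested; when DLB is not disabled, local limitations are
 * combined over the ranks.
 */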
3153 static gmx_bool test_dd_cutoff(const t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3154 {
3155     gmx_ddbox_t ddbox;
3156     int         d, dim, np;
3157     real        inv_cell_size;
3158     int         LocallyLimited;
3159
3160     const auto* dd = cr->dd;
3161
3162     set_ddbox(*dd, false, box, true, x, &ddbox);
3163
3164     LocallyLimited = 0;
3165
3166     for (d = 0; d < dd->ndim; d++)
3167     {
3168         dim = dd->dim[d];
3169
3170         inv_cell_size = DD_CELL_MARGIN * dd->numCells[dim] / ddbox.box_size[dim];
3171         if (dd->unitCellInfo.ddBoxIsDynamic)
3172         {
3173             inv_cell_size *= DD_PRES_SCALE_MARGIN;
3174         }
3175
3176         np = 1 + static_cast<int>(cutoffRequested * inv_cell_size * ddbox.skew_fac[dim]);
3177
3178         if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3179         {
3180             if (np > dd->comm->cd[d].np_dlb)
3181             {
3182                 return FALSE;
3183             }
3184
3185             /* If the current local cell size is smaller than the requested
3186              * cut-off, we could still fix it, but this gets very complicated.
3187              * Without fixing it here, we might actually need more checks.
3188              */
3189             real cellSizeAlongDim =
3190                     (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim]) * ddbox.skew_fac[dim];
3191             if (cellSizeAlongDim * dd->comm->cd[d].np_dlb < cutoffRequested)
3192             {
3193                 LocallyLimited = 1;
3194             }
3195         }
3196     }
3197
3198     if (!isDlbDisabled(dd->comm))
3199     {
3200         /* If DLB is not active yet, we don't need to check the grid jumps.
3201          * Actually we shouldn't, because then the grid jump data is not set.
3202          */
3203         if (isDlbOn(dd->comm) && check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3204         {
3205             LocallyLimited = 1;
3206         }
3207
3208         gmx_sumi(1, &LocallyLimited, cr);
3209
3210         if (LocallyLimited > 0)
3211         {
3212             return FALSE;
3213         }
3214     }
3215
3216     return TRUE;
3217 }
3218
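/* Checks whether the requested cut-off is compatible with the current DD grid
 * and, if so, stores it as the new system-info cut-off.
 */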
3219 gmx_bool change_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3220 {
3221     gmx_bool bCutoffAllowed;
3222
3223     bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3224
3225     if (bCutoffAllowed)
3226     {
3227         cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3228     }
3229
3230     return bCutoffAllowed;
3231 }
3232
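/* Creates a GpuHaloExchange object for every DD dimension and pulse that does
 * not have one yet; the activation note is printed only when no exchange
 * objects exist yet.
 */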
3233 void constructGpuHaloExchange(const gmx::MDLogger&            mdlog,
3234                               const t_commrec&                cr,
3235                               const gmx::DeviceStreamManager& deviceStreamManager,
3236                               gmx_wallcycle*                  wcycle)
3237 {
3238     GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedLocal),
3239                        "Local non-bonded stream should be valid when using "
3240                        "GPU halo exchange.");
3241     GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedNonLocal),
3242                        "Non-local non-bonded stream should be valid when using "
3243                        "GPU halo exchange.");
3244
3245     if (cr.dd->gpuHaloExchange[0].empty())
3246     {
3247         GMX_LOG(mdlog.warning)
3248                 .asParagraph()
3249                 .appendTextFormatted(
3250                         "NOTE: Activating the 'GPU halo exchange' feature, enabled "
3251                         "by the GMX_GPU_DD_COMMS environment variable.");
3253     }
3254
3255     for (int d = 0; d < cr.dd->ndim; d++)
3256     {
3257         for (int pulse = cr.dd->gpuHaloExchange[d].size(); pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
3258         {
3259             cr.dd->gpuHaloExchange[d].push_back(std::make_unique<gmx::GpuHaloExchange>(
3260                     cr.dd,
3261                     d,
3262                     cr.mpi_comm_mysim,
3263                     deviceStreamManager.context(),
3264                     deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedLocal),
3265                     deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedNonLocal),
3266                     pulse,
3267                     wcycle));
3268         }
3269     }
3270 }
3271
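/* Re-initializes every GpuHaloExchange object with the current coordinate and
 * force device buffers.
 */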
3272 void reinitGpuHaloExchange(const t_commrec&              cr,
3273                            const DeviceBuffer<gmx::RVec> d_coordinatesBuffer,
3274                            const DeviceBuffer<gmx::RVec> d_forcesBuffer)
3275 {
3276     for (int d = 0; d < cr.dd->ndim; d++)
3277     {
3278         for (int pulse = 0; pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
3279         {
3280             cr.dd->gpuHaloExchange[d][pulse]->reinitHalo(d_coordinatesBuffer, d_forcesBuffer);
3281         }
3282     }
3283 }
3284
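/* Launches the coordinate halo exchange for all DD dimensions and pulses in
 * forward order.
 */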
3285 void communicateGpuHaloCoordinates(const t_commrec&      cr,
3286                                    const matrix          box,
3287                                    GpuEventSynchronizer* coordinatesReadyOnDeviceEvent)
3288 {
3289     for (int d = 0; d < cr.dd->ndim; d++)
3290     {
3291         for (int pulse = 0; pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
3292         {
3293             cr.dd->gpuHaloExchange[d][pulse]->communicateHaloCoordinates(box, coordinatesReadyOnDeviceEvent);
3294         }
3295     }
3296 }
3297
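/* Launches the force halo exchange in the reverse order of the coordinate
 * exchange: highest dimension and last pulse first.
 */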
3298 void communicateGpuHaloForces(const t_commrec& cr, bool accumulateForces)
3299 {
3300     for (int d = cr.dd->ndim - 1; d >= 0; d--)
3301     {
3302         for (int pulse = cr.dd->comm->cd[d].numPulses() - 1; pulse >= 0; pulse--)
3303         {
3304             cr.dd->gpuHaloExchange[d][pulse]->communicateHaloForces(accumulateForces);
3305         }
3306     }
3307 }