1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35
36 #include "gmxpre.h"
37
38 #include "domdec.h"
39
40 #include "config.h"
41
42 #include <cassert>
43 #include <cinttypes>
44 #include <climits>
45 #include <cmath>
46 #include <cstdio>
47 #include <cstdlib>
48 #include <cstring>
49
50 #include <algorithm>
51 #include <memory>
52
53 #include "gromacs/domdec/builder.h"
54 #include "gromacs/domdec/collect.h"
55 #include "gromacs/domdec/dlb.h"
56 #include "gromacs/domdec/dlbtiming.h"
57 #include "gromacs/domdec/domdec_network.h"
58 #include "gromacs/domdec/ga2la.h"
59 #include "gromacs/domdec/gpuhaloexchange.h"
60 #include "gromacs/domdec/options.h"
61 #include "gromacs/domdec/partition.h"
62 #include "gromacs/gmxlib/network.h"
63 #include "gromacs/gmxlib/nrnb.h"
64 #include "gromacs/gpu_utils/gpu_utils.h"
65 #include "gromacs/hardware/hw_info.h"
66 #include "gromacs/listed_forces/manage_threading.h"
67 #include "gromacs/math/vec.h"
68 #include "gromacs/math/vectypes.h"
69 #include "gromacs/mdlib/calc_verletbuf.h"
70 #include "gromacs/mdlib/constr.h"
71 #include "gromacs/mdlib/constraintrange.h"
72 #include "gromacs/mdlib/updategroups.h"
73 #include "gromacs/mdlib/vsite.h"
74 #include "gromacs/mdtypes/commrec.h"
75 #include "gromacs/mdtypes/forceoutput.h"
76 #include "gromacs/mdtypes/inputrec.h"
77 #include "gromacs/mdtypes/mdrunoptions.h"
78 #include "gromacs/mdtypes/state.h"
79 #include "gromacs/pbcutil/ishift.h"
80 #include "gromacs/pbcutil/pbc.h"
81 #include "gromacs/pulling/pull.h"
82 #include "gromacs/timing/wallcycle.h"
83 #include "gromacs/topology/block.h"
84 #include "gromacs/topology/idef.h"
85 #include "gromacs/topology/ifunc.h"
86 #include "gromacs/topology/mtop_lookup.h"
87 #include "gromacs/topology/mtop_util.h"
88 #include "gromacs/topology/topology.h"
89 #include "gromacs/utility/basedefinitions.h"
90 #include "gromacs/utility/basenetwork.h"
91 #include "gromacs/utility/cstringutil.h"
92 #include "gromacs/utility/exceptions.h"
93 #include "gromacs/utility/fatalerror.h"
94 #include "gromacs/utility/gmxmpi.h"
95 #include "gromacs/utility/logger.h"
96 #include "gromacs/utility/real.h"
97 #include "gromacs/utility/smalloc.h"
98 #include "gromacs/utility/strconvert.h"
99 #include "gromacs/utility/stringstream.h"
100 #include "gromacs/utility/stringutil.h"
101 #include "gromacs/utility/textwriter.h"
102
103 #include "atomdistribution.h"
104 #include "box.h"
105 #include "cellsizes.h"
106 #include "distribute.h"
107 #include "domdec_constraints.h"
108 #include "domdec_internal.h"
109 #include "domdec_setup.h"
110 #include "domdec_vsite.h"
111 #include "redistribute.h"
112 #include "utility.h"
113
114 // TODO remove this when moving domdec into gmx namespace
115 using gmx::DdRankOrder;
116 using gmx::DlbOption;
117 using gmx::DomdecOptions;
118
119 static const char *edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
120
121 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
122 #define DD_CGIBS 2
123
124 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
125 #define DD_FLAG_NRCG  65535
126 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
127 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
128
129 /* The DD zone order */
130 static const ivec dd_zo[DD_MAXZONE] =
131 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
132
133 /* The non-bonded zone-pair setup for domain decomposition
134  * The first number is the i-zone, the second number the first j-zone seen by
135  * this i-zone, the third number the last+1 j-zone seen by this i-zone.
136  * As is, this is for 3D decomposition, where there are 4 i-zones.
137  * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
138  * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
139  */
140 static const int
141     ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
142                                                  {1, 3, 6},
143                                                  {2, 5, 6},
144                                                  {3, 5, 7}};
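/* Example reading of the table above: the row {1, 3, 6} means that i-zone 1
 * interacts with j-zones 3 up to (but not including) 6, i.e. j-zones 3, 4 and 5.
 * With 1D decomposition only the first row is used, with the last+1 j-zone
 * clipped to 2.
 */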
145
146
147 /*
148    #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
149
150    static void index2xyz(ivec nc,int ind,ivec xyz)
151    {
152    xyz[XX] = ind % nc[XX];
153    xyz[YY] = (ind / nc[XX]) % nc[YY];
154    xyz[ZZ] = ind / (nc[YY]*nc[XX]);
155    }
156  */
157
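/*! \brief Maps a DD rank index (as produced by dd_index()) back to Cartesian
 * cell coordinates, with the x index varying slowest and the z index fastest.
 *
 * Illustrative example (values chosen for illustration only): with
 * nc = {2, 3, 4}, ind = 11 yields xyz = {0, 2, 3}.
 */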
158 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
159 {
160     xyz[XX] = ind / (nc[YY]*nc[ZZ]);
161     xyz[YY] = (ind / nc[ZZ]) % nc[YY];
162     xyz[ZZ] = ind % nc[ZZ];
163 }
164
165 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
166 {
167     int                       ddnodeid = -1;
168
169     const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
170     const int                 ddindex   = dd_index(dd->nc, c);
171     if (cartSetup.bCartesianPP_PME)
172     {
173         ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
174     }
175     else if (cartSetup.bCartesianPP)
176     {
177 #if GMX_MPI
178         MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
179 #endif
180     }
181     else
182     {
183         ddnodeid = ddindex;
184     }
185
186     return ddnodeid;
187 }
188
189 int ddglatnr(const gmx_domdec_t *dd, int i)
190 {
191     int atnr;
192
193     if (dd == nullptr)
194     {
195         atnr = i + 1;
196     }
197     else
198     {
199         if (i >= dd->comm->atomRanges.numAtomsTotal())
200         {
201             gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
202         }
203         atnr = dd->globalAtomIndices[i] + 1;
204     }
205
206     return atnr;
207 }
208
209 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t &dd)
210 {
211     GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
212     return dd.comm->systemInfo.updateGroupingPerMoleculetype;
213 }
214
215 void dd_store_state(gmx_domdec_t *dd, t_state *state)
216 {
217     int i;
218
219     if (state->ddp_count != dd->ddp_count)
220     {
221         gmx_incons("The MD state does not match the domain decomposition state");
222     }
223
224     state->cg_gl.resize(dd->ncg_home);
225     for (i = 0; i < dd->ncg_home; i++)
226     {
227         state->cg_gl[i] = dd->globalAtomGroupIndices[i];
228     }
229
230     state->ddp_count_cg_gl = dd->ddp_count;
231 }
232
233 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
234 {
235     return &dd->comm->zones;
236 }
237
238 int dd_numAtomsZones(const gmx_domdec_t &dd)
239 {
240     return dd.comm->atomRanges.end(DDAtomRanges::Type::Zones);
241 }
242
243 int dd_numHomeAtoms(const gmx_domdec_t &dd)
244 {
245     return dd.comm->atomRanges.numHomeAtoms();
246 }
247
248 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
249 {
250     /* We currently set mdatoms entries for all atoms:
251      * local + non-local + communicated for vsite + constraints
252      */
253
254     return dd->comm->atomRanges.numAtomsTotal();
255 }
256
257 int dd_natoms_vsite(const gmx_domdec_t *dd)
258 {
259     return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
260 }
261
262 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
263 {
264     *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
265     *at_end   = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
266 }
267
268 void dd_move_x(gmx_domdec_t             *dd,
269                const matrix              box,
270                gmx::ArrayRef<gmx::RVec>  x,
271                gmx_wallcycle            *wcycle)
272 {
273     wallcycle_start(wcycle, ewcMOVEX);
274
275     int                    nzone, nat_tot;
276     gmx_domdec_comm_t     *comm;
277     gmx_domdec_comm_dim_t *cd;
278     rvec                   shift = {0, 0, 0};
279     gmx_bool               bPBC, bScrew;
280
281     comm = dd->comm;
282
283     nzone   = 1;
284     nat_tot = comm->atomRanges.numHomeAtoms();
285     for (int d = 0; d < dd->ndim; d++)
286     {
287         bPBC   = (dd->ci[dd->dim[d]] == 0);
288         bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
289         if (bPBC)
290         {
291             copy_rvec(box[dd->dim[d]], shift);
292         }
293         cd = &comm->cd[d];
294         for (const gmx_domdec_ind_t &ind : cd->ind)
295         {
296             DDBufferAccess<gmx::RVec>  sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
297             gmx::ArrayRef<gmx::RVec>  &sendBuffer = sendBufferAccess.buffer;
298             int                        n          = 0;
299             if (!bPBC)
300             {
301                 for (int j : ind.index)
302                 {
303                     sendBuffer[n] = x[j];
304                     n++;
305                 }
306             }
307             else if (!bScrew)
308             {
309                 for (int j : ind.index)
310                 {
311                     /* We need to shift the coordinates */
312                     for (int d = 0; d < DIM; d++)
313                     {
314                         sendBuffer[n][d] = x[j][d] + shift[d];
315                     }
316                     n++;
317                 }
318             }
319             else
320             {
321                 for (int j : ind.index)
322                 {
323                     /* Shift x */
324                     sendBuffer[n][XX] = x[j][XX] + shift[XX];
325                     /* Rotate y and z.
326                      * This operation requires a special shift force
327                      * treatment, which is performed in calc_vir.
328                      */
329                     sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
330                     sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
331                     n++;
332                 }
333             }
334
335             DDBufferAccess<gmx::RVec>  receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
336
337             gmx::ArrayRef<gmx::RVec>   receiveBuffer;
338             if (cd->receiveInPlace)
339             {
340                 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
341             }
342             else
343             {
344                 receiveBuffer = receiveBufferAccess.buffer;
345             }
346             /* Send and receive the coordinates */
347             ddSendrecv(dd, d, dddirBackward,
348                        sendBuffer, receiveBuffer);
349
350             if (!cd->receiveInPlace)
351             {
352                 int j = 0;
353                 for (int zone = 0; zone < nzone; zone++)
354                 {
355                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
356                     {
357                         x[i] = receiveBuffer[j++];
358                     }
359                 }
360             }
361             nat_tot += ind.nrecv[nzone+1];
362         }
363         nzone += nzone;
364     }
365
366     wallcycle_stop(wcycle, ewcMOVEX);
367 }
368
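/* Communicates the forces on halo atoms back to their home ranks and adds
 * them to the local forces; shift forces are accumulated when the virial is
 * computed, and with screw PBC the y and z components change sign.
 */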
369 void dd_move_f(gmx_domdec_t              *dd,
370                gmx::ForceWithShiftForces *forceWithShiftForces,
371                gmx_wallcycle             *wcycle)
372 {
373     wallcycle_start(wcycle, ewcMOVEF);
374
375     gmx::ArrayRef<gmx::RVec> f       = forceWithShiftForces->force();
376     gmx::ArrayRef<gmx::RVec> fshift  = forceWithShiftForces->shiftForces();
377
378     gmx_domdec_comm_t       &comm    = *dd->comm;
379     int                      nzone   = comm.zones.n/2;
380     int                      nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
381     for (int d = dd->ndim-1; d >= 0; d--)
382     {
383         /* Only forces in domains near the PBC boundaries need to
384            consider PBC in the treatment of fshift */
385         const bool shiftForcesNeedPbc = (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
386         const bool applyScrewPbc      = (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
387         /* Determine which shift vector we need */
388         ivec       vis   = { 0, 0, 0 };
389         vis[dd->dim[d]]  = 1;
390         const int  is    = IVEC2IS(vis);
391
392         /* Loop over the pulses */
393         const gmx_domdec_comm_dim_t &cd = comm.cd[d];
394         for (int p = cd.numPulses() - 1; p >= 0; p--)
395         {
396             const gmx_domdec_ind_t    &ind  = cd.ind[p];
397             DDBufferAccess<gmx::RVec>  receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
398             gmx::ArrayRef<gmx::RVec>  &receiveBuffer = receiveBufferAccess.buffer;
399
400             nat_tot                                 -= ind.nrecv[nzone+1];
401
402             DDBufferAccess<gmx::RVec>  sendBufferAccess(comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
403
404             gmx::ArrayRef<gmx::RVec>   sendBuffer;
405             if (cd.receiveInPlace)
406             {
407                 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
408             }
409             else
410             {
411                 sendBuffer = sendBufferAccess.buffer;
412                 int j = 0;
413                 for (int zone = 0; zone < nzone; zone++)
414                 {
415                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
416                     {
417                         sendBuffer[j++] = f[i];
418                     }
419                 }
420             }
421             /* Communicate the forces */
422             ddSendrecv(dd, d, dddirForward,
423                        sendBuffer, receiveBuffer);
424             /* Add the received forces */
425             int n = 0;
426             if (!shiftForcesNeedPbc)
427             {
428                 for (int j : ind.index)
429                 {
430                     for (int d = 0; d < DIM; d++)
431                     {
432                         f[j][d] += receiveBuffer[n][d];
433                     }
434                     n++;
435                 }
436             }
437             else if (!applyScrewPbc)
438             {
439                 for (int j : ind.index)
440                 {
441                     for (int d = 0; d < DIM; d++)
442                     {
443                         f[j][d] += receiveBuffer[n][d];
444                     }
445                     /* Add this force to the shift force */
446                     for (int d = 0; d < DIM; d++)
447                     {
448                         fshift[is][d] += receiveBuffer[n][d];
449                     }
450                     n++;
451                 }
452             }
453             else
454             {
455                 for (int j : ind.index)
456                 {
457                     /* Rotate the force */
458                     f[j][XX] += receiveBuffer[n][XX];
459                     f[j][YY] -= receiveBuffer[n][YY];
460                     f[j][ZZ] -= receiveBuffer[n][ZZ];
461                     if (shiftForcesNeedPbc)
462                     {
463                         /* Add this force to the shift force */
464                         for (int d = 0; d < DIM; d++)
465                         {
466                             fshift[is][d] += receiveBuffer[n][d];
467                         }
468                     }
469                     n++;
470                 }
471             }
472         }
473         nzone /= 2;
474     }
475     wallcycle_stop(wcycle, ewcMOVEF);
476 }
477
478 /* Convenience function for extracting a real buffer from an rvec buffer
479  *
480  * To reduce the number of temporary communication buffers and avoid
481  * cache pollution, we reuse gmx::RVec buffers for storing reals.
482  * This function returns a real buffer reference with the same number
483  * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
484  */
485 static gmx::ArrayRef<real>
486 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
487 {
488     return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
489                                   arrayRef.size());
490 }
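/* Minimal usage sketch (sizes chosen for illustration): passing an ArrayRef
 * of 10 gmx::RVec elements returns an ArrayRef<real> with 10 elements, which
 * occupy only the first third of the underlying RVec storage.
 */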
491
492 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
493 {
494     int                    nzone, nat_tot;
495     gmx_domdec_comm_t     *comm;
496     gmx_domdec_comm_dim_t *cd;
497
498     comm = dd->comm;
499
500     nzone   = 1;
501     nat_tot = comm->atomRanges.numHomeAtoms();
502     for (int d = 0; d < dd->ndim; d++)
503     {
504         cd = &comm->cd[d];
505         for (const gmx_domdec_ind_t &ind : cd->ind)
506         {
507             /* Note: We provision for RVec instead of real, so a factor of 3
508              * more than needed. The buffer actually already has this size
509              * and we pass a plain pointer below, so this does not matter.
510              */
511             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
512             gmx::ArrayRef<real>       sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
513             int                       n          = 0;
514             for (int j : ind.index)
515             {
516                 sendBuffer[n++] = v[j];
517             }
518
519             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
520
521             gmx::ArrayRef<real>       receiveBuffer;
522             if (cd->receiveInPlace)
523             {
524                 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
525             }
526             else
527             {
528                 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
529             }
530             /* Send and receive the data */
531             ddSendrecv(dd, d, dddirBackward,
532                        sendBuffer, receiveBuffer);
533             if (!cd->receiveInPlace)
534             {
535                 int j = 0;
536                 for (int zone = 0; zone < nzone; zone++)
537                 {
538                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
539                     {
540                         v[i] = receiveBuffer[j++];
541                     }
542                 }
543             }
544             nat_tot += ind.nrecv[nzone+1];
545         }
546         nzone += nzone;
547     }
548 }
549
550 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
551 {
552     int                    nzone, nat_tot;
553     gmx_domdec_comm_t     *comm;
554     gmx_domdec_comm_dim_t *cd;
555
556     comm = dd->comm;
557
558     nzone   = comm->zones.n/2;
559     nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
560     for (int d = dd->ndim-1; d >= 0; d--)
561     {
562         cd = &comm->cd[d];
563         for (int p = cd->numPulses() - 1; p >= 0; p--)
564         {
565             const gmx_domdec_ind_t &ind = cd->ind[p];
566
567             /* Note: We provision for RVec instead of real, so a factor of 3
568              * more than needed. The buffer actually already has this size
569              * and we typecast, so this works as intended.
570              */
571             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
572             gmx::ArrayRef<real>       receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
573             nat_tot -= ind.nrecv[nzone + 1];
574
575             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
576
577             gmx::ArrayRef<real>       sendBuffer;
578             if (cd->receiveInPlace)
579             {
580                 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
581             }
582             else
583             {
584                 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
585                 int j = 0;
586                 for (int zone = 0; zone < nzone; zone++)
587                 {
588                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
589                     {
590                         sendBuffer[j++] = v[i];
591                     }
592                 }
593             }
594             /* Communicate the data */
595             ddSendrecv(dd, d, dddirForward,
596                        sendBuffer, receiveBuffer);
597             /* Add the received values */
598             int n = 0;
599             for (int j : ind.index)
600             {
601                 v[j] += receiveBuffer[n];
602                 n++;
603             }
604         }
605         nzone /= 2;
606     }
607 }
608
609 real dd_cutoff_multibody(const gmx_domdec_t *dd)
610 {
611     const gmx_domdec_comm_t &comm       = *dd->comm;
612     const DDSystemInfo      &systemInfo = comm.systemInfo;
613
614     real                     r = -1;
615     if (systemInfo.haveInterDomainMultiBodyBondeds)
616     {
617         if (comm.cutoff_mbody > 0)
618         {
619             r = comm.cutoff_mbody;
620         }
621         else
622         {
623             /* cutoff_mbody=0 means we do not have DLB */
624             r = comm.cellsize_min[dd->dim[0]];
625             for (int di = 1; di < dd->ndim; di++)
626             {
627                 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
628             }
629             if (comm.systemInfo.filterBondedCommunication)
630             {
631                 r = std::max(r, comm.cutoff_mbody);
632             }
633             else
634             {
635                 r = std::min(r, systemInfo.cutoff);
636             }
637         }
638     }
639
640     return r;
641 }
642
643 real dd_cutoff_twobody(const gmx_domdec_t *dd)
644 {
645     real r_mb;
646
647     r_mb = dd_cutoff_multibody(dd);
648
649     return std::max(dd->comm->systemInfo.cutoff, r_mb);
650 }
651
652
653 static void dd_cart_coord2pmecoord(const DDRankSetup        &ddRankSetup,
654                                    const CartesianRankSetup &cartSetup,
655                                    const ivec                coord,
656                                    ivec                      coord_pme)
657 {
658     const int nc   = ddRankSetup.numPPCells[cartSetup.cartpmedim];
659     const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
660     copy_ivec(coord, coord_pme);
661     coord_pme[cartSetup.cartpmedim] =
662         nc + (coord[cartSetup.cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
663 }
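/* Illustrative example (numbers not from the code): with nc = 4 PP cells and
 * ntot = 6 along cartpmedim (two PME-only layers), PP coordinates 0 and 1 map
 * to PME coordinate 4, and PP coordinates 2 and 3 map to 5.
 */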
664
665 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
666 static int ddindex2pmeindex(const DDRankSetup &ddRankSetup,
667                             const int          ddCellIndex)
668 {
669     const int npp  = ddRankSetup.numPPRanks;
670     const int npme = ddRankSetup.numRanksDoingPme;
671
672     /* Here we assign a PME node to communicate with this DD node
673      * by assuming that the major index of both is x.
674      * We add npme/2 to obtain an even distribution.
675      */
676     return (ddCellIndex*npme + npme/2)/npp;
677 }
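/* Worked example (illustrative values): with numPPRanks = 8 and
 * numRanksDoingPme = 4, the formula (i*4 + 2)/8 maps PP cells 0-1 to PME
 * index 0, 2-3 to 1, 4-5 to 2 and 6-7 to 3, i.e. two PP ranks share each
 * PME rank.
 */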
678
679 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup &ddRankSetup)
680 {
681     std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
682
683     int              n = 0;
684     for (int i = 0; i < ddRankSetup.numPPRanks; i++)
685     {
686         const int p0 = ddindex2pmeindex(ddRankSetup, i);
687         const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
688         if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
689         {
690             if (debug)
691             {
692                 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
693             }
694             pmeRanks[n] = i + 1 + n;
695             n++;
696         }
697     }
698
699     return pmeRanks;
700 }
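/* Illustrative layout (values not from the code): with 8 PP ranks and 4
 * interleaved PME-only ranks, the loop above yields pmeRanks = {2, 5, 8, 11};
 * global ranks 0-1, 3-4, 6-7 and 9-10 are then the PP ranks communicating
 * with PME ranks 2, 5, 8 and 11, respectively.
 */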
701
702 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
703 {
704     gmx_domdec_t *dd;
705     ivec          coords;
706     int           slab;
707
708     dd = cr->dd;
709     /*
710        if (dd->comm->bCartesian) {
711        gmx_ddindex2xyz(dd->nc,ddindex,coords);
712        dd_coords2pmecoords(dd,coords,coords_pme);
713        copy_ivec(dd->ntot,nc);
714        nc[dd->cartpmedim]         -= dd->nc[dd->cartpmedim];
715        coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
716
717        slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
718        } else {
719        slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
720        }
721      */
722     coords[XX] = x;
723     coords[YY] = y;
724     coords[ZZ] = z;
725     slab       = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->nc, coords));
726
727     return slab;
728 }
729
730 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
731 {
732     const CartesianRankSetup &cartSetup = cr->dd->comm->cartesianRankSetup;
733     ivec                      coords    = { x, y, z };
734     int                       nodeid    = -1;
735
736     if (cartSetup.bCartesianPP_PME)
737     {
738 #if GMX_MPI
739         MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
740 #endif
741     }
742     else
743     {
744         int ddindex = dd_index(cr->dd->nc, coords);
745         if (cartSetup.bCartesianPP)
746         {
747             nodeid = cartSetup.ddindex2simnodeid[ddindex];
748         }
749         else
750         {
751             if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
752             {
753                 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
754             }
755             else
756             {
757                 nodeid = ddindex;
758             }
759         }
760     }
761
762     return nodeid;
763 }
764
765 static int dd_simnode2pmenode(const DDRankSetup          &ddRankSetup,
766                               const CartesianRankSetup   &cartSetup,
767                               gmx::ArrayRef<const int>    pmeRanks,
768                               const t_commrec gmx_unused *cr,
769                               const int                   sim_nodeid)
770 {
771     int                pmenode = -1;
772
773     /* This assumes a uniform x domain decomposition grid cell size */
774     if (cartSetup.bCartesianPP_PME)
775     {
776 #if GMX_MPI
777         ivec coord, coord_pme;
778         MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
779         if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
780         {
781             /* This is a PP rank */
782             dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
783             MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
784         }
785 #endif
786     }
787     else if (cartSetup.bCartesianPP)
788     {
789         if (sim_nodeid < ddRankSetup.numPPRanks)
790         {
791             pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
792         }
793     }
794     else
795     {
796         /* This assumes DD cells with identical x coordinates
797          * are numbered sequentially.
798          */
799         if (pmeRanks.empty())
800         {
801             if (sim_nodeid < ddRankSetup.numPPRanks)
802             {
803                 /* The DD index equals the nodeid */
804                 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
805             }
806         }
807         else
808         {
809             int i = 0;
810             while (sim_nodeid > pmeRanks[i])
811             {
812                 i++;
813             }
814             if (sim_nodeid < pmeRanks[i])
815             {
816                 pmenode = pmeRanks[i];
817             }
818         }
819     }
820
821     return pmenode;
822 }
823
824 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
825 {
826     if (dd != nullptr)
827     {
828         return {
829                    dd->comm->ddRankSetup.npmenodes_x,
830                    dd->comm->ddRankSetup.npmenodes_y
831         };
832     }
833     else
834     {
835         return {
836                    1, 1
837         };
838     }
839 }
840
841 std::vector<int> get_pme_ddranks(const t_commrec *cr,
842                                  const int        pmenodeid)
843 {
844     const DDRankSetup        &ddRankSetup = cr->dd->comm->ddRankSetup;
845     const CartesianRankSetup &cartSetup   = cr->dd->comm->cartesianRankSetup;
846     GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks, "This function should only be called when PME-only ranks are in use");
847     std::vector<int>          ddranks;
848     ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1)/ddRankSetup.numRanksDoingPme);
849
850     for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
851     {
852         for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
853         {
854             for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
855             {
856                 if (cartSetup.bCartesianPP_PME)
857                 {
858                     ivec coord = { x, y, z };
859                     ivec coord_pme;
860                     dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
861                     if (cr->dd->ci[XX] == coord_pme[XX] &&
862                         cr->dd->ci[YY] == coord_pme[YY] &&
863                         cr->dd->ci[ZZ] == coord_pme[ZZ])
864                     {
865                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
866                     }
867                 }
868                 else
869                 {
870                     /* The slab corresponds to the nodeid in the PME group */
871                     if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
872                     {
873                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
874                     }
875                 }
876             }
877         }
878     }
879     return ddranks;
880 }
881
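/*! \brief Returns whether this PP rank should receive the virial and energy
 * contributions from its PME-only rank; when several PP ranks send to the
 * same PME rank, only the last of them receives.
 */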
882 static gmx_bool receive_vir_ener(const gmx_domdec_t       *dd,
883                                  gmx::ArrayRef<const int>  pmeRanks,
884                                  const t_commrec          *cr)
885 {
886     gmx_bool           bReceive    = TRUE;
887
888     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
889     if (ddRankSetup.usePmeOnlyRanks)
890     {
891         const CartesianRankSetup &cartSetup   = dd->comm->cartesianRankSetup;
892         if (cartSetup.bCartesianPP_PME)
893         {
894 #if GMX_MPI
895             int  pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
896             ivec coords;
897             MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
898             coords[cartSetup.cartpmedim]++;
899             if (coords[cartSetup.cartpmedim] < dd->nc[cartSetup.cartpmedim])
900             {
901                 int rank;
902                 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
903                 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
904                 {
905                     /* This is not the last PP node for pmenode */
906                     bReceive = FALSE;
907                 }
908             }
909 #else
910             GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
911 #endif
912         }
913         else
914         {
915             int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
916             if (cr->sim_nodeid+1 < cr->nnodes &&
917                 dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
918             {
919                 /* This is not the last PP node for pmenode */
920                 bReceive = FALSE;
921             }
922         }
923     }
924
925     return bReceive;
926 }
927
928 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
929 {
930     gmx_domdec_comm_t *comm;
931     int                i;
932
933     comm = dd->comm;
934
935     snew(*dim_f, dd->nc[dim]+1);
936     (*dim_f)[0] = 0;
937     for (i = 1; i < dd->nc[dim]; i++)
938     {
939         if (comm->slb_frac[dim])
940         {
941             (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
942         }
943         else
944         {
945             (*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
946         }
947     }
948     (*dim_f)[dd->nc[dim]] = 1;
949 }
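/* Illustration (values chosen for the example): with dd->nc[dim] = 4 and no
 * static load-balancing fractions set, this yields
 * dim_f = {0, 0.25, 0.5, 0.75, 1}.
 */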
950
951 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
952 {
953     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
954
955     if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
956     {
957         ddpme->dim = YY;
958     }
959     else
960     {
961         ddpme->dim = dimind;
962     }
963     ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
964
965     ddpme->nslab = (ddpme->dim == 0 ?
966                     ddRankSetup.npmenodes_x :
967                     ddRankSetup.npmenodes_y);
968
969     if (ddpme->nslab <= 1)
970     {
971         return;
972     }
973
974     const int nso = ddRankSetup.numRanksDoingPme/ddpme->nslab;
975     /* Determine for each PME slab the PP location range for dimension dim */
976     snew(ddpme->pp_min, ddpme->nslab);
977     snew(ddpme->pp_max, ddpme->nslab);
978     for (int slab = 0; slab < ddpme->nslab; slab++)
979     {
980         ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
981         ddpme->pp_max[slab] = 0;
982     }
983     for (int i = 0; i < dd->nnodes; i++)
984     {
985         ivec xyz;
986         ddindex2xyz(dd->nc, i, xyz);
987         /* For y only use our y/z slab.
988          * This assumes that the PME x grid size matches the DD grid size.
989          */
990         if (dimind == 0 || xyz[XX] == dd->ci[XX])
991         {
992             const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
993             int       slab;
994             if (dimind == 0)
995             {
996                 slab = pmeindex/nso;
997             }
998             else
999             {
1000                 slab = pmeindex % ddpme->nslab;
1001             }
1002             ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
1003             ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1004         }
1005     }
1006
1007     set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1008 }
1009
1010 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1011 {
1012     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1013
1014     if (ddRankSetup.ddpme[0].dim == XX)
1015     {
1016         return ddRankSetup.ddpme[0].maxshift;
1017     }
1018     else
1019     {
1020         return 0;
1021     }
1022 }
1023
1024 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1025 {
1026     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1027
1028     if (ddRankSetup.ddpme[0].dim == YY)
1029     {
1030         return ddRankSetup.ddpme[0].maxshift;
1031     }
1032     else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
1033     {
1034         return ddRankSetup.ddpme[1].maxshift;
1035     }
1036     else
1037     {
1038         return 0;
1039     }
1040 }
1041
1042 bool ddHaveSplitConstraints(const gmx_domdec_t &dd)
1043 {
1044     return dd.comm->systemInfo.haveSplitConstraints;
1045 }
1046
1047 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
1048 {
1049     /* Note that the cycles value can be incorrect, either 0 or some
1050      * extremely large value, when our thread migrated to another core
1051      * with an unsynchronized cycle counter. If this happens less often
1052      * than once per nstlist steps, this will not cause issues, since
1053      * we later subtract the maximum value from the sum over nstlist steps.
1054      * A zero count will slightly lower the total, but that's a small effect.
1055      * Note that the main purpose of the subtraction of the maximum value
1056      * is to avoid throwing off the load balancing when stalls occur due,
1057      * e.g., to system activity or network congestion.
1058      */
1059     dd->comm->cycl[ddCycl] += cycles;
1060     dd->comm->cycl_n[ddCycl]++;
1061     if (cycles > dd->comm->cycl_max[ddCycl])
1062     {
1063         dd->comm->cycl_max[ddCycl] = cycles;
1064     }
1065 }
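/* Illustration (numbers invented): with nstlist = 10 and per-step cycle
 * counts {5, 5, 5, 5, 500, 5, 5, 5, 5, 5}, subtracting the maximum (500)
 * from the sum removes most of the effect of the one stalled step.
 */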
1066
1067 #if GMX_MPI
1068 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
1069 {
1070     MPI_Comm           c_row;
1071     int                dim, i, rank;
1072     ivec               loc_c;
1073     gmx_bool           bPartOfGroup = FALSE;
1074
1075     dim = dd->dim[dim_ind];
1076     copy_ivec(loc, loc_c);
1077     for (i = 0; i < dd->nc[dim]; i++)
1078     {
1079         loc_c[dim] = i;
1080         rank       = dd_index(dd->nc, loc_c);
1081         if (rank == dd->rank)
1082         {
1083             /* This process is part of the group */
1084             bPartOfGroup = TRUE;
1085         }
1086     }
1087     MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
1088                    &c_row);
1089     if (bPartOfGroup)
1090     {
1091         dd->comm->mpi_comm_load[dim_ind] = c_row;
1092         if (!isDlbDisabled(dd->comm))
1093         {
1094             DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1095
1096             if (dd->ci[dim] == dd->master_ci[dim])
1097             {
1098                 /* This is the root process of this row */
1099                 cellsizes.rowMaster  = std::make_unique<RowMaster>();
1100
1101                 RowMaster &rowMaster = *cellsizes.rowMaster;
1102                 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1103                 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
1104                 rowMaster.isCellMin.resize(dd->nc[dim]);
1105                 if (dim_ind > 0)
1106                 {
1107                     rowMaster.bounds.resize(dd->nc[dim]);
1108                 }
1109                 rowMaster.buf_ncd.resize(dd->nc[dim]);
1110             }
1111             else
1112             {
1113                 /* This is not a root process, we only need to receive cell_f */
1114                 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1115             }
1116         }
1117         if (dd->ci[dim] == dd->master_ci[dim])
1118         {
1119             snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
1120         }
1121     }
1122 }
1123 #endif
1124
1125 void dd_setup_dlb_resource_sharing(const t_commrec      *cr,
1126                                    int                   gpu_id)
1127 {
1128 #if GMX_MPI
1129     int           physicalnode_id_hash;
1130     gmx_domdec_t *dd;
1131     MPI_Comm      mpi_comm_pp_physicalnode;
1132
1133     if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1134     {
1135         /* Only ranks with short-ranged tasks (currently) use GPUs.
1136          * If we don't have GPUs assigned, there are no resources to share.
1137          */
1138         return;
1139     }
1140
1141     physicalnode_id_hash = gmx_physicalnode_id_hash();
1142
1143     dd = cr->dd;
1144
1145     if (debug)
1146     {
1147         fprintf(debug, "dd_setup_dlb_resource_sharing:\n");
1148         fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
1149                 dd->rank, physicalnode_id_hash, gpu_id);
1150     }
1151     /* Split the PP communicator over the physical nodes */
1152     /* TODO: See if we should store this (before), as it's also used for
1153      * the nodecomm summation.
1154      */
1155     // TODO PhysicalNodeCommunicator could be extended/used to handle
1156     // the need for per-node per-group communicators.
1157     MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
1158                    &mpi_comm_pp_physicalnode);
1159     MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
1160                    &dd->comm->mpi_comm_gpu_shared);
1161     MPI_Comm_free(&mpi_comm_pp_physicalnode);
1162     MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1163
1164     if (debug)
1165     {
1166         fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1167     }
1168
1169     /* Note that some ranks could share a GPU, while others don't */
1170
1171     if (dd->comm->nrank_gpu_shared == 1)
1172     {
1173         MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1174     }
1175 #else
1176     GMX_UNUSED_VALUE(cr);
1177     GMX_UNUSED_VALUE(gpu_id);
1178 #endif
1179 }
1180
1181 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
1182 {
1183 #if GMX_MPI
1184     int  dim0, dim1, i, j;
1185     ivec loc;
1186
1187     if (debug)
1188     {
1189         fprintf(debug, "Making load communicators\n");
1190     }
1191
1192     dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1193     snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1194
1195     if (dd->ndim == 0)
1196     {
1197         return;
1198     }
1199
1200     clear_ivec(loc);
1201     make_load_communicator(dd, 0, loc);
1202     if (dd->ndim > 1)
1203     {
1204         dim0 = dd->dim[0];
1205         for (i = 0; i < dd->nc[dim0]; i++)
1206         {
1207             loc[dim0] = i;
1208             make_load_communicator(dd, 1, loc);
1209         }
1210     }
1211     if (dd->ndim > 2)
1212     {
1213         dim0 = dd->dim[0];
1214         for (i = 0; i < dd->nc[dim0]; i++)
1215         {
1216             loc[dim0] = i;
1217             dim1      = dd->dim[1];
1218             for (j = 0; j < dd->nc[dim1]; j++)
1219             {
1220                 loc[dim1] = j;
1221                 make_load_communicator(dd, 2, loc);
1222             }
1223         }
1224     }
1225
1226     if (debug)
1227     {
1228         fprintf(debug, "Finished making load communicators\n");
1229     }
1230 #endif
1231 }
1232
1233 /*! \brief Sets up the relation between neighboring domains and zones */
1234 static void setup_neighbor_relations(gmx_domdec_t *dd)
1235 {
1236     int                     d, dim, i, j, m;
1237     ivec                    tmp, s;
1238     gmx_domdec_zones_t     *zones;
1239     gmx_domdec_ns_ranges_t *izone;
1240     GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
1241
1242     for (d = 0; d < dd->ndim; d++)
1243     {
1244         dim = dd->dim[d];
1245         copy_ivec(dd->ci, tmp);
1246         tmp[dim]           = (tmp[dim] + 1) % dd->nc[dim];
1247         dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1248         copy_ivec(dd->ci, tmp);
1249         tmp[dim]           = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
1250         dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1251         if (debug)
1252         {
1253             fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
1254                     dd->rank, dim,
1255                     dd->neighbor[d][0],
1256                     dd->neighbor[d][1]);
1257         }
1258     }
1259
1260     int nzone  = (1 << dd->ndim);
1261     int nizone = (1 << std::max(dd->ndim - 1, 0));
1262     assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1263
1264     zones = &dd->comm->zones;
1265
1266     for (i = 0; i < nzone; i++)
1267     {
1268         m = 0;
1269         clear_ivec(zones->shift[i]);
1270         for (d = 0; d < dd->ndim; d++)
1271         {
1272             zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1273         }
1274     }
1275
1276     zones->n = nzone;
1277     for (i = 0; i < nzone; i++)
1278     {
1279         for (d = 0; d < DIM; d++)
1280         {
1281             s[d] = dd->ci[d] - zones->shift[i][d];
1282             if (s[d] < 0)
1283             {
1284                 s[d] += dd->nc[d];
1285             }
1286             else if (s[d] >= dd->nc[d])
1287             {
1288                 s[d] -= dd->nc[d];
1289             }
1290         }
1291     }
1292     zones->nizone = nizone;
1293     for (i = 0; i < zones->nizone; i++)
1294     {
1295         assert(ddNonbondedZonePairRanges[i][0] == i);
1296
1297         izone     = &zones->izone[i];
1298         /* ddNonbondedZonePairRanges is defined for 3D decomposition; for
1299          * fewer dimensions use only j-zones up to nzone.
1300          */
1301         izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
1302         izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
1303         for (dim = 0; dim < DIM; dim++)
1304         {
1305             if (dd->nc[dim] == 1)
1306             {
1307                 /* All shifts should be allowed */
1308                 izone->shift0[dim] = -1;
1309                 izone->shift1[dim] = 1;
1310             }
1311             else
1312             {
1313                 /* Determine the min/max j-zone shift wrt the i-zone */
1314                 izone->shift0[dim] = 1;
1315                 izone->shift1[dim] = -1;
1316                 for (j = izone->j0; j < izone->j1; j++)
1317                 {
1318                     int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
1319                     if (shift_diff < izone->shift0[dim])
1320                     {
1321                         izone->shift0[dim] = shift_diff;
1322                     }
1323                     if (shift_diff > izone->shift1[dim])
1324                     {
1325                         izone->shift1[dim] = shift_diff;
1326                     }
1327                 }
1328             }
1329         }
1330     }
1331
1332     if (!isDlbDisabled(dd->comm))
1333     {
1334         dd->comm->cellsizesWithDlb.resize(dd->ndim);
1335     }
1336
1337     if (dd->comm->ddSettings.recordLoad)
1338     {
1339         make_load_communicators(dd);
1340     }
1341 }
1342
1343 static void make_pp_communicator(const gmx::MDLogger  &mdlog,
1344                                  gmx_domdec_t         *dd,
1345                                  t_commrec gmx_unused *cr,
1346                                  bool gmx_unused       reorder)
1347 {
1348 #if GMX_MPI
1349     gmx_domdec_comm_t  *comm      = dd->comm;
1350     CartesianRankSetup &cartSetup = comm->cartesianRankSetup;
1351
1352     if (cartSetup.bCartesianPP)
1353     {
1354         /* Set up cartesian communication for the particle-particle part */
1355         GMX_LOG(mdlog.info).appendTextFormatted(
1356                 "Will use a Cartesian communicator: %d x %d x %d",
1357                 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
1358
1359         ivec periods;
1360         for (int i = 0; i < DIM; i++)
1361         {
1362             periods[i] = TRUE;
1363         }
1364         MPI_Comm comm_cart;
1365         MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
1366                         &comm_cart);
1367         /* We overwrite the old communicator with the new cartesian one */
1368         cr->mpi_comm_mygroup = comm_cart;
1369     }
1370
1371     dd->mpi_comm_all = cr->mpi_comm_mygroup;
1372     MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1373
1374     if (cartSetup.bCartesianPP_PME)
1375     {
1376         /* Since we want to use the original cartesian setup for sim,
1377          * and not the one after split, we need to make an index.
1378          */
1379         cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1380         cartSetup.ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
1381         gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1382         /* Get the rank of the DD master;
1383          * above we made sure that the master node is a PP node.
1384          */
1385         int rank;
1386         if (MASTER(cr))
1387         {
1388             rank = dd->rank;
1389         }
1390         else
1391         {
1392             rank = 0;
1393         }
1394         MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1395     }
1396     else if (cartSetup.bCartesianPP)
1397     {
1398         if (!comm->ddRankSetup.usePmeOnlyRanks)
1399         {
1400             /* The PP communicator is also
1401              * the communicator for this simulation
1402              */
1403             cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1404         }
1405         cr->nodeid = dd->rank;
1406
1407         MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1408
1409         /* We need to make an index to go from the coordinates
1410          * to the nodeid of this simulation.
1411          */
1412         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1413         std::vector<int> buf(dd->nnodes);
1414         if (thisRankHasDuty(cr, DUTY_PP))
1415         {
1416             buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1417         }
1418         /* Communicate the ddindex to simulation nodeid index */
1419         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1420                       cr->mpi_comm_mysim);
1421
1422         /* Determine the master coordinates and rank.
1423          * The DD master should be the same node as the master of this sim.
1424          */
1425         for (int i = 0; i < dd->nnodes; i++)
1426         {
1427             if (cartSetup.ddindex2simnodeid[i] == 0)
1428             {
1429                 ddindex2xyz(dd->nc, i, dd->master_ci);
1430                 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1431             }
1432         }
1433         if (debug)
1434         {
1435             fprintf(debug, "The master rank is %d\n", dd->masterrank);
1436         }
1437     }
1438     else
1439     {
1440         /* No Cartesian communicators */
1441         /* We use the rank in dd->mpi_comm_all as the DD index */
1442         ddindex2xyz(dd->nc, dd->rank, dd->ci);
1443         /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1444         dd->masterrank = 0;
1445         clear_ivec(dd->master_ci);
1446     }
1447 #endif
1448
1449     GMX_LOG(mdlog.info).appendTextFormatted(
1450             "Domain decomposition rank %d, coordinates %d %d %d\n",
1451             dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1452     if (debug)
1453     {
1454         fprintf(debug,
1455                 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
1456                 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1457     }
1458 }
1459
1460 static void receive_ddindex2simnodeid(gmx_domdec_t         *dd,
1461                                       t_commrec            *cr)
1462 {
1463 #if GMX_MPI
1464     CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
1465
1466     if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1467     {
1468         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1469         std::vector<int> buf(dd->nnodes);
1470         if (thisRankHasDuty(cr, DUTY_PP))
1471         {
1472             buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1473         }
1474         /* Communicate the ddindex to simulation nodeid index */
1475         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1476                       cr->mpi_comm_mysim);
1477     }
1478 #else
1479     GMX_UNUSED_VALUE(dd);
1480     GMX_UNUSED_VALUE(cr);
1481 #endif
1482 }
1483
1484 static CartesianRankSetup
1485 split_communicator(const gmx::MDLogger    &mdlog,
1486                    t_commrec              *cr,
1487                    const DdRankOrder       ddRankOrder,
1488                    bool gmx_unused         reorder,
1489                    const DDRankSetup      &ddRankSetup,
1490                    ivec                    ddCellIndex,
1491                    std::vector<int>       *pmeRanks)
1492 {
1493     CartesianRankSetup cartSetup;
1494
1495     cartSetup.bCartesianPP     = (ddRankOrder == DdRankOrder::cartesian);
1496     cartSetup.bCartesianPP_PME = false;
1497
1498     const ivec &numDDCells     = ddRankSetup.numPPCells;
1499     /* Initially we set ntot to the number of PP cells */
1500     copy_ivec(numDDCells, cartSetup.ntot);
1501
1502     if (cartSetup.bCartesianPP)
1503     {
1504         const int numDDCellsTot = ddRankSetup.numPPRanks;
1505         bool      bDiv[DIM];
1506         for (int i = 1; i < DIM; i++)
1507         {
1508             bDiv[i] = ((ddRankSetup.numRanksDoingPme*numDDCells[i]) % numDDCellsTot == 0);
1509         }
1510         if (bDiv[YY] || bDiv[ZZ])
1511         {
1512             cartSetup.bCartesianPP_PME = TRUE;
1513             /* If we have 2D PME decomposition, which is always in x+y,
1514              * we stack the PME only nodes in z.
1515              * Otherwise we choose the direction that provides the thinnest slab
1516              * of PME only nodes as this will have the least effect
1517              * on the PP communication.
1518              * But for the PME communication the opposite might be better.
1519              */
1520             if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 ||
1521                              !bDiv[YY] ||
1522                              numDDCells[YY] > numDDCells[ZZ]))
1523             {
1524                 cartSetup.cartpmedim = ZZ;
1525             }
1526             else
1527             {
1528                 cartSetup.cartpmedim = YY;
1529             }
1530             cartSetup.ntot[cartSetup.cartpmedim]
1531                 += (ddRankSetup.numRanksDoingPme*numDDCells[cartSetup.cartpmedim])/numDDCellsTot;
1532         }
1533         else
1534         {
1535             GMX_LOG(mdlog.info).appendTextFormatted(
1536                     "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)",
1537                     ddRankSetup.numRanksDoingPme,
1538                     numDDCells[XX], numDDCells[YY],
1539                     numDDCells[XX], numDDCells[ZZ]);
1540             GMX_LOG(mdlog.info).appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1541         }
1542     }
1543
1544     if (cartSetup.bCartesianPP_PME)
1545     {
1546 #if GMX_MPI
1547         int  rank;
1548         ivec periods;
1549
1550         GMX_LOG(mdlog.info).appendTextFormatted(
1551                 "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1552                 cartSetup.ntot[XX], cartSetup.ntot[YY], cartSetup.ntot[ZZ]);
1553
1554         for (int i = 0; i < DIM; i++)
1555         {
1556             periods[i] = TRUE;
1557         }
1558         MPI_Comm comm_cart;
1559         MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder),
1560                         &comm_cart);
1561         MPI_Comm_rank(comm_cart, &rank);
1562         if (MASTER(cr) && rank != 0)
1563         {
1564             gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1565         }
1566
1567         /* With this assignment we lose the link to the original communicator,
1568          * which will usually be MPI_COMM_WORLD, unless we have multisim.
1569          */
1570         cr->mpi_comm_mysim = comm_cart;
1571         cr->sim_nodeid     = rank;
1572
1573         MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1574
1575         GMX_LOG(mdlog.info).appendTextFormatted(
1576                 "Cartesian rank %d, coordinates %d %d %d\n",
1577                 cr->sim_nodeid, ddCellIndex[XX], ddCellIndex[YY], ddCellIndex[ZZ]);
1578
1579         if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1580         {
1581             cr->duty = DUTY_PP;
1582         }
1583         if (!ddRankSetup.usePmeOnlyRanks ||
1584             ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1585         {
1586             cr->duty = DUTY_PME;
1587         }
1588
1589         /* Split the sim communicator into PP and PME only nodes */
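             /* The rank duty acts as the split color and the DD index as the key,
              * so the Cartesian ordering is preserved within each resulting group.
              */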
1590         MPI_Comm_split(cr->mpi_comm_mysim,
1591                        getThisRankDuties(cr),
1592                        dd_index(cartSetup.ntot, ddCellIndex),
1593                        &cr->mpi_comm_mygroup);
1594 #else
1595         GMX_UNUSED_VALUE(ddCellIndex);
1596 #endif
1597     }
1598     else
1599     {
1600         switch (ddRankOrder)
1601         {
1602             case DdRankOrder::pp_pme:
1603                 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1604                 break;
1605             case DdRankOrder::interleave:
1606                 /* Interleave the PP-only and PME-only ranks */
1607                 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1608                 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1609                 break;
1610             case DdRankOrder::cartesian:
1611                 break;
1612             default:
1613                 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1614         }
1615
1616         if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1617         {
1618             cr->duty = DUTY_PME;
1619         }
1620         else
1621         {
1622             cr->duty = DUTY_PP;
1623         }
1624 #if GMX_MPI
1625         /* Split the sim communicator into PP and PME only nodes */
1626         MPI_Comm_split(cr->mpi_comm_mysim,
1627                        getThisRankDuties(cr),
1628                        cr->nodeid,
1629                        &cr->mpi_comm_mygroup);
1630         MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1631 #endif
1632     }
1633
1634     GMX_LOG(mdlog.info).appendTextFormatted(
1635             "This rank does only %s work.\n",
1636             thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1637
1638     return cartSetup;
1639 }
1640
1641 /*! \brief Makes the PP communicator and the PME communicator, when needed
1642  *
1643  * Returns the Cartesian rank setup.
1644  * Sets \p cr->mpi_comm_mygroup.
1645  * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1646  * With separate PME ranks in interleaved order, sets the PME ranks in \p pmeRanks.
1647  */
1648 static CartesianRankSetup
1649 makeGroupCommunicators(const gmx::MDLogger &mdlog,
1650                        const DDSettings    &ddSettings,
1651                        const DdRankOrder    ddRankOrder,
1652                        const DDRankSetup   &ddRankSetup,
1653                        t_commrec           *cr,
1654                        ivec                 ddCellIndex,
1655                        std::vector<int>    *pmeRanks)
1656 {
1657     CartesianRankSetup cartSetup;
1658
1659     if (ddRankSetup.usePmeOnlyRanks)
1660     {
1661         /* Split the communicator into a PP and PME part */
1662         cartSetup =
1663             split_communicator(mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder,
1664                                ddRankSetup, ddCellIndex, pmeRanks);
1665     }
1666     else
1667     {
1668         /* All nodes do PP and PME */
1669         /* We do not require separate communicators */
1670         cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
1671
1672         cartSetup.bCartesianPP     = false;
1673         cartSetup.bCartesianPP_PME = false;
1674     }
1675
1676     return cartSetup;
1677 }
1678
1679 /*! \brief For PP ranks, sets or makes the communicator
1680  *
1681  * For PME-only ranks, receives the DD index to sim rank mapping.
1682  * For PP ranks without PME duty, sets the PME-only rank to communicate with.
1683  */
1684 static void setupGroupCommunication(const gmx::MDLogger      &mdlog,
1685                                     const DDSettings         &ddSettings,
1686                                     gmx::ArrayRef<const int>  pmeRanks,
1687                                     t_commrec                *cr,
1688                                     const int                 numAtomsInSystem,
1689                                     gmx_domdec_t             *dd)
1690 {
1691     const DDRankSetup        &ddRankSetup = dd->comm->ddRankSetup;
1692     const CartesianRankSetup &cartSetup   = dd->comm->cartesianRankSetup;
1693
1694     if (thisRankHasDuty(cr, DUTY_PP))
1695     {
1696         /* Copy or make a new PP communicator */
1697
1698         /* We (possibly) reordered the nodes in split_communicator,
1699          * so reordering is no longer required in make_pp_communicator.
1700          */
1701         const bool useCartesianReorder =
1702             (ddSettings.useCartesianReorder &&
1703              !cartSetup.bCartesianPP_PME);
1704
1705         make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1706     }
1707     else
1708     {
1709         receive_ddindex2simnodeid(dd, cr);
1710     }
1711
1712     if (!thisRankHasDuty(cr, DUTY_PME))
1713     {
1714         /* Set up the communication to our PME node */
1715         dd->pme_nodeid           = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1716         dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1717         if (debug)
1718         {
1719             fprintf(debug, "My pme_nodeid %d receive ener %s\n",
1720                     dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
1721         }
1722     }
1723     else
1724     {
1725         dd->pme_nodeid = -1;
1726     }
1727
1728     /* We cannot use DDMASTER(dd), because dd->masterrank is set later */
1729     if (MASTER(cr))
1730     {
1731         dd->ma = std::make_unique<AtomDistribution>(dd->nc,
1732                                                     numAtomsInSystem,
1733                                                     numAtomsInSystem);
1734     }
1735 }
1736
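     /*! \brief Returns the static load balancing cell size fractions for one dimension.
      *
      * For example, nc=3 with size_string "1 2 1" yields fractions 0.250 0.500 0.250.
      * Returns nullptr when nc is 1 or no size string was given.
      */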
1737 static real *get_slb_frac(const gmx::MDLogger &mdlog,
1738                           const char *dir, int nc, const char *size_string)
1739 {
1740     real  *slb_frac, tot;
1741     int    i, n;
1742     double dbl;
1743
1744     slb_frac = nullptr;
1745     if (nc > 1 && size_string != nullptr)
1746     {
1747         GMX_LOG(mdlog.info).appendTextFormatted(
1748                 "Using static load balancing for the %s direction", dir);
1749         snew(slb_frac, nc);
1750         tot = 0;
1751         for (i = 0; i < nc; i++)
1752         {
1753             dbl = 0;
1754             sscanf(size_string, "%20lf%n", &dbl, &n);
1755             if (dbl == 0)
1756             {
1757                 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
1758             }
1759             slb_frac[i]  = dbl;
1760             size_string += n;
1761             tot         += slb_frac[i];
1762         }
1763         /* Normalize */
1764         std::string relativeCellSizes = "Relative cell sizes:";
1765         for (i = 0; i < nc; i++)
1766         {
1767             slb_frac[i]       /= tot;
1768             relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1769         }
1770         GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1771     }
1772
1773     return slb_frac;
1774 }
1775
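     /*! \brief Returns the number of bonded interactions that involve more than two
      * atoms (angles, dihedrals, etc.), summed over all molecules in the system.
      */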
1776 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
1777 {
1778     int                  n     = 0;
1779     gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1780     int                  nmol;
1781     while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1782     {
1783         for (auto &ilist : extractILists(*ilists, IF_BOND))
1784         {
1785             if (NRAL(ilist.functionType) >  2)
1786             {
1787                 n += nmol*(ilist.iatoms.size()/ilistStride(ilist));
1788             }
1789         }
1790     }
1791
1792     return n;
1793 }
1794
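     /*! \brief Returns the integer value of the environment variable \p env_var, or \p def when it is not set.
      *
      * A set value that cannot be parsed as an integer yields 1.
      */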
1795 static int dd_getenv(const gmx::MDLogger &mdlog,
1796                      const char *env_var, int def)
1797 {
1798     char *val;
1799     int   nst;
1800
1801     nst = def;
1802     val = getenv(env_var);
1803     if (val)
1804     {
1805         if (sscanf(val, "%20d", &nst) <= 0)
1806         {
1807             nst = 1;
1808         }
1809         GMX_LOG(mdlog.info).appendTextFormatted(
1810                 "Found env.var. %s = %s, using value %d",
1811                 env_var, val, nst);
1812     }
1813
1814     return nst;
1815 }
1816
1817 static void check_dd_restrictions(const gmx_domdec_t  *dd,
1818                                   const t_inputrec    *ir,
1819                                   const gmx::MDLogger &mdlog)
1820 {
1821     if (ir->ePBC == epbcSCREW &&
1822         (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
1823     {
1824         gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
1825     }
1826
1827     if (ir->nstlist == 0)
1828     {
1829         gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1830     }
1831
1832     if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
1833     {
1834         GMX_LOG(mdlog.warning).appendText("comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
1835     }
1836 }
1837
1838 static real average_cellsize_min(const gmx_ddbox_t &ddbox,
1839                                  const ivec         numDomains)
1840 {
1841     real r = ddbox.box_size[XX];
1842     for (int d = 0; d < DIM; d++)
1843     {
1844         if (numDomains[d] > 1)
1845         {
1846             /* Check using the initial average cell size */
1847             r = std::min(r, ddbox.box_size[d]*ddbox.skew_fac[d]/numDomains[d]);
1848         }
1849     }
1850
1851     return r;
1852 }
1853
1854 /*! \brief Depending on the initial DLB value, returns the DLB switched-off state or issues an error.
1855  */
1856 static DlbState forceDlbOffOrBail(DlbState             cmdlineDlbState,
1857                                   const std::string   &reasonStr,
1858                                   const gmx::MDLogger &mdlog)
1859 {
1860     std::string dlbNotSupportedErr  = "Dynamic load balancing requested, but ";
1861     std::string dlbDisableNote      = "NOTE: disabling dynamic load balancing as ";
1862
1863     if (cmdlineDlbState == DlbState::onUser)
1864     {
1865         gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1866     }
1867     else if (cmdlineDlbState == DlbState::offCanTurnOn)
1868     {
1869         GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1870     }
1871     return DlbState::offForever;
1872 }
1873
1874 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1875  *
1876  * This function parses the parameter of the "-dlb" command-line option and sets
1877  * the corresponding state value. Then it checks the consistency of the determined
1878  * state with other run parameters and settings. As a result, the initial state
1879  * may be altered or an error may be thrown if incompatibility of options is detected.
1880  *
1881  * \param [in] mdlog       Logger.
1882  * \param [in] dlbOption   Enum value for the DLB option.
1883  * \param [in] bRecordLoad True if the load balancer is recording load information.
1884  * \param [in] mdrunOptions  Options for mdrun.
1885  * \param [in] ir          Pointer to the mdrun input parameters.
1886  * \returns                DLB initial/startup state.
1887  */
1888 static DlbState determineInitialDlbState(const gmx::MDLogger &mdlog,
1889                                          DlbOption dlbOption, gmx_bool bRecordLoad,
1890                                          const gmx::MdrunOptions &mdrunOptions,
1891                                          const t_inputrec *ir)
1892 {
1893     DlbState dlbState = DlbState::offCanTurnOn;
1894
1895     switch (dlbOption)
1896     {
1897         case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1898         case DlbOption::no:               dlbState = DlbState::offUser;      break;
1899         case DlbOption::yes:              dlbState = DlbState::onUser;       break;
1900         default: gmx_incons("Invalid dlbOption enum value");
1901     }
1902
1903     /* Reruns don't support DLB: bail or override auto mode */
1904     if (mdrunOptions.rerun)
1905     {
1906         std::string reasonStr = "it is not supported in reruns.";
1907         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1908     }
1909
1910     /* Unsupported integrators */
1911     if (!EI_DYNAMICS(ir->eI))
1912     {
1913         auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1914         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1915     }
1916
1917     /* Without cycle counters we can't time work to balance on */
1918     if (!bRecordLoad)
1919     {
1920         std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
1921         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1922     }
1923
1924     if (mdrunOptions.reproducible)
1925     {
1926         std::string reasonStr = "you started a reproducible run.";
1927         switch (dlbState)
1928         {
1929             case DlbState::offUser:
1930                 break;
1931             case DlbState::offForever:
1932                 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1933                 break;
1934             case DlbState::offCanTurnOn:
1935                 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1936             case DlbState::onCanTurnOff:
1937                 GMX_RELEASE_ASSERT(false, "DlbState::onCanTurnOff is not a valid initial state");
1938                 break;
1939             case DlbState::onUser:
1940                 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", mdlog);
1941             default:
1942                 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", static_cast<int>(dlbState));
1943         }
1944     }
1945
1946     return dlbState;
1947 }
1948
1949 static gmx_domdec_comm_t *init_dd_comm()
1950 {
1951     gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
1952
1953     comm->n_load_have      = 0;
1954     comm->n_load_collect   = 0;
1955
1956     comm->haveTurnedOffDlb = false;
1957
1958     for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
1959     {
1960         comm->sum_nat[i] = 0;
1961     }
1962     comm->ndecomp   = 0;
1963     comm->nload     = 0;
1964     comm->load_step = 0;
1965     comm->load_sum  = 0;
1966     comm->load_max  = 0;
1967     clear_ivec(comm->load_lim);
1968     comm->load_mdf  = 0;
1969     comm->load_pme  = 0;
1970
1971     /* This should be replaced by a unique pointer */
1972     comm->balanceRegion = ddBalanceRegionAllocate();
1973
1974     return comm;
1975 }
1976
1977 /* Returns whether mtop contains constraints and/or vsites */
1978 static bool systemHasConstraintsOrVsites(const gmx_mtop_t &mtop)
1979 {
1980     auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
1981     int  nmol;
1982     while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
1983     {
1984         if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
1985         {
1986             return true;
1987         }
1988     }
1989
1990     return false;
1991 }
1992
1993 static void setupUpdateGroups(const gmx::MDLogger &mdlog,
1994                               const gmx_mtop_t    &mtop,
1995                               const t_inputrec    &inputrec,
1996                               const real           cutoffMargin,
1997                               DDSystemInfo        *systemInfo)
1998 {
1999     /* When we have constraints and/or vsites, it is beneficial to use
2000      * update groups (when possible) so that groups of atoms can be updated independently.
2001      */
2002     if (!systemHasConstraintsOrVsites(mtop))
2003     {
2004         /* No constraints or vsites, atoms can be updated independently */
2005         return;
2006     }
2007
2008     systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
2009     systemInfo->useUpdateGroups               =
2010         (!systemInfo->updateGroupingPerMoleculetype.empty() &&
2011          getenv("GMX_NO_UPDATEGROUPS") == nullptr);
2012
2013     if (systemInfo->useUpdateGroups)
2014     {
2015         int numUpdateGroups = 0;
2016         for (const auto &molblock : mtop.molblock)
2017         {
2018             numUpdateGroups += molblock.nmol*systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
2019         }
2020
2021         systemInfo->maxUpdateGroupRadius =
2022             computeMaxUpdateGroupRadius(mtop,
2023                                         systemInfo->updateGroupingPerMoleculetype,
2024                                         maxReferenceTemperature(inputrec));
2025
2026         /* To use update groups, the enlarged domain-to-domain cutoff distance
2027          * should be compatible with the box size.
2028          */
2029         systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
2030
2031         if (systemInfo->useUpdateGroups)
2032         {
2033             GMX_LOG(mdlog.info).appendTextFormatted(
2034                     "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f nm\n",
2035                     numUpdateGroups,
2036                     mtop.natoms/static_cast<double>(numUpdateGroups),
2037                     systemInfo->maxUpdateGroupRadius);
2038         }
2039         else
2040         {
2041             GMX_LOG(mdlog.info).appendTextFormatted("The combination of rlist and box size prohibits the use of update groups\n");
2042             systemInfo->updateGroupingPerMoleculetype.clear();
2043         }
2044     }
2045 }
2046
2047 UnitCellInfo::UnitCellInfo(const t_inputrec &ir) :
2048     npbcdim(ePBC2npbcdim(ir.ePBC)),
2049     numBoundedDimensions(inputrec2nboundeddim(&ir)),
2050     ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
2051     haveScrewPBC(ir.ePBC == epbcSCREW)
2052 {
2053 }
2054
2055 /* Returns whether molecules are always whole, i.e. not broken by PBC */
2056 static bool
2057 moleculesAreAlwaysWhole(const gmx_mtop_t                            &mtop,
2058                         const bool                                   useUpdateGroups,
2059                         gmx::ArrayRef<const gmx::RangePartitioning>  updateGroupingPerMoleculetype)
2060 {
2061     if (useUpdateGroups)
2062     {
2063         GMX_RELEASE_ASSERT(updateGroupingPerMoleculetype.size() == mtop.moltype.size(),
2064                            "Need one grouping per moltype");
2065         for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
2066         {
2067             if (updateGroupingPerMoleculetype[mol].numBlocks() > 1)
2068             {
2069                 return false;
2070             }
2071         }
2072     }
2073     else
2074     {
2075         for (const auto &moltype : mtop.moltype)
2076         {
2077             if (moltype.atoms.nr > 1)
2078             {
2079                 return false;
2080             }
2081         }
2082     }
2083
2084     return true;
2085 }
2086
2087 /*! \brief Generate the simulation system information */
2088 static DDSystemInfo
2089 getSystemInfo(const gmx::MDLogger           &mdlog,
2090               const t_commrec               *cr,
2091               const DomdecOptions           &options,
2092               const gmx_mtop_t              &mtop,
2093               const t_inputrec              &ir,
2094               const matrix                   box,
2095               gmx::ArrayRef<const gmx::RVec> xGlobal)
2096 {
2097     const real         tenPercentMargin = 1.1;
2098
2099     DDSystemInfo       systemInfo;
2100
2101     /* We need to decide on update groups early, as this affects communication distances */
2102     systemInfo.useUpdateGroups = false;
2103     if (ir.cutoff_scheme == ecutsVERLET)
2104     {
2105         real cutoffMargin = std::sqrt(max_cutoff2(ir.ePBC, box)) - ir.rlist;
2106         setupUpdateGroups(mdlog, mtop, ir, cutoffMargin, &systemInfo);
2107     }
2108
2109     systemInfo.moleculesAreAlwaysWhole         =
2110         moleculesAreAlwaysWhole(mtop,
2111                                 systemInfo.useUpdateGroups,
2112                                 systemInfo.updateGroupingPerMoleculetype);
2113     systemInfo.haveInterDomainBondeds          = (!systemInfo.moleculesAreAlwaysWhole ||
2114                                                   mtop.bIntermolecularInteractions);
2115     systemInfo.haveInterDomainMultiBodyBondeds = (systemInfo.haveInterDomainBondeds &&
2116                                                   multi_body_bondeds_count(&mtop) > 0);
2117
2118     if (systemInfo.useUpdateGroups)
2119     {
2120         systemInfo.haveSplitConstraints = false;
2121         systemInfo.haveSplitSettles     = false;
2122     }
2123     else
2124     {
2125         systemInfo.haveSplitConstraints = (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2126                                            gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0);
2127         systemInfo.haveSplitSettles     = (gmx_mtop_ftype_count(mtop, F_SETTLE) > 0);
2128     }
2129
2130     if (ir.rlist == 0)
2131     {
2132         /* Set the cut-off to some very large value,
2133          * so we don't need if statements everywhere in the code.
2134          * We use sqrt, since the cut-off is squared in some places.
2135          */
2136         systemInfo.cutoff = GMX_CUTOFF_INF;
2137     }
2138     else
2139     {
2140         systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir.rlist);
2141     }
2142     systemInfo.minCutoffForMultiBody = 0;
2143
2144     /* Determine the minimum cell size limit, affected by many factors */
2145     systemInfo.cellsizeLimit             = 0;
2146     systemInfo.filterBondedCommunication = false;
2147
2148     /* We do not allow home atoms to move beyond the neighboring domain
2149      * between domain decomposition steps, which limits the cell size.
2150      * Get an estimate of cell size limit due to atom displacement.
2151      * In most cases this is a large overestimate, because it assumes
2152      * non-interacting atoms.
2153      * We set the chance to 1 in a trillion steps.
2154      */
2155     constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2156     const real     limitForAtomDisplacement          =
2157         minCellSizeForAtomDisplacement(mtop, ir,
2158                                        systemInfo.updateGroupingPerMoleculetype,
2159                                        c_chanceThatAtomMovesBeyondDomain);
2160     GMX_LOG(mdlog.info).appendTextFormatted(
2161             "Minimum cell size due to atom displacement: %.3f nm",
2162             limitForAtomDisplacement);
2163
2164     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2165                                         limitForAtomDisplacement);
2166
2167     /* TODO: PME decomposition currently requires atoms not to be more than
2168      *       2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2169      *       In nearly all cases, limitForAtomDisplacement will be smaller
2170      *       than 2/3*rlist, so the PME requirement is satisfied.
2171      *       But it would be better for both correctness and performance
2172      *       to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2173      *       Note that we would need to improve the pairlist buffer case.
2174      */
2175
2176     if (systemInfo.haveInterDomainBondeds)
2177     {
2178         if (options.minimumCommunicationRange > 0)
2179         {
2180             systemInfo.minCutoffForMultiBody =
2181                 atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2182             if (options.useBondedCommunication)
2183             {
2184                 systemInfo.filterBondedCommunication = (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2185             }
2186             else
2187             {
2188                 systemInfo.cutoff = std::max(systemInfo.cutoff,
2189                                              systemInfo.minCutoffForMultiBody);
2190             }
2191         }
2192         else if (ir.bPeriodicMols)
2193         {
2194             /* Cannot easily determine the required cut-off */
2195             GMX_LOG(mdlog.warning).appendText("NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.");
2196             systemInfo.minCutoffForMultiBody = systemInfo.cutoff/2;
2197         }
2198         else
2199         {
2200             real r_2b, r_mb;
2201
2202             if (MASTER(cr))
2203             {
2204                 dd_bonded_cg_distance(mdlog, &mtop, &ir, as_rvec_array(xGlobal.data()), box,
2205                                       options.checkBondedInteractions,
2206                                       &r_2b, &r_mb);
2207             }
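             /* Broadcast the two-body and multi-body bonded distances computed
              * on the master rank to all ranks.
              */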
2208             gmx_bcast(sizeof(r_2b), &r_2b, cr);
2209             gmx_bcast(sizeof(r_mb), &r_mb, cr);
2210
2211             /* We use an initial margin of 10% for the minimum cell size,
2212              * except when we are just below the non-bonded cut-off.
2213              */
2214             if (options.useBondedCommunication)
2215             {
2216                 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2217                 {
2218                     const real r_bonded              = std::max(r_2b, r_mb);
2219                     systemInfo.minCutoffForMultiBody = tenPercentMargin*r_bonded;
2220                     /* This is the (only) place where we turn on the filtering */
2221                     systemInfo.filterBondedCommunication = true;
2222                 }
2223                 else
2224                 {
2225                     const real r_bonded              = r_mb;
2226                     systemInfo.minCutoffForMultiBody = std::min(tenPercentMargin*r_bonded,
2227                                                                 systemInfo.cutoff);
2228                 }
2229                 /* We determine cutoff_mbody later */
2230                 systemInfo.increaseMultiBodyCutoff = true;
2231             }
2232             else
2233             {
2234                 /* No special bonded communication,
2235                  * simply increase the DD cut-off.
2236                  */
2237                 systemInfo.minCutoffForMultiBody = tenPercentMargin*std::max(r_2b, r_mb);
2238                 systemInfo.cutoff                = std::max(systemInfo.cutoff,
2239                                                             systemInfo.minCutoffForMultiBody);
2240             }
2241         }
2242         GMX_LOG(mdlog.info).appendTextFormatted(
2243                 "Minimum cell size due to bonded interactions: %.3f nm",
2244                 systemInfo.minCutoffForMultiBody);
2245
2246         systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2247                                             systemInfo.minCutoffForMultiBody);
2248     }
2249
2250     systemInfo.constraintCommunicationRange = 0;
2251     if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
2252     {
2253         /* There is a cell size limit due to the constraints (P-LINCS) */
2254         systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, &mtop, &ir);
2255         GMX_LOG(mdlog.info).appendTextFormatted(
2256                 "Estimated maximum distance required for P-LINCS: %.3f nm",
2257                 systemInfo.constraintCommunicationRange);
2258         if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2259         {
2260             GMX_LOG(mdlog.info).appendText("This distance will limit the DD cell size, you can override this with -rcon");
2261         }
2262     }
2263     else if (options.constraintCommunicationRange > 0)
2264     {
2265         /* Here we do not check for dd->splitConstraints,
2266          * because one can also set a cell size limit for virtual sites only,
2267          * and at this point we don't know yet if there are inter-cg v-sites.
2268          */
2269         GMX_LOG(mdlog.info).appendTextFormatted(
2270                 "User supplied maximum distance required for P-LINCS: %.3f nm",
2271                 options.constraintCommunicationRange);
2272         systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2273     }
2274     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2275                                         systemInfo.constraintCommunicationRange);
2276
2277     return systemInfo;
2278 }
2279
2280 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
2281  * implemented. */
2282 static void
2283 checkDDGridSetup(const DDGridSetup   &ddGridSetup,
2284                  const t_commrec     *cr,
2285                  const DomdecOptions &options,
2286                  const DDSettings    &ddSettings,
2287                  const DDSystemInfo  &systemInfo,
2288                  const real           cellsizeLimit,
2289                  const gmx_ddbox_t   &ddbox)
2290 {
2291     if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
2292     {
2293         char     buf[STRLEN];
2294         gmx_bool bC = (systemInfo.haveSplitConstraints &&
2295                        systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2296         sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
2297                 !bC ? "-rdd" : "-rcon",
2298                 ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2299                 bC ? " or your LINCS settings" : "");
2300
2301         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2302                              "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
2303                              "%s\n"
2304                              "Look in the log file for details on the domain decomposition",
2305                              cr->nnodes - ddGridSetup.numPmeOnlyRanks,
2306                              cellsizeLimit,
2307                              buf);
2308     }
2309
2310     const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
2311     if (acs < cellsizeLimit)
2312     {
2313         if (options.numCells[XX] <= 0)
2314         {
2315             GMX_RELEASE_ASSERT(false, "dd_choose_grid() should return a grid that satisfies the cell size limits");
2316         }
2317         else
2318         {
2319             gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2320                                  "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
2321                                  acs, cellsizeLimit);
2322         }
2323     }
2324
2325     const int numPPRanks = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2326     if (cr->nnodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2327     {
2328         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2329                              "The size of the domain decomposition grid (%d) does not match the number of PP ranks (%d). The total number of ranks is %d",
2330                              numPPRanks, cr->nnodes - ddGridSetup.numPmeOnlyRanks, cr->nnodes);
2331     }
2332     if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2333     {
2334         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2335                              "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", ddGridSetup.numPmeOnlyRanks, numPPRanks);
2336     }
2337 }
2338
2339 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2340 static DDRankSetup
2341 getDDRankSetup(const gmx::MDLogger &mdlog,
2342                t_commrec           *cr,
2343                const DDGridSetup   &ddGridSetup,
2344                const t_inputrec    &ir)
2345 {
2346     GMX_LOG(mdlog.info).appendTextFormatted(
2347             "Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2348             ddGridSetup.numDomains[XX], ddGridSetup.numDomains[YY], ddGridSetup.numDomains[ZZ],
2349             ddGridSetup.numPmeOnlyRanks);
2350
2351     DDRankSetup ddRankSetup;
2352
2353     ddRankSetup.numPPRanks = cr->nnodes - ddGridSetup.numPmeOnlyRanks;
2354     copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2355
2356     ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2357     if (ddRankSetup.usePmeOnlyRanks)
2358     {
2359         ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2360     }
2361     else
2362     {
2363         ddRankSetup.numRanksDoingPme = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2364     }
2365
2366     if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2367     {
2368         /* The following choices should match those
2369          * in comm_cost_est in domdec_setup.c.
2370          * Note that here the checks have to take into account
2371          * that the decomposition might occur in a different order than xyz
2372          * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2373          * in which case they will not match those in comm_cost_est,
2374          * but since that is mainly for testing purposes that's fine.
2375          */
2376         if (ddGridSetup.numDDDimensions >= 2 &&
2377             ddGridSetup.ddDimensions[0] == XX &&
2378             ddGridSetup.ddDimensions[1] == YY &&
2379             ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX] &&
2380             ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0 &&
2381             getenv("GMX_PMEONEDD") == nullptr)
2382         {
2383             ddRankSetup.npmedecompdim = 2;
2384             ddRankSetup.npmenodes_x   = ddGridSetup.numDomains[XX];
2385             ddRankSetup.npmenodes_y   = ddRankSetup.numRanksDoingPme/ddRankSetup.npmenodes_x;
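             /* For example, a 6x4x2 domain grid with 12 PME-only ranks gives a
              * 6x2 PME grid: npmenodes_x = 6 and npmenodes_y = 12/6 = 2.
              */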
2386         }
2387         else
2388         {
2389             /* In case nc is 1 in both x and y we could still choose to
2390              * decompose pme in y instead of x, but we use x for simplicity.
2391              */
2392             ddRankSetup.npmedecompdim = 1;
2393             if (ddGridSetup.ddDimensions[0] == YY)
2394             {
2395                 ddRankSetup.npmenodes_x = 1;
2396                 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2397             }
2398             else
2399             {
2400                 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2401                 ddRankSetup.npmenodes_y = 1;
2402             }
2403         }
2404         GMX_LOG(mdlog.info).appendTextFormatted(
2405                 "PME domain decomposition: %d x %d x %d",
2406                 ddRankSetup.npmenodes_x, ddRankSetup.npmenodes_y, 1);
2407     }
2408     else
2409     {
2410         ddRankSetup.npmedecompdim = 0;
2411         ddRankSetup.npmenodes_x   = 0;
2412         ddRankSetup.npmenodes_y   = 0;
2413     }
2414
2415     return ddRankSetup;
2416 }
2417
2418 /*! \brief Set the cell size and interaction limits */
2419 static void set_dd_limits(const gmx::MDLogger &mdlog,
2420                           t_commrec *cr, gmx_domdec_t *dd,
2421                           const DomdecOptions &options,
2422                           const DDSettings &ddSettings,
2423                           const DDSystemInfo &systemInfo,
2424                           const DDGridSetup &ddGridSetup,
2425                           const int numPPRanks,
2426                           const gmx_mtop_t *mtop,
2427                           const t_inputrec *ir,
2428                           const gmx_ddbox_t &ddbox)
2429 {
2430     gmx_domdec_comm_t *comm = dd->comm;
2431     comm->ddSettings        = ddSettings;
2432
2433     /* Initialize the GPU share count to 0; it might change later */
2434     comm->nrank_gpu_shared = 0;
2435
2436     comm->dlbState         = comm->ddSettings.initialDlbState;
2437     dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2438     /* To consider turning DLB on after 2*nstlist steps we need to check
2439      * at partitioning count 3. Thus we need to increase the first count by 2.
2440      */
2441     comm->ddPartioningCountFirstDlbOff += 2;
2442
2443     comm->bPMELoadBalDLBLimits          = FALSE;
2444
2445     /* Allocate the charge group/atom sorting struct */
2446     comm->sort = std::make_unique<gmx_domdec_sort_t>();
2447
2448     comm->systemInfo = systemInfo;
2449
2450     if (systemInfo.useUpdateGroups)
2451     {
2452         /* Note: We would like to use dd->nnodes for the atom count estimate,
2453          *       but that is not yet available here. This anyhow only
2454          *       affects performance up to the second dd_partition_system call.
2455          */
2456         const int homeAtomCountEstimate =  mtop->natoms/numPPRanks;
2457         comm->updateGroupsCog =
2458             std::make_unique<gmx::UpdateGroupsCog>(*mtop,
2459                                                    systemInfo.updateGroupingPerMoleculetype,
2460                                                    maxReferenceTemperature(*ir),
2461                                                    homeAtomCountEstimate);
2462     }
2463
2464     /* Set the DD setup given by ddGridSetup */
2465     copy_ivec(ddGridSetup.numDomains, dd->nc);
2466     dd->ndim = ddGridSetup.numDDDimensions;
2467     copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2468
2469     dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
2470
2471     snew(comm->slb_frac, DIM);
2472     if (isDlbDisabled(comm))
2473     {
2474         comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
2475         comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
2476         comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
2477     }
2478
2479     /* Set the multi-body cut-off and cellsize limit for DLB */
2480     comm->cutoff_mbody   = systemInfo.minCutoffForMultiBody;
2481     comm->cellsize_limit = systemInfo.cellsizeLimit;
2482     if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2483     {
2484         if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2485         {
2486             /* Set the bonded communication distance to halfway between
2487              * the minimum and the maximum,
2488              * since the extra communication cost is nearly zero.
2489              */
2490             real acs           = average_cellsize_min(ddbox, dd->nc);
2491             comm->cutoff_mbody = 0.5*(systemInfo.minCutoffForMultiBody + acs);
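             /* For example, a 1.0 nm minimum multi-body cut-off with a 2.0 nm average
              * cell size gives cutoff_mbody = 0.5*(1.0 + 2.0) = 1.5 nm here.
              */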
2492             if (!isDlbDisabled(comm))
2493             {
2494                 /* Check if this does not limit the scaling */
2495                 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
2496                                               options.dlbScaling*acs);
2497             }
2498             if (!systemInfo.filterBondedCommunication)
2499             {
2500                 /* Without bBondComm do not go beyond the n.b. cut-off */
2501                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2502                 if (comm->cellsize_limit >= systemInfo.cutoff)
2503                 {
2504                     /* We don't lose a lot of efficiency
2505                      * when increasing it to the n.b. cut-off.
2506                      * It can even be slightly faster, because we need
2507                      * fewer checks for the communication setup.
2508                      */
2509                     comm->cutoff_mbody = systemInfo.cutoff;
2510                 }
2511             }
2512             /* Check if we did not end up below our original limit */
2513             comm->cutoff_mbody = std::max(comm->cutoff_mbody,
2514                                           systemInfo.minCutoffForMultiBody);
2515
2516             if (comm->cutoff_mbody > comm->cellsize_limit)
2517             {
2518                 comm->cellsize_limit = comm->cutoff_mbody;
2519             }
2520         }
2521         /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2522     }
2523
2524     if (debug)
2525     {
2526         fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
2527                 "cellsize limit %f\n",
2528                 gmx::boolToString(systemInfo.filterBondedCommunication),
2529                 comm->cellsize_limit);
2530     }
2531
2532     if (MASTER(cr))
2533     {
2534         check_dd_restrictions(dd, ir, mdlog);
2535     }
2536 }
2537
2538 void dd_init_bondeds(FILE                       *fplog,
2539                      gmx_domdec_t               *dd,
2540                      const gmx_mtop_t           *mtop,
2541                      const gmx_vsite_t          *vsite,
2542                      const t_inputrec           *ir,
2543                      gmx_bool                    bBCheck,
2544                      cginfo_mb_t                *cginfo_mb)
2545 {
2546     gmx_domdec_comm_t *comm;
2547
2548     dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
2549
2550     comm = dd->comm;
2551
2552     if (comm->systemInfo.filterBondedCommunication)
2553     {
2554         /* Communicate atoms beyond the cut-off for bonded interactions */
2555         comm->bondedLinks = makeBondedLinks(mtop, cginfo_mb);
2556     }
2557     else
2558     {
2559         /* Only communicate atoms based on cut-off */
2560         comm->bondedLinks = nullptr;
2561     }
2562 }
2563
2564 static void writeSettings(gmx::TextWriter       *log,
2565                           gmx_domdec_t          *dd,
2566                           const gmx_mtop_t      *mtop,
2567                           const t_inputrec      *ir,
2568                           gmx_bool               bDynLoadBal,
2569                           real                   dlb_scale,
2570                           const gmx_ddbox_t     *ddbox)
2571 {
2572     gmx_domdec_comm_t *comm;
2573     int                d;
2574     ivec               np;
2575     real               limit, shrink;
2576
2577     comm = dd->comm;
2578
2579     if (bDynLoadBal)
2580     {
2581         log->writeString("The maximum number of communication pulses is:");
2582         for (d = 0; d < dd->ndim; d++)
2583         {
2584             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2585         }
2586         log->ensureLineBreak();
2587         log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm", comm->cellsize_limit);
2588         log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2589         log->writeString("The allowed shrink of domain decomposition cells is:");
2590         for (d = 0; d < DIM; d++)
2591         {
2592             if (dd->nc[d] > 1)
2593             {
2594                 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
2595                 {
2596                     shrink = 0;
2597                 }
2598                 else
2599                 {
2600                     shrink =
2601                         comm->cellsize_min_dlb[d]/
2602                         (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
2603                 }
2604                 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2605             }
2606         }
2607         log->ensureLineBreak();
2608     }
2609     else
2610     {
2611         set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2612         log->writeString("The initial number of communication pulses is:");
2613         for (d = 0; d < dd->ndim; d++)
2614         {
2615             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2616         }
2617         log->ensureLineBreak();
2618         log->writeString("The initial domain decomposition cell size is:");
2619         for (d = 0; d < DIM; d++)
2620         {
2621             if (dd->nc[d] > 1)
2622             {
2623                 log->writeStringFormatted(" %c %.2f nm",
2624                                           dim2char(d), dd->comm->cellsize_min[d]);
2625             }
2626         }
2627         log->ensureLineBreak();
2628         log->writeLine();
2629     }
2630
2631     const bool haveInterDomainVsites =
2632         (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
2633
2634     if (comm->systemInfo.haveInterDomainBondeds ||
2635         haveInterDomainVsites ||
2636         comm->systemInfo.haveSplitConstraints ||
2637         comm->systemInfo.haveSplitSettles)
2638     {
2639         std::string decompUnits;
2640         if (comm->systemInfo.useUpdateGroups)
2641         {
2642             decompUnits = "atom groups";
2643         }
2644         else
2645         {
2646             decompUnits = "atoms";
2647         }
2648
2649         log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:", decompUnits.c_str());
2650         log->writeLineFormatted("%40s  %-7s %6.3f nm", "non-bonded interactions", "", comm->systemInfo.cutoff);
2651
2652         if (bDynLoadBal)
2653         {
2654             limit = dd->comm->cellsize_limit;
2655         }
2656         else
2657         {
2658             if (dd->unitCellInfo.ddBoxIsDynamic)
2659             {
2660                 log->writeLine("(the following are initial values, they could change due to box deformation)");
2661             }
2662             limit = dd->comm->cellsize_min[XX];
2663             for (d = 1; d < DIM; d++)
2664             {
2665                 limit = std::min(limit, dd->comm->cellsize_min[d]);
2666             }
2667         }
2668
2669         if (comm->systemInfo.haveInterDomainBondeds)
2670         {
2671             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2672                                     "two-body bonded interactions", "(-rdd)",
2673                                     std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2674             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2675                                     "multi-body bonded interactions", "(-rdd)",
2676                                     (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->systemInfo.cutoff, limit));
2677         }
2678         if (haveInterDomainVsites)
2679         {
2680             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2681                                     "virtual site constructions", "(-rcon)", limit);
2682         }
2683         if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2684         {
2685             std::string separation = gmx::formatString("atoms separated by up to %d constraints",
2686                                                        1+ir->nProjOrder);
2687             log->writeLineFormatted("%40s  %-7s %6.3f nm\n",
2688                                     separation.c_str(), "(-rcon)", limit);
2689         }
2690         log->ensureLineBreak();
2691     }
2692 }
2693
2694 static void logSettings(const gmx::MDLogger &mdlog,
2695                         gmx_domdec_t        *dd,
2696                         const gmx_mtop_t    *mtop,
2697                         const t_inputrec    *ir,
2698                         real                 dlb_scale,
2699                         const gmx_ddbox_t   *ddbox)
2700 {
2701     gmx::StringOutputStream stream;
2702     gmx::TextWriter         log(&stream);
2703     writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2704     if (dd->comm->dlbState == DlbState::offCanTurnOn)
2705     {
2706         {
2707             log.ensureEmptyLine();
2708             log.writeLine("When dynamic load balancing gets turned on, these settings will change to:");
2709         }
2710         writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2711     }
2712     GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2713 }
2714
2715 static void set_cell_limits_dlb(const gmx::MDLogger &mdlog,
2716                                 gmx_domdec_t        *dd,
2717                                 real                 dlb_scale,
2718                                 const t_inputrec    *ir,
2719                                 const gmx_ddbox_t   *ddbox)
2720 {
2721     gmx_domdec_comm_t *comm;
2722     int                d, dim, npulse, npulse_d_max, npulse_d;
2723     gmx_bool           bNoCutOff;
2724
2725     comm = dd->comm;
2726
2727     bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2728
2729     /* Determine the maximum number of comm. pulses in one dimension */
2730
2731     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2732
2733     /* Determine the maximum required number of grid pulses */
2734     if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2735     {
2736         /* Only a single pulse is required */
2737         npulse = 1;
2738     }
2739     else if (!bNoCutOff && comm->cellsize_limit > 0)
2740     {
2741         /* We round down slightly here to avoid overhead due to the latency
2742          * of extra communication calls when the cut-off
2743          * would be only slightly longer than the cell size.
2744          * Later cellsize_limit is redetermined,
2745          * so we cannot miss interactions due to this rounding.
2746          */
2747         npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff/comm->cellsize_limit);
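         /* For example, a 1.2 nm cut-off with a 0.4 nm cell size limit gives
          * npulse = int(0.96 + 3.0) = 3.
          */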
2748     }
2749     else
2750     {
2751         /* There is no cell size limit */
2752         npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
2753     }
2754
2755     if (!bNoCutOff && npulse > 1)
2756     {
2757         /* See if we can do with fewer pulses, based on dlb_scale */
2758         npulse_d_max = 0;
2759         for (d = 0; d < dd->ndim; d++)
2760         {
2761             dim      = dd->dim[d];
2762             npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->systemInfo.cutoff
2763                                         /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
2764             npulse_d_max = std::max(npulse_d_max, npulse_d);
2765         }
2766         npulse = std::min(npulse, npulse_d_max);
2767     }
2768
2769     /* This env var can override npulse */
2770     d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2771     if (d > 0)
2772     {
2773         npulse = d;
2774     }
2775
2776     comm->maxpulse       = 1;
2777     comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
2778     for (d = 0; d < dd->ndim; d++)
2779     {
2780         if (comm->ddSettings.request1DAnd1Pulse)
2781         {
2782             comm->cd[d].np_dlb = 1;
2783         }
2784         else
2785         {
2786             comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
2787             comm->maxpulse     = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2788         }
2789         if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
2790         {
2791             comm->bVacDLBNoLimit = FALSE;
2792         }
2793     }
2794
2795     /* cellsize_limit is set for LINCS in init_domain_decomposition */
2796     if (!comm->bVacDLBNoLimit)
2797     {
2798         comm->cellsize_limit = std::max(comm->cellsize_limit,
2799                                         comm->systemInfo.cutoff/comm->maxpulse);
2800     }
2801     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2802     /* Set the minimum cell size for each DD dimension */
2803     for (d = 0; d < dd->ndim; d++)
2804     {
2805         if (comm->bVacDLBNoLimit ||
2806             comm->cd[d].np_dlb*comm->cellsize_limit >= comm->systemInfo.cutoff)
2807         {
2808             comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2809         }
2810         else
2811         {
2812             comm->cellsize_min_dlb[dd->dim[d]] =
2813                 comm->systemInfo.cutoff/comm->cd[d].np_dlb;
2814         }
2815     }
2816     if (comm->cutoff_mbody <= 0)
2817     {
2818         comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2819     }
2820     if (isDlbOn(comm))
2821     {
2822         set_dlb_limits(dd);
2823     }
2824 }
2825
2826 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t &dd)
2827 {
2828     return dd.comm->systemInfo.moleculesAreAlwaysWhole;
2829 }
2830
2831 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
2832 {
2833     /* If each molecule is a single charge group
2834      * or we use domain decomposition for each periodic dimension,
2835      * we do not need to take pbc into account for the bonded interactions.
2836      */
2837     return (ePBC != epbcNONE && dd->comm->systemInfo.haveInterDomainBondeds &&
2838             !(dd->nc[XX] > 1 &&
2839               dd->nc[YY] > 1 &&
2840               (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
2841 }
2842
2843 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2844 static void set_ddgrid_parameters(const gmx::MDLogger &mdlog,
2845                                   gmx_domdec_t *dd, real dlb_scale,
2846                                   const gmx_mtop_t *mtop, const t_inputrec *ir,
2847                                   const gmx_ddbox_t *ddbox)
2848 {
2849     gmx_domdec_comm_t *comm        = dd->comm;
2850     DDRankSetup       &ddRankSetup = comm->ddRankSetup;
2851
2852     if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2853     {
2854         init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2855         if (ddRankSetup.npmedecompdim >= 2)
2856         {
2857             init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2858         }
2859     }
2860     else
2861     {
2862         ddRankSetup.numRanksDoingPme = 0;
2863         if (dd->pme_nodeid >= 0)
2864         {
2865             gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2866                                  "Can not have separate PME ranks without PME electrostatics");
2867         }
2868     }
2869
2870     if (debug)
2871     {
2872         fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2873     }
2874     if (!isDlbDisabled(comm))
2875     {
2876         set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2877     }
2878
2879     logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
2880
2881     real vol_frac;
2882     if (ir->ePBC == epbcNONE)
2883     {
2884         vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
2885     }
2886     else
2887     {
2888         vol_frac =
2889             (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, *ddbox))/static_cast<double>(dd->nnodes);
2890     }
2891     if (debug)
2892     {
2893         fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2894     }
2895     int natoms_tot = mtop->natoms;
2896
2897     dd->ga2la      = new gmx_ga2la_t(natoms_tot,
2898                                      static_cast<int>(vol_frac*natoms_tot));
2899 }
2900
2901 /*! \brief Get some important DD parameters which can be modified by env.vars */
2902 static DDSettings
2903 getDDSettings(const gmx::MDLogger     &mdlog,
2904               const DomdecOptions     &options,
2905               const gmx::MdrunOptions &mdrunOptions,
2906               const t_inputrec        &ir)
2907 {
2908     DDSettings ddSettings;
2909
2910     ddSettings.useSendRecv2        = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2911     ddSettings.dlb_scale_lim       = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2912     ddSettings.request1DAnd1Pulse  = bool(dd_getenv(mdlog, "GMX_DD_1D_1PULSE", 0));
2913     ddSettings.useDDOrderZYX       = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
2914     ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
2915     ddSettings.eFlop               = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2916     const int recload              = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2917     ddSettings.nstDDDump           = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2918     ddSettings.nstDDDumpGrid       = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2919     ddSettings.DD_debug            = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
2920
2921     if (ddSettings.useSendRecv2)
2922     {
2923         GMX_LOG(mdlog.info).appendText("Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication");
2924     }
2925
2926     if (ddSettings.eFlop)
2927     {
2928         GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2929         ddSettings.recordLoad = true;
2930     }
2931     else
2932     {
2933         ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
2934     }
2935
2936     ddSettings.initialDlbState =
2937         determineInitialDlbState(mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, &ir);
2938     GMX_LOG(mdlog.info).appendTextFormatted("Dynamic load balancing: %s",
2939                                             edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);
2940
2941     return ddSettings;
2942 }
2943
2944 gmx_domdec_t::gmx_domdec_t(const t_inputrec &ir) :
2945     unitCellInfo(ir)
2946 {
2947 }
2948
2949 /*! \brief Return whether the simulation described can run a 1D single-pulse DD.
2950  *
2951  * The GPU halo exchange code requires a 1D single-pulse DD. Such a DD
2952  * generally requires a larger box than other possible decompositions
2953  * with the same rank count, so the calling code might need to decide
2954  * what is the most appropriate way to run the simulation based on
2955  * whether such a DD is possible.
2956  *
2957  * This function works like init_domain_decomposition(), but will not
2958  * give a fatal error, and only uses \c cr for communicating between
2959  * ranks.
2960  *
2961  * It is safe to call before thread-MPI spawns ranks, so that
2962  * thread-MPI can decide whether and how to trigger the GPU halo
2963  * exchange code path. The number of PME ranks, if any, should be set
2964  * in \c options.numPmeRanks.
2965  */
2966 static bool
2967 canMake1DAnd1PulseDomainDecomposition(const DDSettings              &ddSettingsOriginal,
2968                                       const t_commrec               *cr,
2969                                       const int                      numRanksRequested,
2970                                       const DomdecOptions           &options,
2971                                       const gmx_mtop_t              &mtop,
2972                                       const t_inputrec              &ir,
2973                                       const matrix                   box,
2974                                       gmx::ArrayRef<const gmx::RVec> xGlobal)
2975 {
2976     // Ensure we don't write any output from this checking routine
2977     gmx::MDLogger dummyLogger;
2978
2979     DDSystemInfo  systemInfo = getSystemInfo(dummyLogger, cr, options, mtop, ir, box, xGlobal);
2980
2981     DDSettings    ddSettings = ddSettingsOriginal;
2982     ddSettings.request1DAnd1Pulse = true;
2983     const real    gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(dummyLogger, ddSettings.request1DAnd1Pulse,
2984                                                                        !isDlbDisabled(ddSettings.initialDlbState),
2985                                                                        options.dlbScaling, ir,
2986                                                                        systemInfo.cellsizeLimit);
2987     gmx_ddbox_t ddbox       = {0};
2988     DDGridSetup ddGridSetup = getDDGridSetup(dummyLogger, cr, numRanksRequested, options,
2989                                              ddSettings, systemInfo, gridSetupCellsizeLimit,
2990                                              mtop, ir, box, xGlobal, &ddbox);
2991
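         // A grid setup with zero domains along x indicates that no decomposition
         // satisfying the 1D, single-pulse request could be found.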
2992     const bool canMakeDDWith1DAnd1Pulse = (ddGridSetup.numDomains[XX] != 0);
2993
2994     return canMakeDDWith1DAnd1Pulse;
2995 }
2996
2997 bool is1DAnd1PulseDD(const gmx_domdec_t &dd)
2998 {
2999     const int  maxDimensionSize             = std::max(dd.nc[XX], std::max(dd.nc[YY], dd.nc[ZZ]));
3000     const int  productOfDimensionSizes      = dd.nc[XX]*dd.nc[YY]*dd.nc[ZZ];
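         // The decomposition is one-dimensional exactly when a single dimension
         // accounts for all domains, i.e. the largest dimension size equals the
         // product of all three (e.g. {4,1,1} qualifies, {2,2,1} does not).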
3001     const bool decompositionHasOneDimension = (maxDimensionSize == productOfDimensionSizes);
3002
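         // With DLB disabled, a single pulse is guaranteed when the minimum cell
         // size is at least the cut-off; with DLB enabled, the maximum pulse count
         // over all dimensions is tracked and can be checked directly.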
3003     const bool hasMax1Pulse =
3004         ((isDlbDisabled(dd.comm) && dd.comm->cellsize_limit >= dd.comm->systemInfo.cutoff) ||
3005          (!isDlbDisabled(dd.comm) && dd.comm->maxpulse == 1));
3006
3007     return decompositionHasOneDimension && hasMax1Pulse;
3008
3009 }
3010
3011 namespace gmx
3012 {
3013
3014 // TODO once the functionality stabilizes, move this class and
3015 // supporting functionality into builder.cpp
3016 /*! \brief Impl class for DD builder */
3017 class DomainDecompositionBuilder::Impl
3018 {
3019     public:
3020         //! Constructor
3021         Impl(const MDLogger      &mdlog,
3022              t_commrec           *cr,
3023              const DomdecOptions &options,
3024              const MdrunOptions  &mdrunOptions,
3025              bool                 prefer1DAnd1Pulse,
3026              const gmx_mtop_t    &mtop,
3027              const t_inputrec    &ir,
3028              const matrix         box,
3029              ArrayRef<const RVec> xGlobal);
3030
3031         //! Build the resulting DD manager
3032         gmx_domdec_t *build(LocalAtomSetManager *atomSets);
3033
3034         //! Objects used in constructing and configuring DD
3035         //! \{
3036         //! Logging object
3037         const MDLogger      &mdlog_;
3038         //! Communication object
3039         t_commrec           *cr_;
3040         //! User-supplied options configuring DD behavior
3041         const DomdecOptions  options_;
3042         //! Global system topology
3043         const gmx_mtop_t    &mtop_;
3044         //! User input values from the tpr file
3045         const t_inputrec    &ir_;
3046         //! \}
3047
3048         //! Internal objects used in constructing DD
3049         //! \{
3050         //! Settings combined from the user input
3051         DDSettings           ddSettings_;
3052         //! Information derived from the simulation system
3053         DDSystemInfo         systemInfo_;
3054         //! Box structure
3055         gmx_ddbox_t          ddbox_ = { 0 };
3056         //! Organization of the DD grids
3057         DDGridSetup          ddGridSetup_;
3058         //! Organization of the DD ranks
3059         DDRankSetup          ddRankSetup_;
3060         //! Number of DD cells in each dimension
3061         ivec                 ddCellIndex_ = { 0, 0, 0 };
3062         //! IDs of PME-only ranks
3063         std::vector<int>     pmeRanks_;
3064         //! Contains a valid Cartesian-communicator-based setup, or defaults.
3065         CartesianRankSetup   cartSetup_;
3066         //! \}
3067
3068 };
3069
3070 DomainDecompositionBuilder::Impl::Impl(const MDLogger      &mdlog,
3071                                        t_commrec           *cr,
3072                                        const DomdecOptions &options,
3073                                        const MdrunOptions  &mdrunOptions,
3074                                        const bool           prefer1DAnd1Pulse,
3075                                        const gmx_mtop_t    &mtop,
3076                                        const t_inputrec    &ir,
3077                                        const matrix         box,
3078                                        ArrayRef<const RVec> xGlobal)
3079     : mdlog_(mdlog),
3080       cr_(cr),
3081       options_(options),
3082       mtop_(mtop),
3083       ir_(ir)
3084 {
3085     GMX_LOG(mdlog_.info).appendTextFormatted(
3086             "\nInitializing Domain Decomposition on %d ranks", cr_->nnodes);
3087
3088     ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_);
3089
3090     if (prefer1DAnd1Pulse &&
3091         canMake1DAnd1PulseDomainDecomposition(ddSettings_, cr_, cr_->nnodes, options_,
3092                                               mtop_, ir_, box, xGlobal))
3093     {
3094         ddSettings_.request1DAnd1Pulse = true;
3095     }
3096
3097     if (ddSettings_.eFlop > 1)
3098     {
3099         /* Ensure that we have different random flop counts on different ranks */
3100         srand(1 + cr_->nodeid);
3101     }
3102
3103     systemInfo_ = getSystemInfo(mdlog_, cr_, options_, mtop_, ir_, box, xGlobal);
3104
3105     const int numRanksRequested = cr_->nnodes;
3106     checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir_.coulombtype), options_.numPmeRanks);
3107
3108     // DD grid setup uses a different cell size limit for
3109     // automated setup than the one in systemInfo_. The latter is used
3110     // in set_dd_limits() to configure DLB, for example.
3111     const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(mdlog_, ddSettings_.request1DAnd1Pulse,
3112                                                                     !isDlbDisabled(ddSettings_.initialDlbState),
3113                                                                     options_.dlbScaling, ir_,
3114                                                                     systemInfo_.cellsizeLimit);
3115     ddGridSetup_ = getDDGridSetup(mdlog_, cr_, numRanksRequested, options_,
3116                                   ddSettings_, systemInfo_, gridSetupCellsizeLimit,
3117                                   mtop_, ir_, box, xGlobal, &ddbox_);
3118     checkDDGridSetup(ddGridSetup_, cr_, options_, ddSettings_, systemInfo_, gridSetupCellsizeLimit, ddbox_);
3119
3120     cr_->npmenodes = ddGridSetup_.numPmeOnlyRanks;
3121
3122     ddRankSetup_ = getDDRankSetup(mdlog_, cr_, ddGridSetup_, ir_);
3123
3124     /* Generate the group communicators; this also decides the duty of each rank */
3125     cartSetup_   =
3126         makeGroupCommunicators(mdlog_, ddSettings_, options_.rankOrder,
3127                                ddRankSetup_, cr_,
3128                                ddCellIndex_, &pmeRanks_);
3129 }
3130
3131 gmx_domdec_t *DomainDecompositionBuilder::Impl::build(LocalAtomSetManager *atomSets)
3132 {
3133     gmx_domdec_t *dd = new gmx_domdec_t(ir_);
3134
3135     copy_ivec(ddCellIndex_, dd->ci);
3136
3137     dd->comm = init_dd_comm();
3138
3139     dd->comm->ddRankSetup        = ddRankSetup_;
3140     dd->comm->cartesianRankSetup = cartSetup_;
3141
3142     set_dd_limits(mdlog_, cr_, dd, options_,
3143                   ddSettings_, systemInfo_,
3144                   ddGridSetup_,
3145                   ddRankSetup_.numPPRanks,
3146                   &mtop_, &ir_,
3147                   ddbox_);
3148
3149     setupGroupCommunication(mdlog_, ddSettings_, pmeRanks_, cr_, mtop_.natoms, dd);
3150
3151     if (thisRankHasDuty(cr_, DUTY_PP))
3152     {
3153         set_ddgrid_parameters(mdlog_, dd, options_.dlbScaling, &mtop_, &ir_, &ddbox_);
3154
3155         setup_neighbor_relations(dd);
3156     }
3157
3158     /* Set overallocation to avoid frequent reallocation of arrays */
3159     set_over_alloc_dd(TRUE);
3160
3161     dd->atomSets = atomSets;
3162
3163     return dd;
3164 }
3165
3166 DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger      &mdlog,
3167                                                        t_commrec           *cr,
3168                                                        const DomdecOptions &options,
3169                                                        const MdrunOptions  &mdrunOptions,
3170                                                        const bool           prefer1DAnd1Pulse,
3171                                                        const gmx_mtop_t    &mtop,
3172                                                        const t_inputrec    &ir,
3173                                                        const matrix         box,
3174                                                        ArrayRef<const RVec> xGlobal)
3175     : impl_(new Impl(mdlog, cr, options, mdrunOptions, prefer1DAnd1Pulse, mtop, ir, box, xGlobal))
3176 {
3177 }
3178
3179 gmx_domdec_t *DomainDecompositionBuilder::build(LocalAtomSetManager *atomSets)
3180 {
3181     return impl_->build(atomSets);
3182 }
3183
3184 DomainDecompositionBuilder::~DomainDecompositionBuilder() = default;
3185
3186 } // namespace gmx
3187
3188 static gmx_bool test_dd_cutoff(t_commrec                     *cr,
3189                                const matrix                   box,
3190                                gmx::ArrayRef<const gmx::RVec> x,
3191                                real                           cutoffRequested)
3192 {
3193     gmx_domdec_t *dd;
3194     gmx_ddbox_t   ddbox;
3195     int           d, dim, np;
3196     real          inv_cell_size;
3197     int           LocallyLimited;
3198
3199     dd = cr->dd;
3200
3201     set_ddbox(*dd, false, box, true, x, &ddbox);
3202
3203     LocallyLimited = 0;
3204
3205     for (d = 0; d < dd->ndim; d++)
3206     {
3207         dim = dd->dim[d];
3208
3209         inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
3210         if (dd->unitCellInfo.ddBoxIsDynamic)
3211         {
3212             inv_cell_size *= DD_PRES_SCALE_MARGIN;
3213         }
3214
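             /* np is the number of communication pulses needed along this
              * dimension to cover the requested cut-off with the current
              * (margin-scaled) cell size.
              */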
3215         np = 1 + static_cast<int>(cutoffRequested*inv_cell_size*ddbox.skew_fac[dim]);
3216
3217         if (dd->comm->ddSettings.request1DAnd1Pulse && np > 1)
3218         {
3219             return FALSE;
3220         }
3221
3222         if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3223         {
3224             if (np > dd->comm->cd[d].np_dlb)
3225             {
3226                 return FALSE;
3227             }
3228
3229             /* If a current local cell size is smaller than the requested
3230              * cut-off, we could still fix it, but this gets very complicated.
3231              * If we do not fix it here, we might need additional checks elsewhere.
3232              */
3233             real cellSizeAlongDim = (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim];
3234             if (cellSizeAlongDim*dd->comm->cd[d].np_dlb < cutoffRequested)
3235             {
3236                 LocallyLimited = 1;
3237             }
3238         }
3239     }
3240
3241     if (!isDlbDisabled(dd->comm))
3242     {
3243         /* If DLB is not active yet, we don't need to check the grid jumps.
3244          * Actually we shouldn't, because then the grid jump data is not set.
3245          */
3246         if (isDlbOn(dd->comm) &&
3247             check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3248         {
3249             LocallyLimited = 1;
3250         }
3251
3252         gmx_sumi(1, &LocallyLimited, cr);
3253
3254         if (LocallyLimited > 0)
3255         {
3256             return FALSE;
3257         }
3258     }
3259
3260     return TRUE;
3261 }
3262
3263 gmx_bool change_dd_cutoff(t_commrec                     *cr,
3264                           const matrix                   box,
3265                           gmx::ArrayRef<const gmx::RVec> x,
3266                           real                           cutoffRequested)
3267 {
3268     gmx_bool bCutoffAllowed;
3269
3270     bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3271
3272     if (bCutoffAllowed)
3273     {
3274         cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3275     }
3276
3277     return bCutoffAllowed;
3278 }