src/gromacs/domdec/domdec.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2005,2006,2007,2008,2009 by the GROMACS development team.
   5  * Copyright (c) 2010,2011,2012,2013,2014 by the GROMACS development team.
   6  * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team.
   7  * Copyright (c) 2020, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38
  39 #include "gmxpre.h"
  40
  41 #include "domdec.h"
  42
  43 #include "config.h"
  44
  45 #include <cassert>
  46 #include <cinttypes>
  47 #include <climits>
  48 #include <cmath>
  49 #include <cstdio>
  50 #include <cstdlib>
  51 #include <cstring>
  52
  53 #include <algorithm>
  54 #include <memory>
  55
  56 #include "gromacs/domdec/builder.h"
  57 #include "gromacs/domdec/collect.h"
  58 #include "gromacs/domdec/dlb.h"
  59 #include "gromacs/domdec/dlbtiming.h"
  60 #include "gromacs/domdec/domdec_network.h"
  61 #include "gromacs/domdec/ga2la.h"
  62 #include "gromacs/domdec/gpuhaloexchange.h"
  63 #include "gromacs/domdec/options.h"
  64 #include "gromacs/domdec/partition.h"
  65 #include "gromacs/gmxlib/network.h"
  66 #include "gromacs/gmxlib/nrnb.h"
  67 #include "gromacs/gpu_utils/gpu_utils.h"
  68 #include "gromacs/hardware/hw_info.h"
  69 #include "gromacs/listed_forces/manage_threading.h"
  70 #include "gromacs/math/vec.h"
  71 #include "gromacs/math/vectypes.h"
  72 #include "gromacs/mdlib/calc_verletbuf.h"
  73 #include "gromacs/mdlib/constr.h"
  74 #include "gromacs/mdlib/constraintrange.h"
  75 #include "gromacs/mdlib/updategroups.h"
  76 #include "gromacs/mdlib/vsite.h"
  77 #include "gromacs/mdtypes/commrec.h"
  78 #include "gromacs/mdtypes/forceoutput.h"
  79 #include "gromacs/mdtypes/inputrec.h"
  80 #include "gromacs/mdtypes/mdrunoptions.h"
  81 #include "gromacs/mdtypes/state.h"
  82 #include "gromacs/pbcutil/ishift.h"
  83 #include "gromacs/pbcutil/pbc.h"
  84 #include "gromacs/pulling/pull.h"
  85 #include "gromacs/timing/wallcycle.h"
  86 #include "gromacs/topology/block.h"
  87 #include "gromacs/topology/idef.h"
  88 #include "gromacs/topology/ifunc.h"
  89 #include "gromacs/topology/mtop_lookup.h"
  90 #include "gromacs/topology/mtop_util.h"
  91 #include "gromacs/topology/topology.h"
  92 #include "gromacs/utility/basedefinitions.h"
  93 #include "gromacs/utility/basenetwork.h"
  94 #include "gromacs/utility/cstringutil.h"
  95 #include "gromacs/utility/exceptions.h"
  96 #include "gromacs/utility/fatalerror.h"
  97 #include "gromacs/utility/gmxmpi.h"
  98 #include "gromacs/utility/logger.h"
  99 #include "gromacs/utility/real.h"
 100 #include "gromacs/utility/smalloc.h"
 101 #include "gromacs/utility/strconvert.h"
 102 #include "gromacs/utility/stringstream.h"
 103 #include "gromacs/utility/stringutil.h"
 104 #include "gromacs/utility/textwriter.h"
 105
 106 #include "atomdistribution.h"
 107 #include "box.h"
 108 #include "cellsizes.h"
 109 #include "distribute.h"
 110 #include "domdec_constraints.h"
 111 #include "domdec_internal.h"
 112 #include "domdec_setup.h"
 113 #include "domdec_vsite.h"
 114 #include "redistribute.h"
 115 #include "utility.h"
 116
 117 // TODO remove this when moving domdec into gmx namespace
 118 using gmx::DdRankOrder;
 119 using gmx::DlbOption;
 120 using gmx::DomdecOptions;
 121
 122 static const char* edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
 123
 124 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
 125 #define DD_CGIBS 2
 126
 127 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
 128 #define DD_FLAG_NRCG 65535
 129 #define DD_FLAG_FW(d) (1 << (16 + (d)*2))
 130 #define DD_FLAG_BW(d) (1 << (16 + (d)*2 + 1))
 131
 132 /* The DD zone order */
 133 static const ivec dd_zo[DD_MAXZONE] = { { 0, 0, 0 }, { 1, 0, 0 }, { 1, 1, 0 }, { 0, 1, 0 },
 134                                         { 0, 1, 1 }, { 0, 0, 1 }, { 1, 0, 1 }, { 1, 1, 1 } };
 135
 136 /* The non-bonded zone-pair setup for domain decomposition
 137  * The first number is the i-zone, the second number the first j-zone seen by
 138  * this i-zone, the third number the last+1 j-zone seen by this i-zone.
 139  * As is, this is for 3D decomposition, where there are 4 i-zones.
 140  * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
 141  * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
 142  */
 143 static const int ddNonbondedZonePairRanges[DD_MAXIZONE][3] = { { 0, 0, 8 },
 144                                                                { 1, 3, 6 },
 145                                                                { 2, 5, 6 },
 146                                                                { 3, 5, 7 } };
 147
 148
 149 /*
 150    #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
 151
 152    static void index2xyz(ivec nc,int ind,ivec xyz)
 153    {
 154    xyz[XX] = ind % nc[XX];
 155    xyz[YY] = (ind / nc[XX]) % nc[YY];
 156    xyz[ZZ] = ind / (nc[YY]*nc[XX]);
 157    }
 158  */
 159
 160 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
 161 {
 162     xyz[XX] = ind / (nc[YY] * nc[ZZ]);
 163     xyz[YY] = (ind / nc[ZZ]) % nc[YY];
 164     xyz[ZZ] = ind % nc[ZZ];
 165 }
 166
 167 static int ddcoord2ddnodeid(gmx_domdec_t* dd, ivec c)
 168 {
 169     int ddnodeid = -1;
 170
 171     const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
 172     const int                 ddindex   = dd_index(dd->nc, c);
 173     if (cartSetup.bCartesianPP_PME)
 174     {
 175         ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
 176     }
 177     else if (cartSetup.bCartesianPP)
 178     {
 179 #if GMX_MPI
 180         MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
 181 #endif
 182     }
 183     else
 184     {
 185         ddnodeid = ddindex;
 186     }
 187
 188     return ddnodeid;
 189 }
 190
 191 int ddglatnr(const gmx_domdec_t* dd, int i)
 192 {
 193     int atnr;
 194
 195     if (dd == nullptr)
 196     {
 197         atnr = i + 1;
 198     }
 199     else
 200     {
 201         if (i >= dd->comm->atomRanges.numAtomsTotal())
 202         {
 203             gmx_fatal(FARGS,
 204                       "glatnr called with %d, which is larger than the local number of atoms (%d)",
 205                       i, dd->comm->atomRanges.numAtomsTotal());
 206         }
 207         atnr = dd->globalAtomIndices[i] + 1;
 208     }
 209
 210     return atnr;
 211 }
 212
 213 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t& dd)
 214 {
 215     GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
 216     return dd.comm->systemInfo.updateGroupingPerMoleculetype;
 217 }
 218
 219 void dd_store_state(gmx_domdec_t* dd, t_state* state)
 220 {
 221     int i;
 222
 223     if (state->ddp_count != dd->ddp_count)
 224     {
 225         gmx_incons("The MD state does not match the domain decomposition state");
 226     }
 227
 228     state->cg_gl.resize(dd->ncg_home);
 229     for (i = 0; i < dd->ncg_home; i++)
 230     {
 231         state->cg_gl[i] = dd->globalAtomGroupIndices[i];
 232     }
 233
 234     state->ddp_count_cg_gl = dd->ddp_count;
 235 }
 236
 237 gmx_domdec_zones_t* domdec_zones(gmx_domdec_t* dd)
 238 {
 239     return &dd->comm->zones;
 240 }
 241
 242 int dd_numAtomsZones(const gmx_domdec_t& dd)
 243 {
 244     return dd.comm->atomRanges.end(DDAtomRanges::Type::Zones);
 245 }
 246
 247 int dd_numHomeAtoms(const gmx_domdec_t& dd)
 248 {
 249     return dd.comm->atomRanges.numHomeAtoms();
 250 }
 251
 252 int dd_natoms_mdatoms(const gmx_domdec_t* dd)
 253 {
 254     /* We currently set mdatoms entries for all atoms:
 255      * local + non-local + communicated for vsite + constraints
 256      */
 257
 258     return dd->comm->atomRanges.numAtomsTotal();
 259 }
 260
 261 int dd_natoms_vsite(const gmx_domdec_t* dd)
 262 {
 263     return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
 264 }
 265
 266 void dd_get_constraint_range(const gmx_domdec_t* dd, int* at_start, int* at_end)
 267 {
 268     *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
 269     *at_end   = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
 270 }
 271
 272 void dd_move_x(gmx_domdec_t* dd, const matrix box, gmx::ArrayRef<gmx::RVec> x, gmx_wallcycle* wcycle)
 273 {
 274     wallcycle_start(wcycle, ewcMOVEX);
 275
 276     int                    nzone, nat_tot;
 277     gmx_domdec_comm_t*     comm;
 278     gmx_domdec_comm_dim_t* cd;
 279     rvec                   shift = { 0, 0, 0 };
 280     gmx_bool               bPBC, bScrew;
 281
 282     comm = dd->comm;
 283
 284     nzone   = 1;
 285     nat_tot = comm->atomRanges.numHomeAtoms();
 286     for (int d = 0; d < dd->ndim; d++)
 287     {
 288         bPBC   = (dd->ci[dd->dim[d]] == 0);
 289         bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
 290         if (bPBC)
 291         {
 292             copy_rvec(box[dd->dim[d]], shift);
 293         }
 294         cd = &comm->cd[d];
 295         for (const gmx_domdec_ind_t& ind : cd->ind)
 296         {
 297             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 298             gmx::ArrayRef<gmx::RVec>& sendBuffer = sendBufferAccess.buffer;
 299             int                       n          = 0;
 300             if (!bPBC)
 301             {
 302                 for (int j : ind.index)
 303                 {
 304                     sendBuffer[n] = x[j];
 305                     n++;
 306                 }
 307             }
 308             else if (!bScrew)
 309             {
 310                 for (int j : ind.index)
 311                 {
 312                     /* We need to shift the coordinates */
 313                     for (int d = 0; d < DIM; d++)
 314                     {
 315                         sendBuffer[n][d] = x[j][d] + shift[d];
 316                     }
 317                     n++;
 318                 }
 319             }
 320             else
 321             {
 322                 for (int j : ind.index)
 323                 {
 324                     /* Shift x */
 325                     sendBuffer[n][XX] = x[j][XX] + shift[XX];
 326                     /* Rotate y and z.
 327                      * This operation requires a special shift force
 328                      * treatment, which is performed in calc_vir.
 329                      */
 330                     sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
 331                     sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
 332                     n++;
 333                 }
 334             }
 335
 336             DDBufferAccess<gmx::RVec> receiveBufferAccess(
 337                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 338
 339             gmx::ArrayRef<gmx::RVec> receiveBuffer;
 340             if (cd->receiveInPlace)
 341             {
 342                 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
 343             }
 344             else
 345             {
 346                 receiveBuffer = receiveBufferAccess.buffer;
 347             }
 348             /* Send and receive the coordinates */
 349             ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
 350
 351             if (!cd->receiveInPlace)
 352             {
 353                 int j = 0;
 354                 for (int zone = 0; zone < nzone; zone++)
 355                 {
 356                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 357                     {
 358                         x[i] = receiveBuffer[j++];
 359                     }
 360                 }
 361             }
 362             nat_tot += ind.nrecv[nzone + 1];
 363         }
 364         nzone += nzone;
 365     }
 366
 367     wallcycle_stop(wcycle, ewcMOVEX);
 368 }
 369
 370 void dd_move_f(gmx_domdec_t* dd, gmx::ForceWithShiftForces* forceWithShiftForces, gmx_wallcycle* wcycle)
 371 {
 372     wallcycle_start(wcycle, ewcMOVEF);
 373
 374     gmx::ArrayRef<gmx::RVec> f      = forceWithShiftForces->force();
 375     gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();
 376
 377     gmx_domdec_comm_t& comm    = *dd->comm;
 378     int                nzone   = comm.zones.n / 2;
 379     int                nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
 380     for (int d = dd->ndim - 1; d >= 0; d--)
 381     {
 382         /* Only forces in domains near the PBC boundaries need to
 383            consider PBC in the treatment of fshift */
 384         const bool shiftForcesNeedPbc =
 385                 (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
 386         const bool applyScrewPbc =
 387                 (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
 388         /* Determine which shift vector we need */
 389         ivec vis        = { 0, 0, 0 };
 390         vis[dd->dim[d]] = 1;
 391         const int is    = IVEC2IS(vis);
 392
 393         /* Loop over the pulses */
 394         const gmx_domdec_comm_dim_t& cd = comm.cd[d];
 395         for (int p = cd.numPulses() - 1; p >= 0; p--)
 396         {
 397             const gmx_domdec_ind_t&   ind = cd.ind[p];
 398             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
 399             gmx::ArrayRef<gmx::RVec>& receiveBuffer = receiveBufferAccess.buffer;
 400
 401             nat_tot -= ind.nrecv[nzone + 1];
 402
 403             DDBufferAccess<gmx::RVec> sendBufferAccess(
 404                     comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 405
 406             gmx::ArrayRef<gmx::RVec> sendBuffer;
 407             if (cd.receiveInPlace)
 408             {
 409                 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
 410             }
 411             else
 412             {
 413                 sendBuffer = sendBufferAccess.buffer;
 414                 int j      = 0;
 415                 for (int zone = 0; zone < nzone; zone++)
 416                 {
 417                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 418                     {
 419                         sendBuffer[j++] = f[i];
 420                     }
 421                 }
 422             }
 423             /* Communicate the forces */
 424             ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
 425             /* Add the received forces */
 426             int n = 0;
 427             if (!shiftForcesNeedPbc)
 428             {
 429                 for (int j : ind.index)
 430                 {
 431                     for (int d = 0; d < DIM; d++)
 432                     {
 433                         f[j][d] += receiveBuffer[n][d];
 434                     }
 435                     n++;
 436                 }
 437             }
 438             else if (!applyScrewPbc)
 439             {
 440                 for (int j : ind.index)
 441                 {
 442                     for (int d = 0; d < DIM; d++)
 443                     {
 444                         f[j][d] += receiveBuffer[n][d];
 445                     }
 446                     /* Add this force to the shift force */
 447                     for (int d = 0; d < DIM; d++)
 448                     {
 449                         fshift[is][d] += receiveBuffer[n][d];
 450                     }
 451                     n++;
 452                 }
 453             }
 454             else
 455             {
 456                 for (int j : ind.index)
 457                 {
 458                     /* Rotate the force */
 459                     f[j][XX] += receiveBuffer[n][XX];
 460                     f[j][YY] -= receiveBuffer[n][YY];
 461                     f[j][ZZ] -= receiveBuffer[n][ZZ];
 462                     if (shiftForcesNeedPbc)
 463                     {
 464                         /* Add this force to the shift force */
 465                         for (int d = 0; d < DIM; d++)
 466                         {
 467                             fshift[is][d] += receiveBuffer[n][d];
 468                         }
 469                     }
 470                     n++;
 471                 }
 472             }
 473         }
 474         nzone /= 2;
 475     }
 476     wallcycle_stop(wcycle, ewcMOVEF);
 477 }
 478
 479 /* Convenience function for extracting a real buffer from an rvec buffer
 480  *
 481  * To reduce the number of temporary communication buffers and avoid
 482  * cache polution, we reuse gmx::RVec buffers for storing reals.
 483  * This functions return a real buffer reference with the same number
 484  * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
 485  */
 486 static gmx::ArrayRef<real> realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
 487 {
 488     return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0], arrayRef.size());
 489 }
 490
 491 void dd_atom_spread_real(gmx_domdec_t* dd, real v[])
 492 {
 493     int                    nzone, nat_tot;
 494     gmx_domdec_comm_t*     comm;
 495     gmx_domdec_comm_dim_t* cd;
 496
 497     comm = dd->comm;
 498
 499     nzone   = 1;
 500     nat_tot = comm->atomRanges.numHomeAtoms();
 501     for (int d = 0; d < dd->ndim; d++)
 502     {
 503         cd = &comm->cd[d];
 504         for (const gmx_domdec_ind_t& ind : cd->ind)
 505         {
 506             /* Note: We provision for RVec instead of real, so a factor of 3
 507              * more than needed. The buffer actually already has this size
 508              * and we pass a plain pointer below, so this does not matter.
 509              */
 510             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 511             gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 512             int                 n          = 0;
 513             for (int j : ind.index)
 514             {
 515                 sendBuffer[n++] = v[j];
 516             }
 517
 518             DDBufferAccess<gmx::RVec> receiveBufferAccess(
 519                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 520
 521             gmx::ArrayRef<real> receiveBuffer;
 522             if (cd->receiveInPlace)
 523             {
 524                 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 525             }
 526             else
 527             {
 528                 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 529             }
 530             /* Send and receive the data */
 531             ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
 532             if (!cd->receiveInPlace)
 533             {
 534                 int j = 0;
 535                 for (int zone = 0; zone < nzone; zone++)
 536                 {
 537                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 538                     {
 539                         v[i] = receiveBuffer[j++];
 540                     }
 541                 }
 542             }
 543             nat_tot += ind.nrecv[nzone + 1];
 544         }
 545         nzone += nzone;
 546     }
 547 }
 548
 549 void dd_atom_sum_real(gmx_domdec_t* dd, real v[])
 550 {
 551     int                    nzone, nat_tot;
 552     gmx_domdec_comm_t*     comm;
 553     gmx_domdec_comm_dim_t* cd;
 554
 555     comm = dd->comm;
 556
 557     nzone   = comm->zones.n / 2;
 558     nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
 559     for (int d = dd->ndim - 1; d >= 0; d--)
 560     {
 561         cd = &comm->cd[d];
 562         for (int p = cd->numPulses() - 1; p >= 0; p--)
 563         {
 564             const gmx_domdec_ind_t& ind = cd->ind[p];
 565
 566             /* Note: We provision for RVec instead of real, so a factor of 3
 567              * more than needed. The buffer actually already has this size
 568              * and we typecast, so this works as intended.
 569              */
 570             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 571             gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 572             nat_tot -= ind.nrecv[nzone + 1];
 573
 574             DDBufferAccess<gmx::RVec> sendBufferAccess(
 575                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 576
 577             gmx::ArrayRef<real> sendBuffer;
 578             if (cd->receiveInPlace)
 579             {
 580                 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 581             }
 582             else
 583             {
 584                 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 585                 int j      = 0;
 586                 for (int zone = 0; zone < nzone; zone++)
 587                 {
 588                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 589                     {
 590                         sendBuffer[j++] = v[i];
 591                     }
 592                 }
 593             }
 594             /* Communicate the forces */
 595             ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
 596             /* Add the received forces */
 597             int n = 0;
 598             for (int j : ind.index)
 599             {
 600                 v[j] += receiveBuffer[n];
 601                 n++;
 602             }
 603         }
 604         nzone /= 2;
 605     }
 606 }
 607
 608 real dd_cutoff_multibody(const gmx_domdec_t* dd)
 609 {
 610     const gmx_domdec_comm_t& comm       = *dd->comm;
 611     const DDSystemInfo&      systemInfo = comm.systemInfo;
 612
 613     real r = -1;
 614     if (systemInfo.haveInterDomainMultiBodyBondeds)
 615     {
 616         if (comm.cutoff_mbody > 0)
 617         {
 618             r = comm.cutoff_mbody;
 619         }
 620         else
 621         {
 622             /* cutoff_mbody=0 means we do not have DLB */
 623             r = comm.cellsize_min[dd->dim[0]];
 624             for (int di = 1; di < dd->ndim; di++)
 625             {
 626                 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
 627             }
 628             if (comm.systemInfo.filterBondedCommunication)
 629             {
 630                 r = std::max(r, comm.cutoff_mbody);
 631             }
 632             else
 633             {
 634                 r = std::min(r, systemInfo.cutoff);
 635             }
 636         }
 637     }
 638
 639     return r;
 640 }
 641
 642 real dd_cutoff_twobody(const gmx_domdec_t* dd)
 643 {
 644     real r_mb;
 645
 646     r_mb = dd_cutoff_multibody(dd);
 647
 648     return std::max(dd->comm->systemInfo.cutoff, r_mb);
 649 }
 650
 651
 652 static void dd_cart_coord2pmecoord(const DDRankSetup&        ddRankSetup,
 653                                    const CartesianRankSetup& cartSetup,
 654                                    const ivec                coord,
 655                                    ivec                      coord_pme)
 656 {
 657     const int nc   = ddRankSetup.numPPCells[cartSetup.cartpmedim];
 658     const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
 659     copy_ivec(coord, coord_pme);
 660     coord_pme[cartSetup.cartpmedim] =
 661             nc + (coord[cartSetup.cartpmedim] * (ntot - nc) + (ntot - nc) / 2) / nc;
 662 }
 663
 664 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
 665 static int ddindex2pmeindex(const DDRankSetup& ddRankSetup, const int ddCellIndex)
 666 {
 667     const int npp  = ddRankSetup.numPPRanks;
 668     const int npme = ddRankSetup.numRanksDoingPme;
 669
 670     /* Here we assign a PME node to communicate with this DD node
 671      * by assuming that the major index of both is x.
 672      * We add npme/2 to obtain an even distribution.
 673      */
 674     return (ddCellIndex * npme + npme / 2) / npp;
 675 }
 676
 677 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup& ddRankSetup)
 678 {
 679     std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
 680
 681     int n = 0;
 682     for (int i = 0; i < ddRankSetup.numPPRanks; i++)
 683     {
 684         const int p0 = ddindex2pmeindex(ddRankSetup, i);
 685         const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
 686         if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
 687         {
 688             if (debug)
 689             {
 690                 fprintf(debug, "pme_rank[%d] = %d\n", n, i + 1 + n);
 691             }
 692             pmeRanks[n] = i + 1 + n;
 693             n++;
 694         }
 695     }
 696
 697     return pmeRanks;
 698 }
 699
 700 static int gmx_ddcoord2pmeindex(const t_commrec* cr, int x, int y, int z)
 701 {
 702     gmx_domdec_t* dd;
 703     ivec          coords;
 704     int           slab;
 705
 706     dd = cr->dd;
 707     /*
 708        if (dd->comm->bCartesian) {
 709        gmx_ddindex2xyz(dd->nc,ddindex,coords);
 710        dd_coords2pmecoords(dd,coords,coords_pme);
 711        copy_ivec(dd->ntot,nc);
 712        nc[dd->cartpmedim]         -= dd->nc[dd->cartpmedim];
 713        coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
 714
 715        slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
 716        } else {
 717        slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
 718        }
 719      */
 720     coords[XX] = x;
 721     coords[YY] = y;
 722     coords[ZZ] = z;
 723     slab       = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->nc, coords));
 724
 725     return slab;
 726 }
 727
 728 static int ddcoord2simnodeid(const t_commrec* cr, int x, int y, int z)
 729 {
 730     const CartesianRankSetup& cartSetup = cr->dd->comm->cartesianRankSetup;
 731     ivec                      coords    = { x, y, z };
 732     int                       nodeid    = -1;
 733
 734     if (cartSetup.bCartesianPP_PME)
 735     {
 736 #if GMX_MPI
 737         MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
 738 #endif
 739     }
 740     else
 741     {
 742         int ddindex = dd_index(cr->dd->nc, coords);
 743         if (cartSetup.bCartesianPP)
 744         {
 745             nodeid = cartSetup.ddindex2simnodeid[ddindex];
 746         }
 747         else
 748         {
 749             if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
 750             {
 751                 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
 752             }
 753             else
 754             {
 755                 nodeid = ddindex;
 756             }
 757         }
 758     }
 759
 760     return nodeid;
 761 }
 762
 763 static int dd_simnode2pmenode(const DDRankSetup&        ddRankSetup,
 764                               const CartesianRankSetup& cartSetup,
 765                               gmx::ArrayRef<const int>  pmeRanks,
 766                               const t_commrec gmx_unused* cr,
 767                               const int                   sim_nodeid)
 768 {
 769     int pmenode = -1;
 770
 771     /* This assumes a uniform x domain decomposition grid cell size */
 772     if (cartSetup.bCartesianPP_PME)
 773     {
 774 #if GMX_MPI
 775         ivec coord, coord_pme;
 776         MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
 777         if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
 778         {
 779             /* This is a PP rank */
 780             dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
 781             MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
 782         }
 783 #endif
 784     }
 785     else if (cartSetup.bCartesianPP)
 786     {
 787         if (sim_nodeid < ddRankSetup.numPPRanks)
 788         {
 789             pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
 790         }
 791     }
 792     else
 793     {
 794         /* This assumes DD cells with identical x coordinates
 795          * are numbered sequentially.
 796          */
 797         if (pmeRanks.empty())
 798         {
 799             if (sim_nodeid < ddRankSetup.numPPRanks)
 800             {
 801                 /* The DD index equals the nodeid */
 802                 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
 803             }
 804         }
 805         else
 806         {
 807             int i = 0;
 808             while (sim_nodeid > pmeRanks[i])
 809             {
 810                 i++;
 811             }
 812             if (sim_nodeid < pmeRanks[i])
 813             {
 814                 pmenode = pmeRanks[i];
 815             }
 816         }
 817     }
 818
 819     return pmenode;
 820 }
 821
 822 NumPmeDomains getNumPmeDomains(const gmx_domdec_t* dd)
 823 {
 824     if (dd != nullptr)
 825     {
 826         return { dd->comm->ddRankSetup.npmenodes_x, dd->comm->ddRankSetup.npmenodes_y };
 827     }
 828     else
 829     {
 830         return { 1, 1 };
 831     }
 832 }
 833
 834 std::vector<int> get_pme_ddranks(const t_commrec* cr, const int pmenodeid)
 835 {
 836     const DDRankSetup&        ddRankSetup = cr->dd->comm->ddRankSetup;
 837     const CartesianRankSetup& cartSetup   = cr->dd->comm->cartesianRankSetup;
 838     GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks,
 839                        "This function should only be called when PME-only ranks are in use");
 840     std::vector<int> ddranks;
 841     ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1) / ddRankSetup.numRanksDoingPme);
 842
 843     for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
 844     {
 845         for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
 846         {
 847             for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
 848             {
 849                 if (cartSetup.bCartesianPP_PME)
 850                 {
 851                     ivec coord = { x, y, z };
 852                     ivec coord_pme;
 853                     dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
 854                     if (cr->dd->ci[XX] == coord_pme[XX] && cr->dd->ci[YY] == coord_pme[YY]
 855                         && cr->dd->ci[ZZ] == coord_pme[ZZ])
 856                     {
 857                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 858                     }
 859                 }
 860                 else
 861                 {
 862                     /* The slab corresponds to the nodeid in the PME group */
 863                     if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
 864                     {
 865                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 866                     }
 867                 }
 868             }
 869         }
 870     }
 871     return ddranks;
 872 }
 873
 874 static gmx_bool receive_vir_ener(const gmx_domdec_t* dd, gmx::ArrayRef<const int> pmeRanks, const t_commrec* cr)
 875 {
 876     gmx_bool bReceive = TRUE;
 877
 878     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
 879     if (ddRankSetup.usePmeOnlyRanks)
 880     {
 881         const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
 882         if (cartSetup.bCartesianPP_PME)
 883         {
 884 #if GMX_MPI
 885             int  pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
 886             ivec coords;
 887             MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
 888             coords[cartSetup.cartpmedim]++;
 889             if (coords[cartSetup.cartpmedim] < dd->nc[cartSetup.cartpmedim])
 890             {
 891                 int rank;
 892                 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
 893                 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
 894                 {
 895                     /* This is not the last PP node for pmenode */
 896                     bReceive = FALSE;
 897                 }
 898             }
 899 #else
 900             GMX_RELEASE_ASSERT(
 901                     false,
 902                     "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
 903 #endif
 904         }
 905         else
 906         {
 907             int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
 908             if (cr->sim_nodeid + 1 < cr->nnodes
 909                 && dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
 910             {
 911                 /* This is not the last PP node for pmenode */
 912                 bReceive = FALSE;
 913             }
 914         }
 915     }
 916
 917     return bReceive;
 918 }
 919
 920 static void set_slb_pme_dim_f(gmx_domdec_t* dd, int dim, real** dim_f)
 921 {
 922     gmx_domdec_comm_t* comm;
 923     int                i;
 924
 925     comm = dd->comm;
 926
 927     snew(*dim_f, dd->nc[dim] + 1);
 928     (*dim_f)[0] = 0;
 929     for (i = 1; i < dd->nc[dim]; i++)
 930     {
 931         if (comm->slb_frac[dim])
 932         {
 933             (*dim_f)[i] = (*dim_f)[i - 1] + comm->slb_frac[dim][i - 1];
 934         }
 935         else
 936         {
 937             (*dim_f)[i] = static_cast<real>(i) / static_cast<real>(dd->nc[dim]);
 938         }
 939     }
 940     (*dim_f)[dd->nc[dim]] = 1;
 941 }
 942
 943 static void init_ddpme(gmx_domdec_t* dd, gmx_ddpme_t* ddpme, int dimind)
 944 {
 945     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
 946
 947     if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
 948     {
 949         ddpme->dim = YY;
 950     }
 951     else
 952     {
 953         ddpme->dim = dimind;
 954     }
 955     ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
 956
 957     ddpme->nslab = (ddpme->dim == 0 ? ddRankSetup.npmenodes_x : ddRankSetup.npmenodes_y);
 958
 959     if (ddpme->nslab <= 1)
 960     {
 961         return;
 962     }
 963
 964     const int nso = ddRankSetup.numRanksDoingPme / ddpme->nslab;
 965     /* Determine for each PME slab the PP location range for dimension dim */
 966     snew(ddpme->pp_min, ddpme->nslab);
 967     snew(ddpme->pp_max, ddpme->nslab);
 968     for (int slab = 0; slab < ddpme->nslab; slab++)
 969     {
 970         ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
 971         ddpme->pp_max[slab] = 0;
 972     }
 973     for (int i = 0; i < dd->nnodes; i++)
 974     {
 975         ivec xyz;
 976         ddindex2xyz(dd->nc, i, xyz);
 977         /* For y only use our y/z slab.
 978          * This assumes that the PME x grid size matches the DD grid size.
 979          */
 980         if (dimind == 0 || xyz[XX] == dd->ci[XX])
 981         {
 982             const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
 983             int       slab;
 984             if (dimind == 0)
 985             {
 986                 slab = pmeindex / nso;
 987             }
 988             else
 989             {
 990                 slab = pmeindex % ddpme->nslab;
 991             }
 992             ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
 993             ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
 994         }
 995     }
 996
 997     set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
 998 }
 999
1000 int dd_pme_maxshift_x(const gmx_domdec_t* dd)
1001 {
1002     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
1003
1004     if (ddRankSetup.ddpme[0].dim == XX)
1005     {
1006         return ddRankSetup.ddpme[0].maxshift;
1007     }
1008     else
1009     {
1010         return 0;
1011     }
1012 }
1013
1014 int dd_pme_maxshift_y(const gmx_domdec_t* dd)
1015 {
1016     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
1017
1018     if (ddRankSetup.ddpme[0].dim == YY)
1019     {
1020         return ddRankSetup.ddpme[0].maxshift;
1021     }
1022     else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
1023     {
1024         return ddRankSetup.ddpme[1].maxshift;
1025     }
1026     else
1027     {
1028         return 0;
1029     }
1030 }
1031
1032 bool ddHaveSplitConstraints(const gmx_domdec_t& dd)
1033 {
1034     return dd.comm->systemInfo.haveSplitConstraints;
1035 }
1036
1037 bool ddUsesUpdateGroups(const gmx_domdec_t& dd)
1038 {
1039     return dd.comm->systemInfo.useUpdateGroups;
1040 }
1041
1042 void dd_cycles_add(const gmx_domdec_t* dd, float cycles, int ddCycl)
1043 {
1044     /* Note that the cycles value can be incorrect, either 0 or some
1045      * extremely large value, when our thread migrated to another core
1046      * with an unsynchronized cycle counter. If this happens less often
1047      * that once per nstlist steps, this will not cause issues, since
1048      * we later subtract the maximum value from the sum over nstlist steps.
1049      * A zero count will slightly lower the total, but that's a small effect.
1050      * Note that the main purpose of the subtraction of the maximum value
1051      * is to avoid throwing off the load balancing when stalls occur due
1052      * e.g. system activity or network congestion.
1053      */
1054     dd->comm->cycl[ddCycl] += cycles;
1055     dd->comm->cycl_n[ddCycl]++;
1056     if (cycles > dd->comm->cycl_max[ddCycl])
1057     {
1058         dd->comm->cycl_max[ddCycl] = cycles;
1059     }
1060 }
1061
1062 #if GMX_MPI
1063 static void make_load_communicator(gmx_domdec_t* dd, int dim_ind, ivec loc)
1064 {
1065     MPI_Comm c_row;
1066     int      dim, i, rank;
1067     ivec     loc_c;
1068     gmx_bool bPartOfGroup = FALSE;
1069
1070     dim = dd->dim[dim_ind];
1071     copy_ivec(loc, loc_c);
1072     for (i = 0; i < dd->nc[dim]; i++)
1073     {
1074         loc_c[dim] = i;
1075         rank       = dd_index(dd->nc, loc_c);
1076         if (rank == dd->rank)
1077         {
1078             /* This process is part of the group */
1079             bPartOfGroup = TRUE;
1080         }
1081     }
1082     MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank, &c_row);
1083     if (bPartOfGroup)
1084     {
1085         dd->comm->mpi_comm_load[dim_ind] = c_row;
1086         if (!isDlbDisabled(dd->comm))
1087         {
1088             DDCellsizesWithDlb& cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1089
1090             if (dd->ci[dim] == dd->master_ci[dim])
1091             {
1092                 /* This is the root process of this row */
1093                 cellsizes.rowMaster = std::make_unique<RowMaster>();
1094
1095                 RowMaster& rowMaster = *cellsizes.rowMaster;
1096                 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1097                 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
1098                 rowMaster.isCellMin.resize(dd->nc[dim]);
1099                 if (dim_ind > 0)
1100                 {
1101                     rowMaster.bounds.resize(dd->nc[dim]);
1102                 }
1103                 rowMaster.buf_ncd.resize(dd->nc[dim]);
1104             }
1105             else
1106             {
1107                 /* This is not a root process, we only need to receive cell_f */
1108                 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1109             }
1110         }
1111         if (dd->ci[dim] == dd->master_ci[dim])
1112         {
1113             snew(dd->comm->load[dim_ind].load, dd->nc[dim] * DD_NLOAD_MAX);
1114         }
1115     }
1116 }
1117 #endif
1118
1119 void dd_setup_dlb_resource_sharing(const t_commrec* cr, int gpu_id)
1120 {
1121 #if GMX_MPI
1122     int           physicalnode_id_hash;
1123     gmx_domdec_t* dd;
1124     MPI_Comm      mpi_comm_pp_physicalnode;
1125
1126     if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1127     {
1128         /* Only ranks with short-ranged tasks (currently) use GPUs.
1129          * If we don't have GPUs assigned, there are no resources to share.
1130          */
1131         return;
1132     }
1133
1134     physicalnode_id_hash = gmx_physicalnode_id_hash();
1135
1136     dd = cr->dd;
1137
1138     if (debug)
1139     {
1140         fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1141         fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n", dd->rank,
1142                 physicalnode_id_hash, gpu_id);
1143     }
1144     /* Split the PP communicator over the physical nodes */
1145     /* TODO: See if we should store this (before), as it's also used for
1146      * for the nodecomm summation.
1147      */
1148     // TODO PhysicalNodeCommunicator could be extended/used to handle
1149     // the need for per-node per-group communicators.
1150     MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank, &mpi_comm_pp_physicalnode);
1151     MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank, &dd->comm->mpi_comm_gpu_shared);
1152     MPI_Comm_free(&mpi_comm_pp_physicalnode);
1153     MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1154
1155     if (debug)
1156     {
1157         fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1158     }
1159
1160     /* Note that some ranks could share a GPU, while others don't */
1161
1162     if (dd->comm->nrank_gpu_shared == 1)
1163     {
1164         MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1165     }
1166 #else
1167     GMX_UNUSED_VALUE(cr);
1168     GMX_UNUSED_VALUE(gpu_id);
1169 #endif
1170 }
1171
1172 static void make_load_communicators(gmx_domdec_t gmx_unused* dd)
1173 {
1174 #if GMX_MPI
1175     int  dim0, dim1, i, j;
1176     ivec loc;
1177
1178     if (debug)
1179     {
1180         fprintf(debug, "Making load communicators\n");
1181     }
1182
1183     dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1184     snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1185
1186     if (dd->ndim == 0)
1187     {
1188         return;
1189     }
1190
1191     clear_ivec(loc);
1192     make_load_communicator(dd, 0, loc);
1193     if (dd->ndim > 1)
1194     {
1195         dim0 = dd->dim[0];
1196         for (i = 0; i < dd->nc[dim0]; i++)
1197         {
1198             loc[dim0] = i;
1199             make_load_communicator(dd, 1, loc);
1200         }
1201     }
1202     if (dd->ndim > 2)
1203     {
1204         dim0 = dd->dim[0];
1205         for (i = 0; i < dd->nc[dim0]; i++)
1206         {
1207             loc[dim0] = i;
1208             dim1      = dd->dim[1];
1209             for (j = 0; j < dd->nc[dim1]; j++)
1210             {
1211                 loc[dim1] = j;
1212                 make_load_communicator(dd, 2, loc);
1213             }
1214         }
1215     }
1216
1217     if (debug)
1218     {
1219         fprintf(debug, "Finished making load communicators\n");
1220     }
1221 #endif
1222 }
1223
1224 /*! \brief Sets up the relation between neighboring domains and zones */
1225 static void setup_neighbor_relations(gmx_domdec_t* dd)
1226 {
1227     int                 d, dim, m;
1228     ivec                tmp, s;
1229     gmx_domdec_zones_t* zones;
1230     GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
1231
1232     for (d = 0; d < dd->ndim; d++)
1233     {
1234         dim = dd->dim[d];
1235         copy_ivec(dd->ci, tmp);
1236         tmp[dim]           = (tmp[dim] + 1) % dd->nc[dim];
1237         dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1238         copy_ivec(dd->ci, tmp);
1239         tmp[dim]           = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
1240         dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1241         if (debug)
1242         {
1243             fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n", dd->rank, dim,
1244                     dd->neighbor[d][0], dd->neighbor[d][1]);
1245         }
1246     }
1247
1248     int nzone  = (1 << dd->ndim);
1249     int nizone = (1 << std::max(dd->ndim - 1, 0));
1250     assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1251
1252     zones = &dd->comm->zones;
1253
1254     for (int i = 0; i < nzone; i++)
1255     {
1256         m = 0;
1257         clear_ivec(zones->shift[i]);
1258         for (d = 0; d < dd->ndim; d++)
1259         {
1260             zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1261         }
1262     }
1263
1264     zones->n = nzone;
1265     for (int i = 0; i < nzone; i++)
1266     {
1267         for (d = 0; d < DIM; d++)
1268         {
1269             s[d] = dd->ci[d] - zones->shift[i][d];
1270             if (s[d] < 0)
1271             {
1272                 s[d] += dd->nc[d];
1273             }
1274             else if (s[d] >= dd->nc[d])
1275             {
1276                 s[d] -= dd->nc[d];
1277             }
1278         }
1279     }
1280     for (int iZoneIndex = 0; iZoneIndex < nizone; iZoneIndex++)
1281     {
1282         GMX_RELEASE_ASSERT(
1283                 ddNonbondedZonePairRanges[iZoneIndex][0] == iZoneIndex,
1284                 "The first element for each ddNonbondedZonePairRanges should match its index");
1285
1286         DDPairInteractionRanges iZone;
1287         iZone.iZoneIndex = iZoneIndex;
1288         /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
1289          * j-zones up to nzone.
1290          */
1291         iZone.jZoneRange = gmx::Range<int>(std::min(ddNonbondedZonePairRanges[iZoneIndex][1], nzone),
1292                                            std::min(ddNonbondedZonePairRanges[iZoneIndex][2], nzone));
1293         for (dim = 0; dim < DIM; dim++)
1294         {
1295             if (dd->nc[dim] == 1)
1296             {
1297                 /* All shifts should be allowed */
1298                 iZone.shift0[dim] = -1;
1299                 iZone.shift1[dim] = 1;
1300             }
1301             else
1302             {
1303                 /* Determine the min/max j-zone shift wrt the i-zone */
1304                 iZone.shift0[dim] = 1;
1305                 iZone.shift1[dim] = -1;
1306                 for (int jZone : iZone.jZoneRange)
1307                 {
1308                     int shift_diff = zones->shift[jZone][dim] - zones->shift[iZoneIndex][dim];
1309                     if (shift_diff < iZone.shift0[dim])
1310                     {
1311                         iZone.shift0[dim] = shift_diff;
1312                     }
1313                     if (shift_diff > iZone.shift1[dim])
1314                     {
1315                         iZone.shift1[dim] = shift_diff;
1316                     }
1317                 }
1318             }
1319         }
1320
1321         zones->iZones.push_back(iZone);
1322     }
1323
1324     if (!isDlbDisabled(dd->comm))
1325     {
1326         dd->comm->cellsizesWithDlb.resize(dd->ndim);
1327     }
1328
1329     if (dd->comm->ddSettings.recordLoad)
1330     {
1331         make_load_communicators(dd);
1332     }
1333 }
1334
1335 static void make_pp_communicator(const gmx::MDLogger& mdlog,
1336                                  gmx_domdec_t*        dd,
1337                                  t_commrec gmx_unused* cr,
1338                                  bool gmx_unused reorder)
1339 {
1340 #if GMX_MPI
1341     gmx_domdec_comm_t*  comm      = dd->comm;
1342     CartesianRankSetup& cartSetup = comm->cartesianRankSetup;
1343
1344     if (cartSetup.bCartesianPP)
1345     {
1346         /* Set up cartesian communication for the particle-particle part */
1347         GMX_LOG(mdlog.info)
1348                 .appendTextFormatted("Will use a Cartesian communicator: %d x %d x %d", dd->nc[XX],
1349                                      dd->nc[YY], dd->nc[ZZ]);
1350
1351         ivec periods;
1352         for (int i = 0; i < DIM; i++)
1353         {
1354             periods[i] = TRUE;
1355         }
1356         MPI_Comm comm_cart;
1357         MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder), &comm_cart);
1358         /* We overwrite the old communicator with the new cartesian one */
1359         cr->mpi_comm_mygroup = comm_cart;
1360     }
1361
1362     dd->mpi_comm_all = cr->mpi_comm_mygroup;
1363     MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1364
1365     if (cartSetup.bCartesianPP_PME)
1366     {
1367         /* Since we want to use the original cartesian setup for sim,
1368          * and not the one after split, we need to make an index.
1369          */
1370         cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1371         cartSetup.ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
1372         gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1373         /* Get the rank of the DD master,
1374          * above we made sure that the master node is a PP node.
1375          */
1376         int rank;
1377         if (MASTER(cr))
1378         {
1379             rank = dd->rank;
1380         }
1381         else
1382         {
1383             rank = 0;
1384         }
1385         MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1386     }
1387     else if (cartSetup.bCartesianPP)
1388     {
1389         if (!comm->ddRankSetup.usePmeOnlyRanks)
1390         {
1391             /* The PP communicator is also
1392              * the communicator for this simulation
1393              */
1394             cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1395         }
1396         cr->nodeid = dd->rank;
1397
1398         MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1399
1400         /* We need to make an index to go from the coordinates
1401          * to the nodeid of this simulation.
1402          */
1403         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1404         std::vector<int> buf(dd->nnodes);
1405         if (thisRankHasDuty(cr, DUTY_PP))
1406         {
1407             buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1408         }
1409         /* Communicate the ddindex to simulation nodeid index */
1410         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1411                       cr->mpi_comm_mysim);
1412
1413         /* Determine the master coordinates and rank.
1414          * The DD master should be the same node as the master of this sim.
1415          */
1416         for (int i = 0; i < dd->nnodes; i++)
1417         {
1418             if (cartSetup.ddindex2simnodeid[i] == 0)
1419             {
1420                 ddindex2xyz(dd->nc, i, dd->master_ci);
1421                 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1422             }
1423         }
1424         if (debug)
1425         {
1426             fprintf(debug, "The master rank is %d\n", dd->masterrank);
1427         }
1428     }
1429     else
1430     {
1431         /* No Cartesian communicators */
1432         /* We use the rank in dd->comm->all as DD index */
1433         ddindex2xyz(dd->nc, dd->rank, dd->ci);
1434         /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1435         dd->masterrank = 0;
1436         clear_ivec(dd->master_ci);
1437     }
1438 #endif
1439
1440     GMX_LOG(mdlog.info)
1441             .appendTextFormatted("Domain decomposition rank %d, coordinates %d %d %d\n", dd->rank,
1442                                  dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1443     if (debug)
1444     {
1445         fprintf(debug, "Domain decomposition rank %d, coordinates %d %d %d\n\n", dd->rank,
1446                 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1447     }
1448 }
1449
1450 static void receive_ddindex2simnodeid(gmx_domdec_t* dd, t_commrec* cr)
1451 {
1452 #if GMX_MPI
1453     CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
1454
1455     if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1456     {
1457         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1458         std::vector<int> buf(dd->nnodes);
1459         if (thisRankHasDuty(cr, DUTY_PP))
1460         {
1461             buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1462         }
1463         /* Communicate the ddindex to simulation nodeid index */
1464         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1465                       cr->mpi_comm_mysim);
1466     }
1467 #else
1468     GMX_UNUSED_VALUE(dd);
1469     GMX_UNUSED_VALUE(cr);
1470 #endif
1471 }
1472
1473 static CartesianRankSetup split_communicator(const gmx::MDLogger& mdlog,
1474                                              t_commrec*           cr,
1475                                              const DdRankOrder    ddRankOrder,
1476                                              bool gmx_unused    reorder,
1477                                              const DDRankSetup& ddRankSetup,
1478                                              ivec               ddCellIndex,
1479                                              std::vector<int>*  pmeRanks)
1480 {
1481     CartesianRankSetup cartSetup;
1482
1483     cartSetup.bCartesianPP     = (ddRankOrder == DdRankOrder::cartesian);
1484     cartSetup.bCartesianPP_PME = false;
1485
1486     const ivec& numDDCells = ddRankSetup.numPPCells;
1487     /* Initially we set ntot to the number of PP cells */
1488     copy_ivec(numDDCells, cartSetup.ntot);
1489
1490     if (cartSetup.bCartesianPP)
1491     {
1492         const int numDDCellsTot = ddRankSetup.numPPRanks;
1493         bool      bDiv[DIM];
1494         for (int i = 1; i < DIM; i++)
1495         {
1496             bDiv[i] = ((ddRankSetup.numRanksDoingPme * numDDCells[i]) % numDDCellsTot == 0);
1497         }
1498         if (bDiv[YY] || bDiv[ZZ])
1499         {
1500             cartSetup.bCartesianPP_PME = TRUE;
1501             /* If we have 2D PME decomposition, which is always in x+y,
1502              * we stack the PME only nodes in z.
1503              * Otherwise we choose the direction that provides the thinnest slab
1504              * of PME only nodes as this will have the least effect
1505              * on the PP communication.
1506              * But for the PME communication the opposite might be better.
1507              */
1508             if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 || !bDiv[YY] || numDDCells[YY] > numDDCells[ZZ]))
1509             {
1510                 cartSetup.cartpmedim = ZZ;
1511             }
1512             else
1513             {
1514                 cartSetup.cartpmedim = YY;
1515             }
1516             cartSetup.ntot[cartSetup.cartpmedim] +=
1517                     (ddRankSetup.numRanksDoingPme * numDDCells[cartSetup.cartpmedim]) / numDDCellsTot;
1518         }
1519         else
1520         {
1521             GMX_LOG(mdlog.info)
1522                     .appendTextFormatted(
1523                             "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or "
1524                             "nx*nz (%d*%d)",
1525                             ddRankSetup.numRanksDoingPme, numDDCells[XX], numDDCells[YY],
1526                             numDDCells[XX], numDDCells[ZZ]);
1527             GMX_LOG(mdlog.info)
1528                     .appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1529         }
1530     }
1531
1532     if (cartSetup.bCartesianPP_PME)
1533     {
1534 #if GMX_MPI
1535         int  rank;
1536         ivec periods;
1537
1538         GMX_LOG(mdlog.info)
1539                 .appendTextFormatted(
1540                         "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1541                         cartSetup.ntot[XX], cartSetup.ntot[YY], cartSetup.ntot[ZZ]);
1542
1543         for (int i = 0; i < DIM; i++)
1544         {
1545             periods[i] = TRUE;
1546         }
1547         MPI_Comm comm_cart;
1548         MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder),
1549                         &comm_cart);
1550         MPI_Comm_rank(comm_cart, &rank);
1551         if (MASTER(cr) && rank != 0)
1552         {
1553             gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1554         }
1555
1556         /* With this assigment we loose the link to the original communicator
1557          * which will usually be MPI_COMM_WORLD, unless have multisim.
1558          */
1559         cr->mpi_comm_mysim = comm_cart;
1560         cr->sim_nodeid     = rank;
1561
1562         MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1563
1564         GMX_LOG(mdlog.info)
1565                 .appendTextFormatted("Cartesian rank %d, coordinates %d %d %d\n", cr->sim_nodeid,
1566                                      ddCellIndex[XX], ddCellIndex[YY], ddCellIndex[ZZ]);
1567
1568         if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1569         {
1570             cr->duty = DUTY_PP;
1571         }
1572         if (!ddRankSetup.usePmeOnlyRanks
1573             || ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1574         {
1575             cr->duty = DUTY_PME;
1576         }
1577
1578         /* Split the sim communicator into PP and PME only nodes */
1579         MPI_Comm_split(cr->mpi_comm_mysim, getThisRankDuties(cr),
1580                        dd_index(cartSetup.ntot, ddCellIndex), &cr->mpi_comm_mygroup);
1581 #else
1582         GMX_UNUSED_VALUE(ddCellIndex);
1583 #endif
1584     }
1585     else
1586     {
1587         switch (ddRankOrder)
1588         {
1589             case DdRankOrder::pp_pme:
1590                 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1591                 break;
1592             case DdRankOrder::interleave:
1593                 /* Interleave the PP-only and PME-only ranks */
1594                 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1595                 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1596                 break;
1597             case DdRankOrder::cartesian: break;
1598             default: gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1599         }
1600
1601         if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1602         {
1603             cr->duty = DUTY_PME;
1604         }
1605         else
1606         {
1607             cr->duty = DUTY_PP;
1608         }
1609 #if GMX_MPI
1610         /* Split the sim communicator into PP and PME only nodes */
1611         MPI_Comm_split(cr->mpi_comm_mysim, getThisRankDuties(cr), cr->nodeid, &cr->mpi_comm_mygroup);
1612         MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1613 #endif
1614     }
1615
1616     GMX_LOG(mdlog.info)
1617             .appendTextFormatted("This rank does only %s work.\n",
1618                                  thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1619
1620     return cartSetup;
1621 }
1622
1623 /*! \brief Makes the PP communicator and the PME communicator, when needed
1624  *
1625  * Returns the Cartesian rank setup.
1626  * Sets \p cr->mpi_comm_mygroup
1627  * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1628  * With separate PME ranks in interleaved order, set the PME ranks in \p pmeRanks.
1629  */
1630 static CartesianRankSetup makeGroupCommunicators(const gmx::MDLogger& mdlog,
1631                                                  const DDSettings&    ddSettings,
1632                                                  const DdRankOrder    ddRankOrder,
1633                                                  const DDRankSetup&   ddRankSetup,
1634                                                  t_commrec*           cr,
1635                                                  ivec                 ddCellIndex,
1636                                                  std::vector<int>*    pmeRanks)
1637 {
1638     CartesianRankSetup cartSetup;
1639
1640     if (ddRankSetup.usePmeOnlyRanks)
1641     {
1642         /* Split the communicator into a PP and PME part */
1643         cartSetup = split_communicator(mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder,
1644                                        ddRankSetup, ddCellIndex, pmeRanks);
1645     }
1646     else
1647     {
1648         /* All nodes do PP and PME */
1649         /* We do not require separate communicators */
1650         cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
1651
1652         cartSetup.bCartesianPP     = false;
1653         cartSetup.bCartesianPP_PME = false;
1654     }
1655
1656     return cartSetup;
1657 }
1658
1659 /*! \brief For PP ranks, sets or makes the communicator
1660  *
1661  * For PME ranks get the rank id.
1662  * For PP only ranks, sets the PME-only rank.
1663  */
1664 static void setupGroupCommunication(const gmx::MDLogger&     mdlog,
1665                                     const DDSettings&        ddSettings,
1666                                     gmx::ArrayRef<const int> pmeRanks,
1667                                     t_commrec*               cr,
1668                                     const int                numAtomsInSystem,
1669                                     gmx_domdec_t*            dd)
1670 {
1671     const DDRankSetup&        ddRankSetup = dd->comm->ddRankSetup;
1672     const CartesianRankSetup& cartSetup   = dd->comm->cartesianRankSetup;
1673
1674     if (thisRankHasDuty(cr, DUTY_PP))
1675     {
1676         /* Copy or make a new PP communicator */
1677
1678         /* We (possibly) reordered the nodes in split_communicator,
1679          * so it is no longer required in make_pp_communicator.
1680          */
1681         const bool useCartesianReorder = (ddSettings.useCartesianReorder && !cartSetup.bCartesianPP_PME);
1682
1683         make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1684     }
1685     else
1686     {
1687         receive_ddindex2simnodeid(dd, cr);
1688     }
1689
1690     if (!thisRankHasDuty(cr, DUTY_PME))
1691     {
1692         /* Set up the commnuication to our PME node */
1693         dd->pme_nodeid = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1694         dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1695         if (debug)
1696         {
1697             fprintf(debug, "My pme_nodeid %d receive ener %s\n", dd->pme_nodeid,
1698                     gmx::boolToString(dd->pme_receive_vir_ener));
1699         }
1700     }
1701     else
1702     {
1703         dd->pme_nodeid = -1;
1704     }
1705
1706     /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1707     if (MASTER(cr))
1708     {
1709         dd->ma = std::make_unique<AtomDistribution>(dd->nc, numAtomsInSystem, numAtomsInSystem);
1710     }
1711 }
1712
1713 static real* get_slb_frac(const gmx::MDLogger& mdlog, const char* dir, int nc, const char* size_string)
1714 {
1715     real * slb_frac, tot;
1716     int    i, n;
1717     double dbl;
1718
1719     slb_frac = nullptr;
1720     if (nc > 1 && size_string != nullptr)
1721     {
1722         GMX_LOG(mdlog.info).appendTextFormatted("Using static load balancing for the %s direction", dir);
1723         snew(slb_frac, nc);
1724         tot = 0;
1725         for (i = 0; i < nc; i++)
1726         {
1727             dbl = 0;
1728             sscanf(size_string, "%20lf%n", &dbl, &n);
1729             if (dbl == 0)
1730             {
1731                 gmx_fatal(FARGS,
1732                           "Incorrect or not enough DD cell size entries for direction %s: '%s'",
1733                           dir, size_string);
1734             }
1735             slb_frac[i] = dbl;
1736             size_string += n;
1737             tot += slb_frac[i];
1738         }
1739         /* Normalize */
1740         std::string relativeCellSizes = "Relative cell sizes:";
1741         for (i = 0; i < nc; i++)
1742         {
1743             slb_frac[i] /= tot;
1744             relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1745         }
1746         GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1747     }
1748
1749     return slb_frac;
1750 }
1751
1752 static int multi_body_bondeds_count(const gmx_mtop_t* mtop)
1753 {
1754     int                  n     = 0;
1755     gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1756     int                  nmol;
1757     while (const InteractionLists* ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1758     {
1759         for (auto& ilist : extractILists(*ilists, IF_BOND))
1760         {
1761             if (NRAL(ilist.functionType) > 2)
1762             {
1763                 n += nmol * (ilist.iatoms.size() / ilistStride(ilist));
1764             }
1765         }
1766     }
1767
1768     return n;
1769 }
1770
1771 static int dd_getenv(const gmx::MDLogger& mdlog, const char* env_var, int def)
1772 {
1773     char* val;
1774     int   nst;
1775
1776     nst = def;
1777     val = getenv(env_var);
1778     if (val)
1779     {
1780         if (sscanf(val, "%20d", &nst) <= 0)
1781         {
1782             nst = 1;
1783         }
1784         GMX_LOG(mdlog.info).appendTextFormatted("Found env.var. %s = %s, using value %d", env_var, val, nst);
1785     }
1786
1787     return nst;
1788 }
1789
1790 static void check_dd_restrictions(const gmx_domdec_t* dd, const t_inputrec* ir, const gmx::MDLogger& mdlog)
1791 {
1792     if (ir->ePBC == epbcSCREW && (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
1793     {
1794         gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction",
1795                   epbc_names[ir->ePBC]);
1796     }
1797
1798     if (ir->nstlist == 0)
1799     {
1800         gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1801     }
1802
1803     if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
1804     {
1805         GMX_LOG(mdlog.warning)
1806                 .appendText(
1807                         "comm-mode angular will give incorrect results when the comm group "
1808                         "partially crosses a periodic boundary");
1809     }
1810 }
1811
1812 static real average_cellsize_min(const gmx_ddbox_t& ddbox, const ivec numDomains)
1813 {
1814     real r = ddbox.box_size[XX];
1815     for (int d = 0; d < DIM; d++)
1816     {
1817         if (numDomains[d] > 1)
1818         {
1819             /* Check using the initial average cell size */
1820             r = std::min(r, ddbox.box_size[d] * ddbox.skew_fac[d] / numDomains[d]);
1821         }
1822     }
1823
1824     return r;
1825 }
1826
1827 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
1828  */
1829 static DlbState forceDlbOffOrBail(DlbState             cmdlineDlbState,
1830                                   const std::string&   reasonStr,
1831                                   const gmx::MDLogger& mdlog)
1832 {
1833     std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
1834     std::string dlbDisableNote     = "NOTE: disabling dynamic load balancing as ";
1835
1836     if (cmdlineDlbState == DlbState::onUser)
1837     {
1838         gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1839     }
1840     else if (cmdlineDlbState == DlbState::offCanTurnOn)
1841     {
1842         GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1843     }
1844     return DlbState::offForever;
1845 }
1846
1847 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1848  *
1849  * This function parses the parameters of "-dlb" command line option setting
1850  * corresponding state values. Then it checks the consistency of the determined
1851  * state with other run parameters and settings. As a result, the initial state
1852  * may be altered or an error may be thrown if incompatibility of options is detected.
1853  *
1854  * \param [in] mdlog       Logger.
1855  * \param [in] dlbOption   Enum value for the DLB option.
1856  * \param [in] bRecordLoad True if the load balancer is recording load information.
1857  * \param [in] mdrunOptions  Options for mdrun.
1858  * \param [in] ir          Pointer mdrun to input parameters.
1859  * \returns                DLB initial/startup state.
1860  */
1861 static DlbState determineInitialDlbState(const gmx::MDLogger&     mdlog,
1862                                          DlbOption                dlbOption,
1863                                          gmx_bool                 bRecordLoad,
1864                                          const gmx::MdrunOptions& mdrunOptions,
1865                                          const t_inputrec*        ir)
1866 {
1867     DlbState dlbState = DlbState::offCanTurnOn;
1868
1869     switch (dlbOption)
1870     {
1871         case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1872         case DlbOption::no: dlbState = DlbState::offUser; break;
1873         case DlbOption::yes: dlbState = DlbState::onUser; break;
1874         default: gmx_incons("Invalid dlbOption enum value");
1875     }
1876
1877     /* Reruns don't support DLB: bail or override auto mode */
1878     if (mdrunOptions.rerun)
1879     {
1880         std::string reasonStr = "it is not supported in reruns.";
1881         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1882     }
1883
1884     /* Unsupported integrators */
1885     if (!EI_DYNAMICS(ir->eI))
1886     {
1887         auto reasonStr = gmx::formatString(
1888                 "it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1889         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1890     }
1891
1892     /* Without cycle counters we can't time work to balance on */
1893     if (!bRecordLoad)
1894     {
1895         std::string reasonStr =
1896                 "cycle counters unsupported or not enabled in the operating system kernel.";
1897         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1898     }
1899
1900     if (mdrunOptions.reproducible)
1901     {
1902         std::string reasonStr = "you started a reproducible run.";
1903         switch (dlbState)
1904         {
1905             case DlbState::offUser: break;
1906             case DlbState::offForever:
1907                 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1908                 break;
1909             case DlbState::offCanTurnOn: return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1910             case DlbState::onCanTurnOff:
1911                 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1912                 break;
1913             case DlbState::onUser:
1914                 return forceDlbOffOrBail(
1915                         dlbState,
1916                         reasonStr
1917                                 + " In load balanced runs binary reproducibility cannot be "
1918                                   "ensured.",
1919                         mdlog);
1920             default:
1921                 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice",
1922                           static_cast<int>(dlbState));
1923         }
1924     }
1925
1926     return dlbState;
1927 }
1928
1929 static gmx_domdec_comm_t* init_dd_comm()
1930 {
1931     gmx_domdec_comm_t* comm = new gmx_domdec_comm_t;
1932
1933     comm->n_load_have    = 0;
1934     comm->n_load_collect = 0;
1935
1936     comm->haveTurnedOffDlb = false;
1937
1938     for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
1939     {
1940         comm->sum_nat[i] = 0;
1941     }
1942     comm->ndecomp   = 0;
1943     comm->nload     = 0;
1944     comm->load_step = 0;
1945     comm->load_sum  = 0;
1946     comm->load_max  = 0;
1947     clear_ivec(comm->load_lim);
1948     comm->load_mdf = 0;
1949     comm->load_pme = 0;
1950
1951     /* This should be replaced by a unique pointer */
1952     comm->balanceRegion = ddBalanceRegionAllocate();
1953
1954     return comm;
1955 }
1956
1957 /* Returns whether mtop contains constraints and/or vsites */
1958 static bool systemHasConstraintsOrVsites(const gmx_mtop_t& mtop)
1959 {
1960     auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
1961     int  nmol;
1962     while (const InteractionLists* ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
1963     {
1964         if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
1965         {
1966             return true;
1967         }
1968     }
1969
1970     return false;
1971 }
1972
1973 static void setupUpdateGroups(const gmx::MDLogger& mdlog,
1974                               const gmx_mtop_t&    mtop,
1975                               const t_inputrec&    inputrec,
1976                               const real           cutoffMargin,
1977                               DDSystemInfo*        systemInfo)
1978 {
1979     /* When we have constraints and/or vsites, it is beneficial to use
1980      * update groups (when possible) to allow independent update of groups.
1981      */
1982     if (!systemHasConstraintsOrVsites(mtop))
1983     {
1984         /* No constraints or vsites, atoms can be updated independently */
1985         return;
1986     }
1987
1988     systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
1989     systemInfo->useUpdateGroups               = (!systemInfo->updateGroupingPerMoleculetype.empty()
1990                                    && getenv("GMX_NO_UPDATEGROUPS") == nullptr);
1991
1992     if (systemInfo->useUpdateGroups)
1993     {
1994         int numUpdateGroups = 0;
1995         for (const auto& molblock : mtop.molblock)
1996         {
1997             numUpdateGroups += molblock.nmol
1998                                * systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
1999         }
2000
2001         systemInfo->maxUpdateGroupRadius = computeMaxUpdateGroupRadius(
2002                 mtop, systemInfo->updateGroupingPerMoleculetype, maxReferenceTemperature(inputrec));
2003
2004         /* To use update groups, the large domain-to-domain cutoff distance
2005          * should be compatible with the box size.
2006          */
2007         systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
2008
2009         if (systemInfo->useUpdateGroups)
2010         {
2011             GMX_LOG(mdlog.info)
2012                     .appendTextFormatted(
2013                             "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f "
2014                             "nm\n",
2015                             numUpdateGroups, mtop.natoms / static_cast<double>(numUpdateGroups),
2016                             systemInfo->maxUpdateGroupRadius);
2017         }
2018         else
2019         {
2020             GMX_LOG(mdlog.info)
2021                     .appendTextFormatted(
2022                             "The combination of rlist and box size prohibits the use of update "
2023                             "groups\n");
2024             systemInfo->updateGroupingPerMoleculetype.clear();
2025         }
2026     }
2027 }
2028
2029 UnitCellInfo::UnitCellInfo(const t_inputrec& ir) :
2030     npbcdim(ePBC2npbcdim(ir.ePBC)),
2031     numBoundedDimensions(inputrec2nboundeddim(&ir)),
2032     ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
2033     haveScrewPBC(ir.ePBC == epbcSCREW)
2034 {
2035 }
2036
2037 /* Returns whether molecules are always whole, i.e. not broken by PBC */
2038 static bool moleculesAreAlwaysWhole(const gmx_mtop_t&                           mtop,
2039                                     const bool                                  useUpdateGroups,
2040                                     gmx::ArrayRef<const gmx::RangePartitioning> updateGroupingPerMoleculetype)
2041 {
2042     if (useUpdateGroups)
2043     {
2044         GMX_RELEASE_ASSERT(updateGroupingPerMoleculetype.size() == mtop.moltype.size(),
2045                            "Need one grouping per moltype");
2046         for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
2047         {
2048             if (updateGroupingPerMoleculetype[mol].numBlocks() > 1)
2049             {
2050                 return false;
2051             }
2052         }
2053     }
2054     else
2055     {
2056         for (const auto& moltype : mtop.moltype)
2057         {
2058             if (moltype.atoms.nr > 1)
2059             {
2060                 return false;
2061             }
2062         }
2063     }
2064
2065     return true;
2066 }
2067
2068 /*! \brief Generate the simulation system information */
2069 static DDSystemInfo getSystemInfo(const gmx::MDLogger&           mdlog,
2070                                   const t_commrec*               cr,
2071                                   const DomdecOptions&           options,
2072                                   const gmx_mtop_t&              mtop,
2073                                   const t_inputrec&              ir,
2074                                   const matrix                   box,
2075                                   gmx::ArrayRef<const gmx::RVec> xGlobal)
2076 {
2077     const real tenPercentMargin = 1.1;
2078
2079     DDSystemInfo systemInfo;
2080
2081     /* We need to decide on update groups early, as this affects communication distances */
2082     systemInfo.useUpdateGroups = false;
2083     if (ir.cutoff_scheme == ecutsVERLET)
2084     {
2085         real cutoffMargin = std::sqrt(max_cutoff2(ir.ePBC, box)) - ir.rlist;
2086         setupUpdateGroups(mdlog, mtop, ir, cutoffMargin, &systemInfo);
2087     }
2088
2089     systemInfo.moleculesAreAlwaysWhole = moleculesAreAlwaysWhole(
2090             mtop, systemInfo.useUpdateGroups, systemInfo.updateGroupingPerMoleculetype);
2091     systemInfo.haveInterDomainBondeds =
2092             (!systemInfo.moleculesAreAlwaysWhole || mtop.bIntermolecularInteractions);
2093     systemInfo.haveInterDomainMultiBodyBondeds =
2094             (systemInfo.haveInterDomainBondeds && multi_body_bondeds_count(&mtop) > 0);
2095
2096     if (systemInfo.useUpdateGroups)
2097     {
2098         systemInfo.haveSplitConstraints = false;
2099         systemInfo.haveSplitSettles     = false;
2100     }
2101     else
2102     {
2103         systemInfo.haveSplitConstraints = (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0
2104                                            || gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0);
2105         systemInfo.haveSplitSettles     = (gmx_mtop_ftype_count(mtop, F_SETTLE) > 0);
2106     }
2107
2108     if (ir.rlist == 0)
2109     {
2110         /* Set the cut-off to some very large value,
2111          * so we don't need if statements everywhere in the code.
2112          * We use sqrt, since the cut-off is squared in some places.
2113          */
2114         systemInfo.cutoff = GMX_CUTOFF_INF;
2115     }
2116     else
2117     {
2118         systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir.rlist);
2119     }
2120     systemInfo.minCutoffForMultiBody = 0;
2121
2122     /* Determine the minimum cell size limit, affected by many factors */
2123     systemInfo.cellsizeLimit             = 0;
2124     systemInfo.filterBondedCommunication = false;
2125
2126     /* We do not allow home atoms to move beyond the neighboring domain
2127      * between domain decomposition steps, which limits the cell size.
2128      * Get an estimate of cell size limit due to atom displacement.
2129      * In most cases this is a large overestimate, because it assumes
2130      * non-interaction atoms.
2131      * We set the chance to 1 in a trillion steps.
2132      */
2133     constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2134     const real     limitForAtomDisplacement          = minCellSizeForAtomDisplacement(
2135             mtop, ir, systemInfo.updateGroupingPerMoleculetype, c_chanceThatAtomMovesBeyondDomain);
2136     GMX_LOG(mdlog.info).appendTextFormatted("Minimum cell size due to atom displacement: %.3f nm", limitForAtomDisplacement);
2137
2138     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, limitForAtomDisplacement);
2139
2140     /* TODO: PME decomposition currently requires atoms not to be more than
2141      *       2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2142      *       In nearly all cases, limitForAtomDisplacement will be smaller
2143      *       than 2/3*rlist, so the PME requirement is satisfied.
2144      *       But it would be better for both correctness and performance
2145      *       to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2146      *       Note that we would need to improve the pairlist buffer case.
2147      */
2148
2149     if (systemInfo.haveInterDomainBondeds)
2150     {
2151         if (options.minimumCommunicationRange > 0)
2152         {
2153             systemInfo.minCutoffForMultiBody =
2154                     atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2155             if (options.useBondedCommunication)
2156             {
2157                 systemInfo.filterBondedCommunication =
2158                         (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2159             }
2160             else
2161             {
2162                 systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
2163             }
2164         }
2165         else if (ir.bPeriodicMols)
2166         {
2167             /* Can not easily determine the required cut-off */
2168             GMX_LOG(mdlog.warning)
2169                     .appendText(
2170                             "NOTE: Periodic molecules are present in this system. Because of this, "
2171                             "the domain decomposition algorithm cannot easily determine the "
2172                             "minimum cell size that it requires for treating bonded interactions. "
2173                             "Instead, domain decomposition will assume that half the non-bonded "
2174                             "cut-off will be a suitable lower bound.");
2175             systemInfo.minCutoffForMultiBody = systemInfo.cutoff / 2;
2176         }
2177         else
2178         {
2179             real r_2b, r_mb;
2180
2181             if (MASTER(cr))
2182             {
2183                 dd_bonded_cg_distance(mdlog, &mtop, &ir, as_rvec_array(xGlobal.data()), box,
2184                                       options.checkBondedInteractions, &r_2b, &r_mb);
2185             }
2186             gmx_bcast(sizeof(r_2b), &r_2b, cr);
2187             gmx_bcast(sizeof(r_mb), &r_mb, cr);
2188
2189             /* We use an initial margin of 10% for the minimum cell size,
2190              * except when we are just below the non-bonded cut-off.
2191              */
2192             if (options.useBondedCommunication)
2193             {
2194                 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2195                 {
2196                     const real r_bonded              = std::max(r_2b, r_mb);
2197                     systemInfo.minCutoffForMultiBody = tenPercentMargin * r_bonded;
2198                     /* This is the (only) place where we turn on the filtering */
2199                     systemInfo.filterBondedCommunication = true;
2200                 }
2201                 else
2202                 {
2203                     const real r_bonded = r_mb;
2204                     systemInfo.minCutoffForMultiBody =
2205                             std::min(tenPercentMargin * r_bonded, systemInfo.cutoff);
2206                 }
2207                 /* We determine cutoff_mbody later */
2208                 systemInfo.increaseMultiBodyCutoff = true;
2209             }
2210             else
2211             {
2212                 /* No special bonded communication,
2213                  * simply increase the DD cut-off.
2214                  */
2215                 systemInfo.minCutoffForMultiBody = tenPercentMargin * std::max(r_2b, r_mb);
2216                 systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
2217             }
2218         }
2219         GMX_LOG(mdlog.info)
2220                 .appendTextFormatted("Minimum cell size due to bonded interactions: %.3f nm",
2221                                      systemInfo.minCutoffForMultiBody);
2222
2223         systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.minCutoffForMultiBody);
2224     }
2225
2226     systemInfo.constraintCommunicationRange = 0;
2227     if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
2228     {
2229         /* There is a cell size limit due to the constraints (P-LINCS) */
2230         systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, &mtop, &ir);
2231         GMX_LOG(mdlog.info)
2232                 .appendTextFormatted("Estimated maximum distance required for P-LINCS: %.3f nm",
2233                                      systemInfo.constraintCommunicationRange);
2234         if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2235         {
2236             GMX_LOG(mdlog.info)
2237                     .appendText(
2238                             "This distance will limit the DD cell size, you can override this with "
2239                             "-rcon");
2240         }
2241     }
2242     else if (options.constraintCommunicationRange > 0)
2243     {
2244         /* Here we do not check for dd->splitConstraints.
2245          * because one can also set a cell size limit for virtual sites only
2246          * and at this point we don't know yet if there are intercg v-sites.
2247          */
2248         GMX_LOG(mdlog.info)
2249                 .appendTextFormatted("User supplied maximum distance required for P-LINCS: %.3f nm",
2250                                      options.constraintCommunicationRange);
2251         systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2252     }
2253     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.constraintCommunicationRange);
2254
2255     return systemInfo;
2256 }
2257
2258 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
2259  * implemented. */
2260 static void checkDDGridSetup(const DDGridSetup&   ddGridSetup,
2261                              const t_commrec*     cr,
2262                              const DomdecOptions& options,
2263                              const DDSettings&    ddSettings,
2264                              const DDSystemInfo&  systemInfo,
2265                              const real           cellsizeLimit,
2266                              const gmx_ddbox_t&   ddbox)
2267 {
2268     if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
2269     {
2270         char     buf[STRLEN];
2271         gmx_bool bC = (systemInfo.haveSplitConstraints
2272                        && systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2273         sprintf(buf, "Change the number of ranks or mdrun option %s%s%s", !bC ? "-rdd" : "-rcon",
2274                 ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2275                 bC ? " or your LINCS settings" : "");
2276
2277         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2278                              "There is no domain decomposition for %d ranks that is compatible "
2279                              "with the given box and a minimum cell size of %g nm\n"
2280                              "%s\n"
2281                              "Look in the log file for details on the domain decomposition",
2282                              cr->nnodes - ddGridSetup.numPmeOnlyRanks, cellsizeLimit, buf);
2283     }
2284
2285     const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
2286     if (acs < cellsizeLimit)
2287     {
2288         if (options.numCells[XX] <= 0)
2289         {
2290             GMX_RELEASE_ASSERT(
2291                     false,
2292                     "dd_choose_grid() should return a grid that satisfies the cell size limits");
2293         }
2294         else
2295         {
2296             gmx_fatal_collective(
2297                     FARGS, cr->mpi_comm_mysim, MASTER(cr),
2298                     "The initial cell size (%f) is smaller than the cell size limit (%f), change "
2299                     "options -dd, -rdd or -rcon, see the log file for details",
2300                     acs, cellsizeLimit);
2301         }
2302     }
2303
2304     const int numPPRanks =
2305             ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2306     if (cr->nnodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2307     {
2308         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2309                              "The size of the domain decomposition grid (%d) does not match the "
2310                              "number of PP ranks (%d). The total number of ranks is %d",
2311                              numPPRanks, cr->nnodes - ddGridSetup.numPmeOnlyRanks, cr->nnodes);
2312     }
2313     if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2314     {
2315         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2316                              "The number of separate PME ranks (%d) is larger than the number of "
2317                              "PP ranks (%d), this is not supported.",
2318                              ddGridSetup.numPmeOnlyRanks, numPPRanks);
2319     }
2320 }
2321
2322 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2323 static DDRankSetup getDDRankSetup(const gmx::MDLogger& mdlog,
2324                                   t_commrec*           cr,
2325                                   const DDGridSetup&   ddGridSetup,
2326                                   const t_inputrec&    ir)
2327 {
2328     GMX_LOG(mdlog.info)
2329             .appendTextFormatted("Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2330                                  ddGridSetup.numDomains[XX], ddGridSetup.numDomains[YY],
2331                                  ddGridSetup.numDomains[ZZ], ddGridSetup.numPmeOnlyRanks);
2332
2333     DDRankSetup ddRankSetup;
2334
2335     ddRankSetup.numPPRanks = cr->nnodes - ddGridSetup.numPmeOnlyRanks;
2336     copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2337
2338     ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2339     if (ddRankSetup.usePmeOnlyRanks)
2340     {
2341         ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2342     }
2343     else
2344     {
2345         ddRankSetup.numRanksDoingPme =
2346                 ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2347     }
2348
2349     if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2350     {
2351         /* The following choices should match those
2352          * in comm_cost_est in domdec_setup.c.
2353          * Note that here the checks have to take into account
2354          * that the decomposition might occur in a different order than xyz
2355          * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2356          * in which case they will not match those in comm_cost_est,
2357          * but since that is mainly for testing purposes that's fine.
2358          */
2359         if (ddGridSetup.numDDDimensions >= 2 && ddGridSetup.ddDimensions[0] == XX
2360             && ddGridSetup.ddDimensions[1] == YY
2361             && ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX]
2362             && ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0
2363             && getenv("GMX_PMEONEDD") == nullptr)
2364         {
2365             ddRankSetup.npmedecompdim = 2;
2366             ddRankSetup.npmenodes_x   = ddGridSetup.numDomains[XX];
2367             ddRankSetup.npmenodes_y   = ddRankSetup.numRanksDoingPme / ddRankSetup.npmenodes_x;
2368         }
2369         else
2370         {
2371             /* In case nc is 1 in both x and y we could still choose to
2372              * decompose pme in y instead of x, but we use x for simplicity.
2373              */
2374             ddRankSetup.npmedecompdim = 1;
2375             if (ddGridSetup.ddDimensions[0] == YY)
2376             {
2377                 ddRankSetup.npmenodes_x = 1;
2378                 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2379             }
2380             else
2381             {
2382                 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2383                 ddRankSetup.npmenodes_y = 1;
2384             }
2385         }
2386         GMX_LOG(mdlog.info)
2387                 .appendTextFormatted("PME domain decomposition: %d x %d x %d",
2388                                      ddRankSetup.npmenodes_x, ddRankSetup.npmenodes_y, 1);
2389     }
2390     else
2391     {
2392         ddRankSetup.npmedecompdim = 0;
2393         ddRankSetup.npmenodes_x   = 0;
2394         ddRankSetup.npmenodes_y   = 0;
2395     }
2396
2397     return ddRankSetup;
2398 }
2399
2400 /*! \brief Set the cell size and interaction limits */
2401 static void set_dd_limits(const gmx::MDLogger& mdlog,
2402                           t_commrec*           cr,
2403                           gmx_domdec_t*        dd,
2404                           const DomdecOptions& options,
2405                           const DDSettings&    ddSettings,
2406                           const DDSystemInfo&  systemInfo,
2407                           const DDGridSetup&   ddGridSetup,
2408                           const int            numPPRanks,
2409                           const gmx_mtop_t*    mtop,
2410                           const t_inputrec*    ir,
2411                           const gmx_ddbox_t&   ddbox)
2412 {
2413     gmx_domdec_comm_t* comm = dd->comm;
2414     comm->ddSettings        = ddSettings;
2415
2416     /* Initialize to GPU share count to 0, might change later */
2417     comm->nrank_gpu_shared = 0;
2418
2419     comm->dlbState = comm->ddSettings.initialDlbState;
2420     dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2421     /* To consider turning DLB on after 2*nstlist steps we need to check
2422      * at partitioning count 3. Thus we need to increase the first count by 2.
2423      */
2424     comm->ddPartioningCountFirstDlbOff += 2;
2425
2426     comm->bPMELoadBalDLBLimits = FALSE;
2427
2428     /* Allocate the charge group/atom sorting struct */
2429     comm->sort = std::make_unique<gmx_domdec_sort_t>();
2430
2431     comm->systemInfo = systemInfo;
2432
2433     if (systemInfo.useUpdateGroups)
2434     {
2435         /* Note: We would like to use dd->nnodes for the atom count estimate,
2436          *       but that is not yet available here. But this anyhow only
2437          *       affect performance up to the second dd_partition_system call.
2438          */
2439         const int homeAtomCountEstimate = mtop->natoms / numPPRanks;
2440         comm->updateGroupsCog           = std::make_unique<gmx::UpdateGroupsCog>(
2441                 *mtop, systemInfo.updateGroupingPerMoleculetype, maxReferenceTemperature(*ir),
2442                 homeAtomCountEstimate);
2443     }
2444
2445     /* Set the DD setup given by ddGridSetup */
2446     copy_ivec(ddGridSetup.numDomains, dd->nc);
2447     dd->ndim = ddGridSetup.numDDDimensions;
2448     copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2449
2450     dd->nnodes = dd->nc[XX] * dd->nc[YY] * dd->nc[ZZ];
2451
2452     snew(comm->slb_frac, DIM);
2453     if (isDlbDisabled(comm))
2454     {
2455         comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
2456         comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
2457         comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
2458     }
2459
2460     /* Set the multi-body cut-off and cellsize limit for DLB */
2461     comm->cutoff_mbody   = systemInfo.minCutoffForMultiBody;
2462     comm->cellsize_limit = systemInfo.cellsizeLimit;
2463     if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2464     {
2465         if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2466         {
2467             /* Set the bonded communication distance to halfway
2468              * the minimum and the maximum,
2469              * since the extra communication cost is nearly zero.
2470              */
2471             real acs           = average_cellsize_min(ddbox, dd->nc);
2472             comm->cutoff_mbody = 0.5 * (systemInfo.minCutoffForMultiBody + acs);
2473             if (!isDlbDisabled(comm))
2474             {
2475                 /* Check if this does not limit the scaling */
2476                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, options.dlbScaling * acs);
2477             }
2478             if (!systemInfo.filterBondedCommunication)
2479             {
2480                 /* Without bBondComm do not go beyond the n.b. cut-off */
2481                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2482                 if (comm->cellsize_limit >= systemInfo.cutoff)
2483                 {
2484                     /* We don't loose a lot of efficieny
2485                      * when increasing it to the n.b. cut-off.
2486                      * It can even be slightly faster, because we need
2487                      * less checks for the communication setup.
2488                      */
2489                     comm->cutoff_mbody = systemInfo.cutoff;
2490                 }
2491             }
2492             /* Check if we did not end up below our original limit */
2493             comm->cutoff_mbody = std::max(comm->cutoff_mbody, systemInfo.minCutoffForMultiBody);
2494
2495             if (comm->cutoff_mbody > comm->cellsize_limit)
2496             {
2497                 comm->cellsize_limit = comm->cutoff_mbody;
2498             }
2499         }
2500         /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2501     }
2502
2503     if (debug)
2504     {
2505         fprintf(debug,
2506                 "Bonded atom communication beyond the cut-off: %s\n"
2507                 "cellsize limit %f\n",
2508                 gmx::boolToString(systemInfo.filterBondedCommunication), comm->cellsize_limit);
2509     }
2510
2511     if (MASTER(cr))
2512     {
2513         check_dd_restrictions(dd, ir, mdlog);
2514     }
2515 }
2516
2517 void dd_init_bondeds(FILE*              fplog,
2518                      gmx_domdec_t*      dd,
2519                      const gmx_mtop_t*  mtop,
2520                      const gmx_vsite_t* vsite,
2521                      const t_inputrec*  ir,
2522                      gmx_bool           bBCheck,
2523                      cginfo_mb_t*       cginfo_mb)
2524 {
2525     gmx_domdec_comm_t* comm;
2526
2527     dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
2528
2529     comm = dd->comm;
2530
2531     if (comm->systemInfo.filterBondedCommunication)
2532     {
2533         /* Communicate atoms beyond the cut-off for bonded interactions */
2534         comm->bondedLinks = makeBondedLinks(mtop, cginfo_mb);
2535     }
2536     else
2537     {
2538         /* Only communicate atoms based on cut-off */
2539         comm->bondedLinks = nullptr;
2540     }
2541 }
2542
2543 static void writeSettings(gmx::TextWriter*   log,
2544                           gmx_domdec_t*      dd,
2545                           const gmx_mtop_t*  mtop,
2546                           const t_inputrec*  ir,
2547                           gmx_bool           bDynLoadBal,
2548                           real               dlb_scale,
2549                           const gmx_ddbox_t* ddbox)
2550 {
2551     gmx_domdec_comm_t* comm;
2552     int                d;
2553     ivec               np;
2554     real               limit, shrink;
2555
2556     comm = dd->comm;
2557
2558     if (bDynLoadBal)
2559     {
2560         log->writeString("The maximum number of communication pulses is:");
2561         for (d = 0; d < dd->ndim; d++)
2562         {
2563             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2564         }
2565         log->ensureLineBreak();
2566         log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm",
2567                                 comm->cellsize_limit);
2568         log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2569         log->writeString("The allowed shrink of domain decomposition cells is:");
2570         for (d = 0; d < DIM; d++)
2571         {
2572             if (dd->nc[d] > 1)
2573             {
2574                 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
2575                 {
2576                     shrink = 0;
2577                 }
2578                 else
2579                 {
2580                     shrink = comm->cellsize_min_dlb[d]
2581                              / (ddbox->box_size[d] * ddbox->skew_fac[d] / dd->nc[d]);
2582                 }
2583                 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2584             }
2585         }
2586         log->ensureLineBreak();
2587     }
2588     else
2589     {
2590         set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2591         log->writeString("The initial number of communication pulses is:");
2592         for (d = 0; d < dd->ndim; d++)
2593         {
2594             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2595         }
2596         log->ensureLineBreak();
2597         log->writeString("The initial domain decomposition cell size is:");
2598         for (d = 0; d < DIM; d++)
2599         {
2600             if (dd->nc[d] > 1)
2601             {
2602                 log->writeStringFormatted(" %c %.2f nm", dim2char(d), dd->comm->cellsize_min[d]);
2603             }
2604         }
2605         log->ensureLineBreak();
2606         log->writeLine();
2607     }
2608
2609     const bool haveInterDomainVsites =
2610             (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
2611
2612     if (comm->systemInfo.haveInterDomainBondeds || haveInterDomainVsites
2613         || comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2614     {
2615         std::string decompUnits;
2616         if (comm->systemInfo.useUpdateGroups)
2617         {
2618             decompUnits = "atom groups";
2619         }
2620         else
2621         {
2622             decompUnits = "atoms";
2623         }
2624
2625         log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:",
2626                                 decompUnits.c_str());
2627         log->writeLineFormatted("%40s  %-7s %6.3f nm", "non-bonded interactions", "",
2628                                 comm->systemInfo.cutoff);
2629
2630         if (bDynLoadBal)
2631         {
2632             limit = dd->comm->cellsize_limit;
2633         }
2634         else
2635         {
2636             if (dd->unitCellInfo.ddBoxIsDynamic)
2637             {
2638                 log->writeLine(
2639                         "(the following are initial values, they could change due to box "
2640                         "deformation)");
2641             }
2642             limit = dd->comm->cellsize_min[XX];
2643             for (d = 1; d < DIM; d++)
2644             {
2645                 limit = std::min(limit, dd->comm->cellsize_min[d]);
2646             }
2647         }
2648
2649         if (comm->systemInfo.haveInterDomainBondeds)
2650         {
2651             log->writeLineFormatted("%40s  %-7s %6.3f nm", "two-body bonded interactions", "(-rdd)",
2652                                     std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2653             log->writeLineFormatted("%40s  %-7s %6.3f nm", "multi-body bonded interactions",
2654                                     "(-rdd)",
2655                                     (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm))
2656                                             ? comm->cutoff_mbody
2657                                             : std::min(comm->systemInfo.cutoff, limit));
2658         }
2659         if (haveInterDomainVsites)
2660         {
2661             log->writeLineFormatted("%40s  %-7s %6.3f nm", "virtual site constructions", "(-rcon)", limit);
2662         }
2663         if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2664         {
2665             std::string separation =
2666                     gmx::formatString("atoms separated by up to %d constraints", 1 + ir->nProjOrder);
2667             log->writeLineFormatted("%40s  %-7s %6.3f nm\n", separation.c_str(), "(-rcon)", limit);
2668         }
2669         log->ensureLineBreak();
2670     }
2671 }
2672
2673 static void logSettings(const gmx::MDLogger& mdlog,
2674                         gmx_domdec_t*        dd,
2675                         const gmx_mtop_t*    mtop,
2676                         const t_inputrec*    ir,
2677                         real                 dlb_scale,
2678                         const gmx_ddbox_t*   ddbox)
2679 {
2680     gmx::StringOutputStream stream;
2681     gmx::TextWriter         log(&stream);
2682     writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2683     if (dd->comm->dlbState == DlbState::offCanTurnOn)
2684     {
2685         {
2686             log.ensureEmptyLine();
2687             log.writeLine(
2688                     "When dynamic load balancing gets turned on, these settings will change to:");
2689         }
2690         writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2691     }
2692     GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2693 }
2694
2695 static void set_cell_limits_dlb(const gmx::MDLogger& mdlog,
2696                                 gmx_domdec_t*        dd,
2697                                 real                 dlb_scale,
2698                                 const t_inputrec*    ir,
2699                                 const gmx_ddbox_t*   ddbox)
2700 {
2701     gmx_domdec_comm_t* comm;
2702     int                d, dim, npulse, npulse_d_max, npulse_d;
2703     gmx_bool           bNoCutOff;
2704
2705     comm = dd->comm;
2706
2707     bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2708
2709     /* Determine the maximum number of comm. pulses in one dimension */
2710
2711     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2712
2713     /* Determine the maximum required number of grid pulses */
2714     if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2715     {
2716         /* Only a single pulse is required */
2717         npulse = 1;
2718     }
2719     else if (!bNoCutOff && comm->cellsize_limit > 0)
2720     {
2721         /* We round down slightly here to avoid overhead due to the latency
2722          * of extra communication calls when the cut-off
2723          * would be only slightly longer than the cell size.
2724          * Later cellsize_limit is redetermined,
2725          * so we can not miss interactions due to this rounding.
2726          */
2727         npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff / comm->cellsize_limit);
2728     }
2729     else
2730     {
2731         /* There is no cell size limit */
2732         npulse = std::max(dd->nc[XX] - 1, std::max(dd->nc[YY] - 1, dd->nc[ZZ] - 1));
2733     }
2734
2735     if (!bNoCutOff && npulse > 1)
2736     {
2737         /* See if we can do with less pulses, based on dlb_scale */
2738         npulse_d_max = 0;
2739         for (d = 0; d < dd->ndim; d++)
2740         {
2741             dim      = dd->dim[d];
2742             npulse_d = static_cast<int>(
2743                     1
2744                     + dd->nc[dim] * comm->systemInfo.cutoff
2745                               / (ddbox->box_size[dim] * ddbox->skew_fac[dim] * dlb_scale));
2746             npulse_d_max = std::max(npulse_d_max, npulse_d);
2747         }
2748         npulse = std::min(npulse, npulse_d_max);
2749     }
2750
2751     /* This env var can override npulse */
2752     d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2753     if (d > 0)
2754     {
2755         npulse = d;
2756     }
2757
2758     comm->maxpulse       = 1;
2759     comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
2760     for (d = 0; d < dd->ndim; d++)
2761     {
2762         if (comm->ddSettings.request1DAnd1Pulse)
2763         {
2764             comm->cd[d].np_dlb = 1;
2765         }
2766         else
2767         {
2768             comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]] - 1);
2769             comm->maxpulse     = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2770         }
2771         if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]] - 1)
2772         {
2773             comm->bVacDLBNoLimit = FALSE;
2774         }
2775     }
2776
2777     /* cellsize_limit is set for LINCS in init_domain_decomposition */
2778     if (!comm->bVacDLBNoLimit)
2779     {
2780         comm->cellsize_limit = std::max(comm->cellsize_limit, comm->systemInfo.cutoff / comm->maxpulse);
2781     }
2782     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2783     /* Set the minimum cell size for each DD dimension */
2784     for (d = 0; d < dd->ndim; d++)
2785     {
2786         if (comm->bVacDLBNoLimit || comm->cd[d].np_dlb * comm->cellsize_limit >= comm->systemInfo.cutoff)
2787         {
2788             comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2789         }
2790         else
2791         {
2792             comm->cellsize_min_dlb[dd->dim[d]] = comm->systemInfo.cutoff / comm->cd[d].np_dlb;
2793         }
2794     }
2795     if (comm->cutoff_mbody <= 0)
2796     {
2797         comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2798     }
2799     if (isDlbOn(comm))
2800     {
2801         set_dlb_limits(dd);
2802     }
2803 }
2804
2805 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t& dd)
2806 {
2807     return dd.comm->systemInfo.moleculesAreAlwaysWhole;
2808 }
2809
2810 gmx_bool dd_bonded_molpbc(const gmx_domdec_t* dd, int ePBC)
2811 {
2812     /* If each molecule is a single charge group
2813      * or we use domain decomposition for each periodic dimension,
2814      * we do not need to take pbc into account for the bonded interactions.
2815      */
2816     return (ePBC != epbcNONE && dd->comm->systemInfo.haveInterDomainBondeds
2817             && !(dd->nc[XX] > 1 && dd->nc[YY] > 1 && (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
2818 }
2819
2820 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2821 static void set_ddgrid_parameters(const gmx::MDLogger& mdlog,
2822                                   gmx_domdec_t*        dd,
2823                                   real                 dlb_scale,
2824                                   const gmx_mtop_t*    mtop,
2825                                   const t_inputrec*    ir,
2826                                   const gmx_ddbox_t*   ddbox)
2827 {
2828     gmx_domdec_comm_t* comm        = dd->comm;
2829     DDRankSetup&       ddRankSetup = comm->ddRankSetup;
2830
2831     if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2832     {
2833         init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2834         if (ddRankSetup.npmedecompdim >= 2)
2835         {
2836             init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2837         }
2838     }
2839     else
2840     {
2841         ddRankSetup.numRanksDoingPme = 0;
2842         if (dd->pme_nodeid >= 0)
2843         {
2844             gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2845                                  "Can not have separate PME ranks without PME electrostatics");
2846         }
2847     }
2848
2849     if (debug)
2850     {
2851         fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2852     }
2853     if (!isDlbDisabled(comm))
2854     {
2855         set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2856     }
2857
2858     logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
2859
2860     real vol_frac;
2861     if (ir->ePBC == epbcNONE)
2862     {
2863         vol_frac = 1 - 1 / static_cast<double>(dd->nnodes);
2864     }
2865     else
2866     {
2867         vol_frac = (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, *ddbox))
2868                    / static_cast<double>(dd->nnodes);
2869     }
2870     if (debug)
2871     {
2872         fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2873     }
2874     int natoms_tot = mtop->natoms;
2875
2876     dd->ga2la = new gmx_ga2la_t(natoms_tot, static_cast<int>(vol_frac * natoms_tot));
2877 }
2878
2879 /*! \brief Get some important DD parameters which can be modified by env.vars */
2880 static DDSettings getDDSettings(const gmx::MDLogger&     mdlog,
2881                                 const DomdecOptions&     options,
2882                                 const gmx::MdrunOptions& mdrunOptions,
2883                                 const t_inputrec&        ir)
2884 {
2885     DDSettings ddSettings;
2886
2887     ddSettings.useSendRecv2        = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2888     ddSettings.dlb_scale_lim       = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2889     ddSettings.request1DAnd1Pulse  = bool(dd_getenv(mdlog, "GMX_DD_1D_1PULSE", 0));
2890     ddSettings.useDDOrderZYX       = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
2891     ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
2892     ddSettings.eFlop               = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2893     const int recload              = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2894     ddSettings.nstDDDump           = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2895     ddSettings.nstDDDumpGrid       = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2896     ddSettings.DD_debug            = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
2897
2898     if (ddSettings.useSendRecv2)
2899     {
2900         GMX_LOG(mdlog.info)
2901                 .appendText(
2902                         "Will use two sequential MPI_Sendrecv calls instead of two simultaneous "
2903                         "non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite "
2904                         "communication");
2905     }
2906
2907     if (ddSettings.eFlop)
2908     {
2909         GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2910         ddSettings.recordLoad = true;
2911     }
2912     else
2913     {
2914         ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
2915     }
2916
2917     ddSettings.initialDlbState = determineInitialDlbState(mdlog, options.dlbOption,
2918                                                           ddSettings.recordLoad, mdrunOptions, &ir);
2919     GMX_LOG(mdlog.info)
2920             .appendTextFormatted("Dynamic load balancing: %s",
2921                                  edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);
2922
2923     return ddSettings;
2924 }
2925
2926 gmx_domdec_t::gmx_domdec_t(const t_inputrec& ir) : unitCellInfo(ir) {}
2927
2928 /*! \brief Return whether the simulation described can run a 1D single-pulse DD.
2929  *
2930  * The GPU halo exchange code requires a 1D single-pulse DD. Such a DD
2931  * generally requires a larger box than other possible decompositions
2932  * with the same rank count, so the calling code might need to decide
2933  * what is the most appropriate way to run the simulation based on
2934  * whether such a DD is possible.
2935  *
2936  * This function works like init_domain_decomposition(), but will not
2937  * give a fatal error, and only uses \c cr for communicating between
2938  * ranks.
2939  *
2940  * It is safe to call before thread-MPI spawns ranks, so that
2941  * thread-MPI can decide whether and how to trigger the GPU halo
2942  * exchange code path. The number of PME ranks, if any, should be set
2943  * in \c options.numPmeRanks.
2944  */
2945 static bool canMake1DAnd1PulseDomainDecomposition(const DDSettings&              ddSettingsOriginal,
2946                                                   const t_commrec*               cr,
2947                                                   const int                      numRanksRequested,
2948                                                   const DomdecOptions&           options,
2949                                                   const gmx_mtop_t&              mtop,
2950                                                   const t_inputrec&              ir,
2951                                                   const matrix                   box,
2952                                                   gmx::ArrayRef<const gmx::RVec> xGlobal)
2953 {
2954     // Ensure we don't write any output from this checking routine
2955     gmx::MDLogger dummyLogger;
2956
2957     DDSystemInfo systemInfo = getSystemInfo(dummyLogger, cr, options, mtop, ir, box, xGlobal);
2958
2959     DDSettings ddSettings             = ddSettingsOriginal;
2960     ddSettings.request1DAnd1Pulse     = true;
2961     const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(
2962             dummyLogger, ddSettings.request1DAnd1Pulse, !isDlbDisabled(ddSettings.initialDlbState),
2963             options.dlbScaling, ir, systemInfo.cellsizeLimit);
2964     gmx_ddbox_t ddbox = { 0 };
2965     DDGridSetup ddGridSetup =
2966             getDDGridSetup(dummyLogger, cr, numRanksRequested, options, ddSettings, systemInfo,
2967                            gridSetupCellsizeLimit, mtop, ir, box, xGlobal, &ddbox);
2968
2969     const bool canMakeDDWith1DAnd1Pulse = (ddGridSetup.numDomains[XX] != 0);
2970
2971     return canMakeDDWith1DAnd1Pulse;
2972 }
2973
2974 bool is1DAnd1PulseDD(const gmx_domdec_t& dd)
2975 {
2976     const int  maxDimensionSize             = std::max(dd.nc[XX], std::max(dd.nc[YY], dd.nc[ZZ]));
2977     const int  productOfDimensionSizes      = dd.nc[XX] * dd.nc[YY] * dd.nc[ZZ];
2978     const bool decompositionHasOneDimension = (maxDimensionSize == productOfDimensionSizes);
2979
2980     const bool hasMax1Pulse =
2981             ((isDlbDisabled(dd.comm) && dd.comm->cellsize_limit >= dd.comm->systemInfo.cutoff)
2982              || (!isDlbDisabled(dd.comm) && dd.comm->maxpulse == 1));
2983
2984     return decompositionHasOneDimension && hasMax1Pulse;
2985 }
2986
2987 namespace gmx
2988 {
2989
2990 // TODO once the functionality stablizes, move this class and
2991 // supporting functionality into builder.cpp
2992 /*! \brief Impl class for DD builder */
2993 class DomainDecompositionBuilder::Impl
2994 {
2995 public:
2996     //! Constructor
2997     Impl(const MDLogger&      mdlog,
2998          t_commrec*           cr,
2999          const DomdecOptions& options,
3000          const MdrunOptions&  mdrunOptions,
3001          bool                 prefer1DAnd1Pulse,
3002          const gmx_mtop_t&    mtop,
3003          const t_inputrec&    ir,
3004          const matrix         box,
3005          ArrayRef<const RVec> xGlobal);
3006
3007     //! Build the resulting DD manager
3008     gmx_domdec_t* build(LocalAtomSetManager* atomSets);
3009
3010     //! Objects used in constructing and configuring DD
3011     //! {
3012     //! Logging object
3013     const MDLogger& mdlog_;
3014     //! Communication object
3015     t_commrec* cr_;
3016     //! User-supplied options configuring DD behavior
3017     const DomdecOptions options_;
3018     //! Global system topology
3019     const gmx_mtop_t& mtop_;
3020     //! User input values from the tpr file
3021     const t_inputrec& ir_;
3022     //! }
3023
3024     //! Internal objects used in constructing DD
3025     //! {
3026     //! Settings combined from the user input
3027     DDSettings ddSettings_;
3028     //! Information derived from the simulation system
3029     DDSystemInfo systemInfo_;
3030     //! Box structure
3031     gmx_ddbox_t ddbox_ = { 0 };
3032     //! Organization of the DD grids
3033     DDGridSetup ddGridSetup_;
3034     //! Organzation of the DD ranks
3035     DDRankSetup ddRankSetup_;
3036     //! Number of DD cells in each dimension
3037     ivec ddCellIndex_ = { 0, 0, 0 };
3038     //! IDs of PME-only ranks
3039     std::vector<int> pmeRanks_;
3040     //! Contains a valid Cartesian-communicator-based setup, or defaults.
3041     CartesianRankSetup cartSetup_;
3042     //! }
3043 };
3044
3045 DomainDecompositionBuilder::Impl::Impl(const MDLogger&      mdlog,
3046                                        t_commrec*           cr,
3047                                        const DomdecOptions& options,
3048                                        const MdrunOptions&  mdrunOptions,
3049                                        const bool           prefer1DAnd1Pulse,
3050                                        const gmx_mtop_t&    mtop,
3051                                        const t_inputrec&    ir,
3052                                        const matrix         box,
3053                                        ArrayRef<const RVec> xGlobal) :
3054     mdlog_(mdlog),
3055     cr_(cr),
3056     options_(options),
3057     mtop_(mtop),
3058     ir_(ir)
3059 {
3060     GMX_LOG(mdlog_.info).appendTextFormatted("\nInitializing Domain Decomposition on %d ranks", cr_->nnodes);
3061
3062     ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_);
3063
3064     if (prefer1DAnd1Pulse
3065         && canMake1DAnd1PulseDomainDecomposition(ddSettings_, cr_, cr_->nnodes, options_, mtop_,
3066                                                  ir_, box, xGlobal))
3067     {
3068         ddSettings_.request1DAnd1Pulse = true;
3069     }
3070
3071     if (ddSettings_.eFlop > 1)
3072     {
3073         /* Ensure that we have different random flop counts on different ranks */
3074         srand(1 + cr_->nodeid);
3075     }
3076
3077     systemInfo_ = getSystemInfo(mdlog_, cr_, options_, mtop_, ir_, box, xGlobal);
3078
3079     const int  numRanksRequested         = cr_->nnodes;
3080     const bool checkForLargePrimeFactors = (options_.numCells[0] <= 0);
3081     checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir_.coulombtype),
3082                                    options_.numPmeRanks, checkForLargePrimeFactors);
3083
3084     // DD grid setup uses a more different cell size limit for
3085     // automated setup than the one in systemInfo_. The latter is used
3086     // in set_dd_limits() to configure DLB, for example.
3087     const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(
3088             mdlog_, ddSettings_.request1DAnd1Pulse, !isDlbDisabled(ddSettings_.initialDlbState),
3089             options_.dlbScaling, ir_, systemInfo_.cellsizeLimit);
3090     ddGridSetup_ = getDDGridSetup(mdlog_, cr_, numRanksRequested, options_, ddSettings_, systemInfo_,
3091                                   gridSetupCellsizeLimit, mtop_, ir_, box, xGlobal, &ddbox_);
3092     checkDDGridSetup(ddGridSetup_, cr_, options_, ddSettings_, systemInfo_, gridSetupCellsizeLimit, ddbox_);
3093
3094     cr_->npmenodes = ddGridSetup_.numPmeOnlyRanks;
3095
3096     ddRankSetup_ = getDDRankSetup(mdlog_, cr_, ddGridSetup_, ir_);
3097
3098     /* Generate the group communicator, also decides the duty of each rank */
3099     cartSetup_ = makeGroupCommunicators(mdlog_, ddSettings_, options_.rankOrder, ddRankSetup_, cr_,
3100                                         ddCellIndex_, &pmeRanks_);
3101 }
3102
3103 gmx_domdec_t* DomainDecompositionBuilder::Impl::build(LocalAtomSetManager* atomSets)
3104 {
3105     gmx_domdec_t* dd = new gmx_domdec_t(ir_);
3106
3107     copy_ivec(ddCellIndex_, dd->ci);
3108
3109     dd->comm = init_dd_comm();
3110
3111     dd->comm->ddRankSetup        = ddRankSetup_;
3112     dd->comm->cartesianRankSetup = cartSetup_;
3113
3114     set_dd_limits(mdlog_, cr_, dd, options_, ddSettings_, systemInfo_, ddGridSetup_,
3115                   ddRankSetup_.numPPRanks, &mtop_, &ir_, ddbox_);
3116
3117     setupGroupCommunication(mdlog_, ddSettings_, pmeRanks_, cr_, mtop_.natoms, dd);
3118
3119     if (thisRankHasDuty(cr_, DUTY_PP))
3120     {
3121         set_ddgrid_parameters(mdlog_, dd, options_.dlbScaling, &mtop_, &ir_, &ddbox_);
3122
3123         setup_neighbor_relations(dd);
3124     }
3125
3126     /* Set overallocation to avoid frequent reallocation of arrays */
3127     set_over_alloc_dd(TRUE);
3128
3129     dd->atomSets = atomSets;
3130
3131     return dd;
3132 }
3133
3134 DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger&      mdlog,
3135                                                        t_commrec*           cr,
3136                                                        const DomdecOptions& options,
3137                                                        const MdrunOptions&  mdrunOptions,
3138                                                        const bool           prefer1DAnd1Pulse,
3139                                                        const gmx_mtop_t&    mtop,
3140                                                        const t_inputrec&    ir,
3141                                                        const matrix         box,
3142                                                        ArrayRef<const RVec> xGlobal) :
3143     impl_(new Impl(mdlog, cr, options, mdrunOptions, prefer1DAnd1Pulse, mtop, ir, box, xGlobal))
3144 {
3145 }
3146
3147 gmx_domdec_t* DomainDecompositionBuilder::build(LocalAtomSetManager* atomSets)
3148 {
3149     return impl_->build(atomSets);
3150 }
3151
3152 DomainDecompositionBuilder::~DomainDecompositionBuilder() = default;
3153
3154 } // namespace gmx
3155
3156 static gmx_bool test_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3157 {
3158     gmx_domdec_t* dd;
3159     gmx_ddbox_t   ddbox;
3160     int           d, dim, np;
3161     real          inv_cell_size;
3162     int           LocallyLimited;
3163
3164     dd = cr->dd;
3165
3166     set_ddbox(*dd, false, box, true, x, &ddbox);
3167
3168     LocallyLimited = 0;
3169
3170     for (d = 0; d < dd->ndim; d++)
3171     {
3172         dim = dd->dim[d];
3173
3174         inv_cell_size = DD_CELL_MARGIN * dd->nc[dim] / ddbox.box_size[dim];
3175         if (dd->unitCellInfo.ddBoxIsDynamic)
3176         {
3177             inv_cell_size *= DD_PRES_SCALE_MARGIN;
3178         }
3179
3180         np = 1 + static_cast<int>(cutoffRequested * inv_cell_size * ddbox.skew_fac[dim]);
3181
3182         if (dd->comm->ddSettings.request1DAnd1Pulse && np > 1)
3183         {
3184             return FALSE;
3185         }
3186
3187         if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3188         {
3189             if (np > dd->comm->cd[d].np_dlb)
3190             {
3191                 return FALSE;
3192             }
3193
3194             /* If a current local cell size is smaller than the requested
3195              * cut-off, we could still fix it, but this gets very complicated.
3196              * Without fixing here, we might actually need more checks.
3197              */
3198             real cellSizeAlongDim =
3199                     (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim]) * ddbox.skew_fac[dim];
3200             if (cellSizeAlongDim * dd->comm->cd[d].np_dlb < cutoffRequested)
3201             {
3202                 LocallyLimited = 1;
3203             }
3204         }
3205     }
3206
3207     if (!isDlbDisabled(dd->comm))
3208     {
3209         /* If DLB is not active yet, we don't need to check the grid jumps.
3210          * Actually we shouldn't, because then the grid jump data is not set.
3211          */
3212         if (isDlbOn(dd->comm) && check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3213         {
3214             LocallyLimited = 1;
3215         }
3216
3217         gmx_sumi(1, &LocallyLimited, cr);
3218
3219         if (LocallyLimited > 0)
3220         {
3221             return FALSE;
3222         }
3223     }
3224
3225     return TRUE;
3226 }
3227
3228 gmx_bool change_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3229 {
3230     gmx_bool bCutoffAllowed;
3231
3232     bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3233
3234     if (bCutoffAllowed)
3235     {
3236         cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3237     }
3238
3239     return bCutoffAllowed;
3240 }