src/gromacs/domdec/domdec.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2005,2006,2007,2008,2009 by the GROMACS development team.
   5  * Copyright (c) 2010,2011,2012,2013,2014 by the GROMACS development team.
   6  * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team.
   7  * Copyright (c) 2020, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38
  39 #include "gmxpre.h"
  40
  41 #include "domdec.h"
  42
  43 #include "config.h"
  44
  45 #include <cassert>
  46 #include <cinttypes>
  47 #include <climits>
  48 #include <cmath>
  49 #include <cstdio>
  50 #include <cstdlib>
  51 #include <cstring>
  52
  53 #include <algorithm>
  54 #include <memory>
  55
  56 #include "gromacs/domdec/builder.h"
  57 #include "gromacs/domdec/collect.h"
  58 #include "gromacs/domdec/dlb.h"
  59 #include "gromacs/domdec/dlbtiming.h"
  60 #include "gromacs/domdec/domdec_network.h"
  61 #include "gromacs/domdec/ga2la.h"
  62 #include "gromacs/domdec/gpuhaloexchange.h"
  63 #include "gromacs/domdec/options.h"
  64 #include "gromacs/domdec/partition.h"
  65 #include "gromacs/gmxlib/network.h"
  66 #include "gromacs/gmxlib/nrnb.h"
  67 #include "gromacs/gpu_utils/device_stream_manager.h"
  68 #include "gromacs/gpu_utils/gpu_utils.h"
  69 #include "gromacs/hardware/hw_info.h"
  70 #include "gromacs/listed_forces/manage_threading.h"
  71 #include "gromacs/math/vec.h"
  72 #include "gromacs/math/vectypes.h"
  73 #include "gromacs/mdlib/calc_verletbuf.h"
  74 #include "gromacs/mdlib/constr.h"
  75 #include "gromacs/mdlib/constraintrange.h"
  76 #include "gromacs/mdlib/updategroups.h"
  77 #include "gromacs/mdlib/vsite.h"
  78 #include "gromacs/mdtypes/commrec.h"
  79 #include "gromacs/mdtypes/forceoutput.h"
  80 #include "gromacs/mdtypes/inputrec.h"
  81 #include "gromacs/mdtypes/mdrunoptions.h"
  82 #include "gromacs/mdtypes/state.h"
  83 #include "gromacs/pbcutil/ishift.h"
  84 #include "gromacs/pbcutil/pbc.h"
  85 #include "gromacs/pulling/pull.h"
  86 #include "gromacs/timing/wallcycle.h"
  87 #include "gromacs/topology/block.h"
  88 #include "gromacs/topology/idef.h"
  89 #include "gromacs/topology/ifunc.h"
  90 #include "gromacs/topology/mtop_lookup.h"
  91 #include "gromacs/topology/mtop_util.h"
  92 #include "gromacs/topology/topology.h"
  93 #include "gromacs/utility/basedefinitions.h"
  94 #include "gromacs/utility/basenetwork.h"
  95 #include "gromacs/utility/cstringutil.h"
  96 #include "gromacs/utility/exceptions.h"
  97 #include "gromacs/utility/fatalerror.h"
  98 #include "gromacs/utility/gmxmpi.h"
  99 #include "gromacs/utility/logger.h"
 100 #include "gromacs/utility/real.h"
 101 #include "gromacs/utility/smalloc.h"
 102 #include "gromacs/utility/strconvert.h"
 103 #include "gromacs/utility/stringstream.h"
 104 #include "gromacs/utility/stringutil.h"
 105 #include "gromacs/utility/textwriter.h"
 106
 107 #include "atomdistribution.h"
 108 #include "box.h"
 109 #include "cellsizes.h"
 110 #include "distribute.h"
 111 #include "domdec_constraints.h"
 112 #include "domdec_internal.h"
 113 #include "domdec_setup.h"
 114 #include "domdec_vsite.h"
 115 #include "redistribute.h"
 116 #include "utility.h"
 117
 118 // TODO remove this when moving domdec into gmx namespace
 119 using gmx::DdRankOrder;
 120 using gmx::DlbOption;
 121 using gmx::DomdecOptions;
 122
 123 static const char* edlbs_names[int(DlbState::nr)] = { "off", "off", "auto", "locked", "on", "on" };
 124
 125 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
 126 #define DD_CGIBS 2
 127
 128 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
 129 #define DD_FLAG_NRCG 65535
 130 #define DD_FLAG_FW(d) (1 << (16 + (d)*2))
 131 #define DD_FLAG_BW(d) (1 << (16 + (d)*2 + 1))
 132
 133 /* The DD zone order */
 134 static const ivec dd_zo[DD_MAXZONE] = { { 0, 0, 0 }, { 1, 0, 0 }, { 1, 1, 0 }, { 0, 1, 0 },
 135                                         { 0, 1, 1 }, { 0, 0, 1 }, { 1, 0, 1 }, { 1, 1, 1 } };
 136
 137 /* The non-bonded zone-pair setup for domain decomposition
 138  * The first number is the i-zone, the second number the first j-zone seen by
 139  * this i-zone, the third number the last+1 j-zone seen by this i-zone.
 140  * As is, this is for 3D decomposition, where there are 4 i-zones.
 141  * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
 142  * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
 143  */
 144 static const int ddNonbondedZonePairRanges[DD_MAXIZONE][3] = { { 0, 0, 8 },
 145                                                                { 1, 3, 6 },
 146                                                                { 2, 5, 6 },
 147                                                                { 3, 5, 7 } };
 148
 149 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
 150 {
 151     xyz[XX] = ind / (nc[YY] * nc[ZZ]);
 152     xyz[YY] = (ind / nc[ZZ]) % nc[YY];
 153     xyz[ZZ] = ind % nc[ZZ];
 154 }
 155
 156 static int ddcoord2ddnodeid(gmx_domdec_t* dd, ivec c)
 157 {
 158     int ddnodeid = -1;
 159
 160     const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
 161     const int                 ddindex   = dd_index(dd->numCells, c);
 162     if (cartSetup.bCartesianPP_PME)
 163     {
 164         ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
 165     }
 166     else if (cartSetup.bCartesianPP)
 167     {
 168 #if GMX_MPI
 169         MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
 170 #endif
 171     }
 172     else
 173     {
 174         ddnodeid = ddindex;
 175     }
 176
 177     return ddnodeid;
 178 }
 179
 180 int ddglatnr(const gmx_domdec_t* dd, int i)
 181 {
 182     int atnr;
 183
 184     if (dd == nullptr)
 185     {
 186         atnr = i + 1;
 187     }
 188     else
 189     {
 190         if (i >= dd->comm->atomRanges.numAtomsTotal())
 191         {
 192             gmx_fatal(FARGS,
 193                       "glatnr called with %d, which is larger than the local number of atoms (%d)",
 194                       i, dd->comm->atomRanges.numAtomsTotal());
 195         }
 196         atnr = dd->globalAtomIndices[i] + 1;
 197     }
 198
 199     return atnr;
 200 }
 201
 202 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t& dd)
 203 {
 204     GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
 205     return dd.comm->systemInfo.updateGroupingPerMoleculetype;
 206 }
 207
 208 void dd_store_state(gmx_domdec_t* dd, t_state* state)
 209 {
 210     int i;
 211
 212     if (state->ddp_count != dd->ddp_count)
 213     {
 214         gmx_incons("The MD state does not match the domain decomposition state");
 215     }
 216
 217     state->cg_gl.resize(dd->ncg_home);
 218     for (i = 0; i < dd->ncg_home; i++)
 219     {
 220         state->cg_gl[i] = dd->globalAtomGroupIndices[i];
 221     }
 222
 223     state->ddp_count_cg_gl = dd->ddp_count;
 224 }
 225
 226 gmx_domdec_zones_t* domdec_zones(gmx_domdec_t* dd)
 227 {
 228     return &dd->comm->zones;
 229 }
 230
 231 int dd_numAtomsZones(const gmx_domdec_t& dd)
 232 {
 233     return dd.comm->atomRanges.end(DDAtomRanges::Type::Zones);
 234 }
 235
 236 int dd_numHomeAtoms(const gmx_domdec_t& dd)
 237 {
 238     return dd.comm->atomRanges.numHomeAtoms();
 239 }
 240
 241 int dd_natoms_mdatoms(const gmx_domdec_t* dd)
 242 {
 243     /* We currently set mdatoms entries for all atoms:
 244      * local + non-local + communicated for vsite + constraints
 245      */
 246
 247     return dd->comm->atomRanges.numAtomsTotal();
 248 }
 249
 250 int dd_natoms_vsite(const gmx_domdec_t* dd)
 251 {
 252     return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
 253 }
 254
 255 void dd_get_constraint_range(const gmx_domdec_t* dd, int* at_start, int* at_end)
 256 {
 257     *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
 258     *at_end   = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
 259 }
 260
 261 void dd_move_x(gmx_domdec_t* dd, const matrix box, gmx::ArrayRef<gmx::RVec> x, gmx_wallcycle* wcycle)
 262 {
 263     wallcycle_start(wcycle, ewcMOVEX);
 264
 265     int                    nzone, nat_tot;
 266     gmx_domdec_comm_t*     comm;
 267     gmx_domdec_comm_dim_t* cd;
 268     rvec                   shift = { 0, 0, 0 };
 269     gmx_bool               bPBC, bScrew;
 270
 271     comm = dd->comm;
 272
 273     nzone   = 1;
 274     nat_tot = comm->atomRanges.numHomeAtoms();
 275     for (int d = 0; d < dd->ndim; d++)
 276     {
 277         bPBC   = (dd->ci[dd->dim[d]] == 0);
 278         bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
 279         if (bPBC)
 280         {
 281             copy_rvec(box[dd->dim[d]], shift);
 282         }
 283         cd = &comm->cd[d];
 284         for (const gmx_domdec_ind_t& ind : cd->ind)
 285         {
 286             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 287             gmx::ArrayRef<gmx::RVec>& sendBuffer = sendBufferAccess.buffer;
 288             int                       n          = 0;
 289             if (!bPBC)
 290             {
 291                 for (int j : ind.index)
 292                 {
 293                     sendBuffer[n] = x[j];
 294                     n++;
 295                 }
 296             }
 297             else if (!bScrew)
 298             {
 299                 for (int j : ind.index)
 300                 {
 301                     /* We need to shift the coordinates */
 302                     for (int d = 0; d < DIM; d++)
 303                     {
 304                         sendBuffer[n][d] = x[j][d] + shift[d];
 305                     }
 306                     n++;
 307                 }
 308             }
 309             else
 310             {
 311                 for (int j : ind.index)
 312                 {
 313                     /* Shift x */
 314                     sendBuffer[n][XX] = x[j][XX] + shift[XX];
 315                     /* Rotate y and z.
 316                      * This operation requires a special shift force
 317                      * treatment, which is performed in calc_vir.
 318                      */
 319                     sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
 320                     sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
 321                     n++;
 322                 }
 323             }
 324
 325             DDBufferAccess<gmx::RVec> receiveBufferAccess(
 326                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 327
 328             gmx::ArrayRef<gmx::RVec> receiveBuffer;
 329             if (cd->receiveInPlace)
 330             {
 331                 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
 332             }
 333             else
 334             {
 335                 receiveBuffer = receiveBufferAccess.buffer;
 336             }
 337             /* Send and receive the coordinates */
 338             ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
 339
 340             if (!cd->receiveInPlace)
 341             {
 342                 int j = 0;
 343                 for (int zone = 0; zone < nzone; zone++)
 344                 {
 345                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 346                     {
 347                         x[i] = receiveBuffer[j++];
 348                     }
 349                 }
 350             }
 351             nat_tot += ind.nrecv[nzone + 1];
 352         }
 353         nzone += nzone;
 354     }
 355
 356     wallcycle_stop(wcycle, ewcMOVEX);
 357 }
 358
 359 void dd_move_f(gmx_domdec_t* dd, gmx::ForceWithShiftForces* forceWithShiftForces, gmx_wallcycle* wcycle)
 360 {
 361     wallcycle_start(wcycle, ewcMOVEF);
 362
 363     gmx::ArrayRef<gmx::RVec> f      = forceWithShiftForces->force();
 364     gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();
 365
 366     gmx_domdec_comm_t& comm    = *dd->comm;
 367     int                nzone   = comm.zones.n / 2;
 368     int                nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
 369     for (int d = dd->ndim - 1; d >= 0; d--)
 370     {
 371         /* Only forces in domains near the PBC boundaries need to
 372            consider PBC in the treatment of fshift */
 373         const bool shiftForcesNeedPbc =
 374                 (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
 375         const bool applyScrewPbc =
 376                 (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
 377         /* Determine which shift vector we need */
 378         ivec vis        = { 0, 0, 0 };
 379         vis[dd->dim[d]] = 1;
 380         const int is    = IVEC2IS(vis);
 381
 382         /* Loop over the pulses */
 383         const gmx_domdec_comm_dim_t& cd = comm.cd[d];
 384         for (int p = cd.numPulses() - 1; p >= 0; p--)
 385         {
 386             const gmx_domdec_ind_t&   ind = cd.ind[p];
 387             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
 388             gmx::ArrayRef<gmx::RVec>& receiveBuffer = receiveBufferAccess.buffer;
 389
 390             nat_tot -= ind.nrecv[nzone + 1];
 391
 392             DDBufferAccess<gmx::RVec> sendBufferAccess(
 393                     comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 394
 395             gmx::ArrayRef<gmx::RVec> sendBuffer;
 396             if (cd.receiveInPlace)
 397             {
 398                 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
 399             }
 400             else
 401             {
 402                 sendBuffer = sendBufferAccess.buffer;
 403                 int j      = 0;
 404                 for (int zone = 0; zone < nzone; zone++)
 405                 {
 406                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 407                     {
 408                         sendBuffer[j++] = f[i];
 409                     }
 410                 }
 411             }
 412             /* Communicate the forces */
 413             ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
 414             /* Add the received forces */
 415             int n = 0;
 416             if (!shiftForcesNeedPbc)
 417             {
 418                 for (int j : ind.index)
 419                 {
 420                     for (int d = 0; d < DIM; d++)
 421                     {
 422                         f[j][d] += receiveBuffer[n][d];
 423                     }
 424                     n++;
 425                 }
 426             }
 427             else if (!applyScrewPbc)
 428             {
 429                 for (int j : ind.index)
 430                 {
 431                     for (int d = 0; d < DIM; d++)
 432                     {
 433                         f[j][d] += receiveBuffer[n][d];
 434                     }
 435                     /* Add this force to the shift force */
 436                     for (int d = 0; d < DIM; d++)
 437                     {
 438                         fshift[is][d] += receiveBuffer[n][d];
 439                     }
 440                     n++;
 441                 }
 442             }
 443             else
 444             {
 445                 for (int j : ind.index)
 446                 {
 447                     /* Rotate the force */
 448                     f[j][XX] += receiveBuffer[n][XX];
 449                     f[j][YY] -= receiveBuffer[n][YY];
 450                     f[j][ZZ] -= receiveBuffer[n][ZZ];
 451                     if (shiftForcesNeedPbc)
 452                     {
 453                         /* Add this force to the shift force */
 454                         for (int d = 0; d < DIM; d++)
 455                         {
 456                             fshift[is][d] += receiveBuffer[n][d];
 457                         }
 458                     }
 459                     n++;
 460                 }
 461             }
 462         }
 463         nzone /= 2;
 464     }
 465     wallcycle_stop(wcycle, ewcMOVEF);
 466 }
 467
 468 /* Convenience function for extracting a real buffer from an rvec buffer
 469  *
 470  * To reduce the number of temporary communication buffers and avoid
 471  * cache polution, we reuse gmx::RVec buffers for storing reals.
 472  * This functions return a real buffer reference with the same number
 473  * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
 474  */
 475 static gmx::ArrayRef<real> realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
 476 {
 477     return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0], arrayRef.size());
 478 }
 479
 480 void dd_atom_spread_real(gmx_domdec_t* dd, real v[])
 481 {
 482     int                    nzone, nat_tot;
 483     gmx_domdec_comm_t*     comm;
 484     gmx_domdec_comm_dim_t* cd;
 485
 486     comm = dd->comm;
 487
 488     nzone   = 1;
 489     nat_tot = comm->atomRanges.numHomeAtoms();
 490     for (int d = 0; d < dd->ndim; d++)
 491     {
 492         cd = &comm->cd[d];
 493         for (const gmx_domdec_ind_t& ind : cd->ind)
 494         {
 495             /* Note: We provision for RVec instead of real, so a factor of 3
 496              * more than needed. The buffer actually already has this size
 497              * and we pass a plain pointer below, so this does not matter.
 498              */
 499             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 500             gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 501             int                 n          = 0;
 502             for (int j : ind.index)
 503             {
 504                 sendBuffer[n++] = v[j];
 505             }
 506
 507             DDBufferAccess<gmx::RVec> receiveBufferAccess(
 508                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 509
 510             gmx::ArrayRef<real> receiveBuffer;
 511             if (cd->receiveInPlace)
 512             {
 513                 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 514             }
 515             else
 516             {
 517                 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 518             }
 519             /* Send and receive the data */
 520             ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
 521             if (!cd->receiveInPlace)
 522             {
 523                 int j = 0;
 524                 for (int zone = 0; zone < nzone; zone++)
 525                 {
 526                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 527                     {
 528                         v[i] = receiveBuffer[j++];
 529                     }
 530                 }
 531             }
 532             nat_tot += ind.nrecv[nzone + 1];
 533         }
 534         nzone += nzone;
 535     }
 536 }
 537
 538 void dd_atom_sum_real(gmx_domdec_t* dd, real v[])
 539 {
 540     int                    nzone, nat_tot;
 541     gmx_domdec_comm_t*     comm;
 542     gmx_domdec_comm_dim_t* cd;
 543
 544     comm = dd->comm;
 545
 546     nzone   = comm->zones.n / 2;
 547     nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
 548     for (int d = dd->ndim - 1; d >= 0; d--)
 549     {
 550         cd = &comm->cd[d];
 551         for (int p = cd->numPulses() - 1; p >= 0; p--)
 552         {
 553             const gmx_domdec_ind_t& ind = cd->ind[p];
 554
 555             /* Note: We provision for RVec instead of real, so a factor of 3
 556              * more than needed. The buffer actually already has this size
 557              * and we typecast, so this works as intended.
 558              */
 559             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 560             gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 561             nat_tot -= ind.nrecv[nzone + 1];
 562
 563             DDBufferAccess<gmx::RVec> sendBufferAccess(
 564                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 565
 566             gmx::ArrayRef<real> sendBuffer;
 567             if (cd->receiveInPlace)
 568             {
 569                 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 570             }
 571             else
 572             {
 573                 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 574                 int j      = 0;
 575                 for (int zone = 0; zone < nzone; zone++)
 576                 {
 577                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 578                     {
 579                         sendBuffer[j++] = v[i];
 580                     }
 581                 }
 582             }
 583             /* Communicate the forces */
 584             ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
 585             /* Add the received forces */
 586             int n = 0;
 587             for (int j : ind.index)
 588             {
 589                 v[j] += receiveBuffer[n];
 590                 n++;
 591             }
 592         }
 593         nzone /= 2;
 594     }
 595 }
 596
 597 real dd_cutoff_multibody(const gmx_domdec_t* dd)
 598 {
 599     const gmx_domdec_comm_t& comm       = *dd->comm;
 600     const DDSystemInfo&      systemInfo = comm.systemInfo;
 601
 602     real r = -1;
 603     if (systemInfo.haveInterDomainMultiBodyBondeds)
 604     {
 605         if (comm.cutoff_mbody > 0)
 606         {
 607             r = comm.cutoff_mbody;
 608         }
 609         else
 610         {
 611             /* cutoff_mbody=0 means we do not have DLB */
 612             r = comm.cellsize_min[dd->dim[0]];
 613             for (int di = 1; di < dd->ndim; di++)
 614             {
 615                 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
 616             }
 617             if (comm.systemInfo.filterBondedCommunication)
 618             {
 619                 r = std::max(r, comm.cutoff_mbody);
 620             }
 621             else
 622             {
 623                 r = std::min(r, systemInfo.cutoff);
 624             }
 625         }
 626     }
 627
 628     return r;
 629 }
 630
 631 real dd_cutoff_twobody(const gmx_domdec_t* dd)
 632 {
 633     real r_mb;
 634
 635     r_mb = dd_cutoff_multibody(dd);
 636
 637     return std::max(dd->comm->systemInfo.cutoff, r_mb);
 638 }
 639
 640
 641 static void dd_cart_coord2pmecoord(const DDRankSetup&        ddRankSetup,
 642                                    const CartesianRankSetup& cartSetup,
 643                                    const ivec                coord,
 644                                    ivec                      coord_pme)
 645 {
 646     const int nc   = ddRankSetup.numPPCells[cartSetup.cartpmedim];
 647     const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
 648     copy_ivec(coord, coord_pme);
 649     coord_pme[cartSetup.cartpmedim] =
 650             nc + (coord[cartSetup.cartpmedim] * (ntot - nc) + (ntot - nc) / 2) / nc;
 651 }
 652
 653 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
 654 static int ddindex2pmeindex(const DDRankSetup& ddRankSetup, const int ddCellIndex)
 655 {
 656     const int npp  = ddRankSetup.numPPRanks;
 657     const int npme = ddRankSetup.numRanksDoingPme;
 658
 659     /* Here we assign a PME node to communicate with this DD node
 660      * by assuming that the major index of both is x.
 661      * We add npme/2 to obtain an even distribution.
 662      */
 663     return (ddCellIndex * npme + npme / 2) / npp;
 664 }
 665
 666 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup& ddRankSetup)
 667 {
 668     std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
 669
 670     int n = 0;
 671     for (int i = 0; i < ddRankSetup.numPPRanks; i++)
 672     {
 673         const int p0 = ddindex2pmeindex(ddRankSetup, i);
 674         const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
 675         if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
 676         {
 677             if (debug)
 678             {
 679                 fprintf(debug, "pme_rank[%d] = %d\n", n, i + 1 + n);
 680             }
 681             pmeRanks[n] = i + 1 + n;
 682             n++;
 683         }
 684     }
 685
 686     return pmeRanks;
 687 }
 688
 689 static int gmx_ddcoord2pmeindex(const t_commrec* cr, int x, int y, int z)
 690 {
 691     gmx_domdec_t* dd;
 692     ivec          coords;
 693     int           slab;
 694
 695     dd         = cr->dd;
 696     coords[XX] = x;
 697     coords[YY] = y;
 698     coords[ZZ] = z;
 699     slab       = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->numCells, coords));
 700
 701     return slab;
 702 }
 703
 704 static int ddcoord2simnodeid(const t_commrec* cr, int x, int y, int z)
 705 {
 706     const CartesianRankSetup& cartSetup = cr->dd->comm->cartesianRankSetup;
 707     ivec                      coords    = { x, y, z };
 708     int                       nodeid    = -1;
 709
 710     if (cartSetup.bCartesianPP_PME)
 711     {
 712 #if GMX_MPI
 713         MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
 714 #endif
 715     }
 716     else
 717     {
 718         int ddindex = dd_index(cr->dd->numCells, coords);
 719         if (cartSetup.bCartesianPP)
 720         {
 721             nodeid = cartSetup.ddindex2simnodeid[ddindex];
 722         }
 723         else
 724         {
 725             if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
 726             {
 727                 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
 728             }
 729             else
 730             {
 731                 nodeid = ddindex;
 732             }
 733         }
 734     }
 735
 736     return nodeid;
 737 }
 738
 739 static int dd_simnode2pmenode(const DDRankSetup&        ddRankSetup,
 740                               const CartesianRankSetup& cartSetup,
 741                               gmx::ArrayRef<const int>  pmeRanks,
 742                               const t_commrec gmx_unused* cr,
 743                               const int                   sim_nodeid)
 744 {
 745     int pmenode = -1;
 746
 747     /* This assumes a uniform x domain decomposition grid cell size */
 748     if (cartSetup.bCartesianPP_PME)
 749     {
 750 #if GMX_MPI
 751         ivec coord, coord_pme;
 752         MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
 753         if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
 754         {
 755             /* This is a PP rank */
 756             dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
 757             MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
 758         }
 759 #endif
 760     }
 761     else if (cartSetup.bCartesianPP)
 762     {
 763         if (sim_nodeid < ddRankSetup.numPPRanks)
 764         {
 765             pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
 766         }
 767     }
 768     else
 769     {
 770         /* This assumes DD cells with identical x coordinates
 771          * are numbered sequentially.
 772          */
 773         if (pmeRanks.empty())
 774         {
 775             if (sim_nodeid < ddRankSetup.numPPRanks)
 776             {
 777                 /* The DD index equals the nodeid */
 778                 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
 779             }
 780         }
 781         else
 782         {
 783             int i = 0;
 784             while (sim_nodeid > pmeRanks[i])
 785             {
 786                 i++;
 787             }
 788             if (sim_nodeid < pmeRanks[i])
 789             {
 790                 pmenode = pmeRanks[i];
 791             }
 792         }
 793     }
 794
 795     return pmenode;
 796 }
 797
 798 NumPmeDomains getNumPmeDomains(const gmx_domdec_t* dd)
 799 {
 800     if (dd != nullptr)
 801     {
 802         return { dd->comm->ddRankSetup.npmenodes_x, dd->comm->ddRankSetup.npmenodes_y };
 803     }
 804     else
 805     {
 806         return { 1, 1 };
 807     }
 808 }
 809
 810 std::vector<int> get_pme_ddranks(const t_commrec* cr, const int pmenodeid)
 811 {
 812     const DDRankSetup&        ddRankSetup = cr->dd->comm->ddRankSetup;
 813     const CartesianRankSetup& cartSetup   = cr->dd->comm->cartesianRankSetup;
 814     GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks,
 815                        "This function should only be called when PME-only ranks are in use");
 816     std::vector<int> ddranks;
 817     ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1) / ddRankSetup.numRanksDoingPme);
 818
 819     for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
 820     {
 821         for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
 822         {
 823             for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
 824             {
 825                 if (cartSetup.bCartesianPP_PME)
 826                 {
 827                     ivec coord = { x, y, z };
 828                     ivec coord_pme;
 829                     dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
 830                     if (cr->dd->ci[XX] == coord_pme[XX] && cr->dd->ci[YY] == coord_pme[YY]
 831                         && cr->dd->ci[ZZ] == coord_pme[ZZ])
 832                     {
 833                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 834                     }
 835                 }
 836                 else
 837                 {
 838                     /* The slab corresponds to the nodeid in the PME group */
 839                     if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
 840                     {
 841                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 842                     }
 843                 }
 844             }
 845         }
 846     }
 847     return ddranks;
 848 }
 849
 850 static gmx_bool receive_vir_ener(const gmx_domdec_t* dd, gmx::ArrayRef<const int> pmeRanks, const t_commrec* cr)
 851 {
 852     gmx_bool bReceive = TRUE;
 853
 854     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
 855     if (ddRankSetup.usePmeOnlyRanks)
 856     {
 857         const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
 858         if (cartSetup.bCartesianPP_PME)
 859         {
 860 #if GMX_MPI
 861             int  pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
 862             ivec coords;
 863             MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
 864             coords[cartSetup.cartpmedim]++;
 865             if (coords[cartSetup.cartpmedim] < dd->numCells[cartSetup.cartpmedim])
 866             {
 867                 int rank;
 868                 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
 869                 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
 870                 {
 871                     /* This is not the last PP node for pmenode */
 872                     bReceive = FALSE;
 873                 }
 874             }
 875 #else
 876             GMX_RELEASE_ASSERT(
 877                     false,
 878                     "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
 879 #endif
 880         }
 881         else
 882         {
 883             int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
 884             if (cr->sim_nodeid + 1 < cr->nnodes
 885                 && dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
 886             {
 887                 /* This is not the last PP node for pmenode */
 888                 bReceive = FALSE;
 889             }
 890         }
 891     }
 892
 893     return bReceive;
 894 }
 895
 896 static void set_slb_pme_dim_f(gmx_domdec_t* dd, int dim, real** dim_f)
 897 {
 898     gmx_domdec_comm_t* comm;
 899     int                i;
 900
 901     comm = dd->comm;
 902
 903     snew(*dim_f, dd->numCells[dim] + 1);
 904     (*dim_f)[0] = 0;
 905     for (i = 1; i < dd->numCells[dim]; i++)
 906     {
 907         if (comm->slb_frac[dim])
 908         {
 909             (*dim_f)[i] = (*dim_f)[i - 1] + comm->slb_frac[dim][i - 1];
 910         }
 911         else
 912         {
 913             (*dim_f)[i] = static_cast<real>(i) / static_cast<real>(dd->numCells[dim]);
 914         }
 915     }
 916     (*dim_f)[dd->numCells[dim]] = 1;
 917 }
 918
 919 static void init_ddpme(gmx_domdec_t* dd, gmx_ddpme_t* ddpme, int dimind)
 920 {
 921     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
 922
 923     if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
 924     {
 925         ddpme->dim = YY;
 926     }
 927     else
 928     {
 929         ddpme->dim = dimind;
 930     }
 931     ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
 932
 933     ddpme->nslab = (ddpme->dim == 0 ? ddRankSetup.npmenodes_x : ddRankSetup.npmenodes_y);
 934
 935     if (ddpme->nslab <= 1)
 936     {
 937         return;
 938     }
 939
 940     const int nso = ddRankSetup.numRanksDoingPme / ddpme->nslab;
 941     /* Determine for each PME slab the PP location range for dimension dim */
 942     snew(ddpme->pp_min, ddpme->nslab);
 943     snew(ddpme->pp_max, ddpme->nslab);
 944     for (int slab = 0; slab < ddpme->nslab; slab++)
 945     {
 946         ddpme->pp_min[slab] = dd->numCells[dd->dim[dimind]] - 1;
 947         ddpme->pp_max[slab] = 0;
 948     }
 949     for (int i = 0; i < dd->nnodes; i++)
 950     {
 951         ivec xyz;
 952         ddindex2xyz(dd->numCells, i, xyz);
 953         /* For y only use our y/z slab.
 954          * This assumes that the PME x grid size matches the DD grid size.
 955          */
 956         if (dimind == 0 || xyz[XX] == dd->ci[XX])
 957         {
 958             const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
 959             int       slab;
 960             if (dimind == 0)
 961             {
 962                 slab = pmeindex / nso;
 963             }
 964             else
 965             {
 966                 slab = pmeindex % ddpme->nslab;
 967             }
 968             ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
 969             ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
 970         }
 971     }
 972
 973     set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
 974 }
 975
 976 int dd_pme_maxshift_x(const gmx_domdec_t* dd)
 977 {
 978     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
 979
 980     if (ddRankSetup.ddpme[0].dim == XX)
 981     {
 982         return ddRankSetup.ddpme[0].maxshift;
 983     }
 984     else
 985     {
 986         return 0;
 987     }
 988 }
 989
 990 int dd_pme_maxshift_y(const gmx_domdec_t* dd)
 991 {
 992     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
 993
 994     if (ddRankSetup.ddpme[0].dim == YY)
 995     {
 996         return ddRankSetup.ddpme[0].maxshift;
 997     }
 998     else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
 999     {
1000         return ddRankSetup.ddpme[1].maxshift;
1001     }
1002     else
1003     {
1004         return 0;
1005     }
1006 }
1007
1008 bool ddHaveSplitConstraints(const gmx_domdec_t& dd)
1009 {
1010     return dd.comm->systemInfo.haveSplitConstraints;
1011 }
1012
1013 bool ddUsesUpdateGroups(const gmx_domdec_t& dd)
1014 {
1015     return dd.comm->systemInfo.useUpdateGroups;
1016 }
1017
1018 void dd_cycles_add(const gmx_domdec_t* dd, float cycles, int ddCycl)
1019 {
1020     /* Note that the cycles value can be incorrect, either 0 or some
1021      * extremely large value, when our thread migrated to another core
1022      * with an unsynchronized cycle counter. If this happens less often
1023      * that once per nstlist steps, this will not cause issues, since
1024      * we later subtract the maximum value from the sum over nstlist steps.
1025      * A zero count will slightly lower the total, but that's a small effect.
1026      * Note that the main purpose of the subtraction of the maximum value
1027      * is to avoid throwing off the load balancing when stalls occur due
1028      * e.g. system activity or network congestion.
1029      */
1030     dd->comm->cycl[ddCycl] += cycles;
1031     dd->comm->cycl_n[ddCycl]++;
1032     if (cycles > dd->comm->cycl_max[ddCycl])
1033     {
1034         dd->comm->cycl_max[ddCycl] = cycles;
1035     }
1036 }
1037
1038 #if GMX_MPI
1039 static void make_load_communicator(gmx_domdec_t* dd, int dim_ind, ivec loc)
1040 {
1041     MPI_Comm c_row;
1042     int      dim, i, rank;
1043     ivec     loc_c;
1044     gmx_bool bPartOfGroup = FALSE;
1045
1046     dim = dd->dim[dim_ind];
1047     copy_ivec(loc, loc_c);
1048     for (i = 0; i < dd->numCells[dim]; i++)
1049     {
1050         loc_c[dim] = i;
1051         rank       = dd_index(dd->numCells, loc_c);
1052         if (rank == dd->rank)
1053         {
1054             /* This process is part of the group */
1055             bPartOfGroup = TRUE;
1056         }
1057     }
1058     MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank, &c_row);
1059     if (bPartOfGroup)
1060     {
1061         dd->comm->mpi_comm_load[dim_ind] = c_row;
1062         if (!isDlbDisabled(dd->comm))
1063         {
1064             DDCellsizesWithDlb& cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1065
1066             if (dd->ci[dim] == dd->master_ci[dim])
1067             {
1068                 /* This is the root process of this row */
1069                 cellsizes.rowMaster = std::make_unique<RowMaster>();
1070
1071                 RowMaster& rowMaster = *cellsizes.rowMaster;
1072                 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1073                 rowMaster.oldCellFrac.resize(dd->numCells[dim] + 1);
1074                 rowMaster.isCellMin.resize(dd->numCells[dim]);
1075                 if (dim_ind > 0)
1076                 {
1077                     rowMaster.bounds.resize(dd->numCells[dim]);
1078                 }
1079                 rowMaster.buf_ncd.resize(dd->numCells[dim]);
1080             }
1081             else
1082             {
1083                 /* This is not a root process, we only need to receive cell_f */
1084                 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1085             }
1086         }
1087         if (dd->ci[dim] == dd->master_ci[dim])
1088         {
1089             snew(dd->comm->load[dim_ind].load, dd->numCells[dim] * DD_NLOAD_MAX);
1090         }
1091     }
1092 }
1093 #endif
1094
1095 void dd_setup_dlb_resource_sharing(const t_commrec* cr, int gpu_id)
1096 {
1097 #if GMX_MPI
1098     int           physicalnode_id_hash;
1099     gmx_domdec_t* dd;
1100     MPI_Comm      mpi_comm_pp_physicalnode;
1101
1102     if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1103     {
1104         /* Only ranks with short-ranged tasks (currently) use GPUs.
1105          * If we don't have GPUs assigned, there are no resources to share.
1106          */
1107         return;
1108     }
1109
1110     physicalnode_id_hash = gmx_physicalnode_id_hash();
1111
1112     dd = cr->dd;
1113
1114     if (debug)
1115     {
1116         fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1117         fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n", dd->rank,
1118                 physicalnode_id_hash, gpu_id);
1119     }
1120     /* Split the PP communicator over the physical nodes */
1121     /* TODO: See if we should store this (before), as it's also used for
1122      * for the nodecomm summation.
1123      */
1124     // TODO PhysicalNodeCommunicator could be extended/used to handle
1125     // the need for per-node per-group communicators.
1126     MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank, &mpi_comm_pp_physicalnode);
1127     MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank, &dd->comm->mpi_comm_gpu_shared);
1128     MPI_Comm_free(&mpi_comm_pp_physicalnode);
1129     MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1130
1131     if (debug)
1132     {
1133         fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1134     }
1135
1136     /* Note that some ranks could share a GPU, while others don't */
1137
1138     if (dd->comm->nrank_gpu_shared == 1)
1139     {
1140         MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1141     }
1142 #else
1143     GMX_UNUSED_VALUE(cr);
1144     GMX_UNUSED_VALUE(gpu_id);
1145 #endif
1146 }
1147
1148 static void make_load_communicators(gmx_domdec_t gmx_unused* dd)
1149 {
1150 #if GMX_MPI
1151     int  dim0, dim1, i, j;
1152     ivec loc;
1153
1154     if (debug)
1155     {
1156         fprintf(debug, "Making load communicators\n");
1157     }
1158
1159     dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1160     snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1161
1162     if (dd->ndim == 0)
1163     {
1164         return;
1165     }
1166
1167     clear_ivec(loc);
1168     make_load_communicator(dd, 0, loc);
1169     if (dd->ndim > 1)
1170     {
1171         dim0 = dd->dim[0];
1172         for (i = 0; i < dd->numCells[dim0]; i++)
1173         {
1174             loc[dim0] = i;
1175             make_load_communicator(dd, 1, loc);
1176         }
1177     }
1178     if (dd->ndim > 2)
1179     {
1180         dim0 = dd->dim[0];
1181         for (i = 0; i < dd->numCells[dim0]; i++)
1182         {
1183             loc[dim0] = i;
1184             dim1      = dd->dim[1];
1185             for (j = 0; j < dd->numCells[dim1]; j++)
1186             {
1187                 loc[dim1] = j;
1188                 make_load_communicator(dd, 2, loc);
1189             }
1190         }
1191     }
1192
1193     if (debug)
1194     {
1195         fprintf(debug, "Finished making load communicators\n");
1196     }
1197 #endif
1198 }
1199
1200 /*! \brief Sets up the relation between neighboring domains and zones */
1201 static void setup_neighbor_relations(gmx_domdec_t* dd)
1202 {
1203     int                 d, dim, m;
1204     ivec                tmp, s;
1205     gmx_domdec_zones_t* zones;
1206     GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
1207
1208     for (d = 0; d < dd->ndim; d++)
1209     {
1210         dim = dd->dim[d];
1211         copy_ivec(dd->ci, tmp);
1212         tmp[dim]           = (tmp[dim] + 1) % dd->numCells[dim];
1213         dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1214         copy_ivec(dd->ci, tmp);
1215         tmp[dim]           = (tmp[dim] - 1 + dd->numCells[dim]) % dd->numCells[dim];
1216         dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1217         if (debug)
1218         {
1219             fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n", dd->rank, dim,
1220                     dd->neighbor[d][0], dd->neighbor[d][1]);
1221         }
1222     }
1223
1224     int nzone  = (1 << dd->ndim);
1225     int nizone = (1 << std::max(dd->ndim - 1, 0));
1226     assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1227
1228     zones = &dd->comm->zones;
1229
1230     for (int i = 0; i < nzone; i++)
1231     {
1232         m = 0;
1233         clear_ivec(zones->shift[i]);
1234         for (d = 0; d < dd->ndim; d++)
1235         {
1236             zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1237         }
1238     }
1239
1240     zones->n = nzone;
1241     for (int i = 0; i < nzone; i++)
1242     {
1243         for (d = 0; d < DIM; d++)
1244         {
1245             s[d] = dd->ci[d] - zones->shift[i][d];
1246             if (s[d] < 0)
1247             {
1248                 s[d] += dd->numCells[d];
1249             }
1250             else if (s[d] >= dd->numCells[d])
1251             {
1252                 s[d] -= dd->numCells[d];
1253             }
1254         }
1255     }
1256     for (int iZoneIndex = 0; iZoneIndex < nizone; iZoneIndex++)
1257     {
1258         GMX_RELEASE_ASSERT(
1259                 ddNonbondedZonePairRanges[iZoneIndex][0] == iZoneIndex,
1260                 "The first element for each ddNonbondedZonePairRanges should match its index");
1261
1262         DDPairInteractionRanges iZone;
1263         iZone.iZoneIndex = iZoneIndex;
1264         /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
1265          * j-zones up to nzone.
1266          */
1267         iZone.jZoneRange = gmx::Range<int>(std::min(ddNonbondedZonePairRanges[iZoneIndex][1], nzone),
1268                                            std::min(ddNonbondedZonePairRanges[iZoneIndex][2], nzone));
1269         for (dim = 0; dim < DIM; dim++)
1270         {
1271             if (dd->numCells[dim] == 1)
1272             {
1273                 /* All shifts should be allowed */
1274                 iZone.shift0[dim] = -1;
1275                 iZone.shift1[dim] = 1;
1276             }
1277             else
1278             {
1279                 /* Determine the min/max j-zone shift wrt the i-zone */
1280                 iZone.shift0[dim] = 1;
1281                 iZone.shift1[dim] = -1;
1282                 for (int jZone : iZone.jZoneRange)
1283                 {
1284                     int shift_diff = zones->shift[jZone][dim] - zones->shift[iZoneIndex][dim];
1285                     if (shift_diff < iZone.shift0[dim])
1286                     {
1287                         iZone.shift0[dim] = shift_diff;
1288                     }
1289                     if (shift_diff > iZone.shift1[dim])
1290                     {
1291                         iZone.shift1[dim] = shift_diff;
1292                     }
1293                 }
1294             }
1295         }
1296
1297         zones->iZones.push_back(iZone);
1298     }
1299
1300     if (!isDlbDisabled(dd->comm))
1301     {
1302         dd->comm->cellsizesWithDlb.resize(dd->ndim);
1303     }
1304
1305     if (dd->comm->ddSettings.recordLoad)
1306     {
1307         make_load_communicators(dd);
1308     }
1309 }
1310
1311 static void make_pp_communicator(const gmx::MDLogger& mdlog,
1312                                  gmx_domdec_t*        dd,
1313                                  t_commrec gmx_unused* cr,
1314                                  bool gmx_unused reorder)
1315 {
1316 #if GMX_MPI
1317     gmx_domdec_comm_t*  comm      = dd->comm;
1318     CartesianRankSetup& cartSetup = comm->cartesianRankSetup;
1319
1320     if (cartSetup.bCartesianPP)
1321     {
1322         /* Set up cartesian communication for the particle-particle part */
1323         GMX_LOG(mdlog.info)
1324                 .appendTextFormatted("Will use a Cartesian communicator: %d x %d x %d",
1325                                      dd->numCells[XX], dd->numCells[YY], dd->numCells[ZZ]);
1326
1327         ivec periods;
1328         for (int i = 0; i < DIM; i++)
1329         {
1330             periods[i] = TRUE;
1331         }
1332         MPI_Comm comm_cart;
1333         MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->numCells, periods, static_cast<int>(reorder),
1334                         &comm_cart);
1335         /* We overwrite the old communicator with the new cartesian one */
1336         cr->mpi_comm_mygroup = comm_cart;
1337     }
1338
1339     dd->mpi_comm_all = cr->mpi_comm_mygroup;
1340     MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1341
1342     if (cartSetup.bCartesianPP_PME)
1343     {
1344         /* Since we want to use the original cartesian setup for sim,
1345          * and not the one after split, we need to make an index.
1346          */
1347         cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1348         cartSetup.ddindex2ddnodeid[dd_index(dd->numCells, dd->ci)] = dd->rank;
1349         gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1350         /* Get the rank of the DD master,
1351          * above we made sure that the master node is a PP node.
1352          */
1353         int rank;
1354         if (MASTER(cr))
1355         {
1356             rank = dd->rank;
1357         }
1358         else
1359         {
1360             rank = 0;
1361         }
1362         MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1363     }
1364     else if (cartSetup.bCartesianPP)
1365     {
1366         if (!comm->ddRankSetup.usePmeOnlyRanks)
1367         {
1368             /* The PP communicator is also
1369              * the communicator for this simulation
1370              */
1371             cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1372         }
1373         cr->nodeid = dd->rank;
1374
1375         MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1376
1377         /* We need to make an index to go from the coordinates
1378          * to the nodeid of this simulation.
1379          */
1380         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1381         std::vector<int> buf(dd->nnodes);
1382         if (thisRankHasDuty(cr, DUTY_PP))
1383         {
1384             buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
1385         }
1386         /* Communicate the ddindex to simulation nodeid index */
1387         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1388                       cr->mpi_comm_mysim);
1389
1390         /* Determine the master coordinates and rank.
1391          * The DD master should be the same node as the master of this sim.
1392          */
1393         for (int i = 0; i < dd->nnodes; i++)
1394         {
1395             if (cartSetup.ddindex2simnodeid[i] == 0)
1396             {
1397                 ddindex2xyz(dd->numCells, i, dd->master_ci);
1398                 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1399             }
1400         }
1401         if (debug)
1402         {
1403             fprintf(debug, "The master rank is %d\n", dd->masterrank);
1404         }
1405     }
1406     else
1407     {
1408         /* No Cartesian communicators */
1409         /* We use the rank in dd->comm->all as DD index */
1410         ddindex2xyz(dd->numCells, dd->rank, dd->ci);
1411         /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1412         dd->masterrank = 0;
1413         clear_ivec(dd->master_ci);
1414     }
1415 #endif
1416
1417     GMX_LOG(mdlog.info)
1418             .appendTextFormatted("Domain decomposition rank %d, coordinates %d %d %d\n", dd->rank,
1419                                  dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1420     if (debug)
1421     {
1422         fprintf(debug, "Domain decomposition rank %d, coordinates %d %d %d\n\n", dd->rank,
1423                 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1424     }
1425 }
1426
1427 static void receive_ddindex2simnodeid(gmx_domdec_t* dd, t_commrec* cr)
1428 {
1429 #if GMX_MPI
1430     CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
1431
1432     if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1433     {
1434         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1435         std::vector<int> buf(dd->nnodes);
1436         if (thisRankHasDuty(cr, DUTY_PP))
1437         {
1438             buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
1439         }
1440         /* Communicate the ddindex to simulation nodeid index */
1441         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1442                       cr->mpi_comm_mysim);
1443     }
1444 #else
1445     GMX_UNUSED_VALUE(dd);
1446     GMX_UNUSED_VALUE(cr);
1447 #endif
1448 }
1449
1450 static CartesianRankSetup split_communicator(const gmx::MDLogger& mdlog,
1451                                              t_commrec*           cr,
1452                                              const DdRankOrder    ddRankOrder,
1453                                              bool gmx_unused    reorder,
1454                                              const DDRankSetup& ddRankSetup,
1455                                              ivec               ddCellIndex,
1456                                              std::vector<int>*  pmeRanks)
1457 {
1458     CartesianRankSetup cartSetup;
1459
1460     cartSetup.bCartesianPP     = (ddRankOrder == DdRankOrder::cartesian);
1461     cartSetup.bCartesianPP_PME = false;
1462
1463     const ivec& numDDCells = ddRankSetup.numPPCells;
1464     /* Initially we set ntot to the number of PP cells */
1465     copy_ivec(numDDCells, cartSetup.ntot);
1466
1467     if (cartSetup.bCartesianPP)
1468     {
1469         const int numDDCellsTot = ddRankSetup.numPPRanks;
1470         bool      bDiv[DIM];
1471         for (int i = 1; i < DIM; i++)
1472         {
1473             bDiv[i] = ((ddRankSetup.numRanksDoingPme * numDDCells[i]) % numDDCellsTot == 0);
1474         }
1475         if (bDiv[YY] || bDiv[ZZ])
1476         {
1477             cartSetup.bCartesianPP_PME = TRUE;
1478             /* If we have 2D PME decomposition, which is always in x+y,
1479              * we stack the PME only nodes in z.
1480              * Otherwise we choose the direction that provides the thinnest slab
1481              * of PME only nodes as this will have the least effect
1482              * on the PP communication.
1483              * But for the PME communication the opposite might be better.
1484              */
1485             if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 || !bDiv[YY] || numDDCells[YY] > numDDCells[ZZ]))
1486             {
1487                 cartSetup.cartpmedim = ZZ;
1488             }
1489             else
1490             {
1491                 cartSetup.cartpmedim = YY;
1492             }
1493             cartSetup.ntot[cartSetup.cartpmedim] +=
1494                     (ddRankSetup.numRanksDoingPme * numDDCells[cartSetup.cartpmedim]) / numDDCellsTot;
1495         }
1496         else
1497         {
1498             GMX_LOG(mdlog.info)
1499                     .appendTextFormatted(
1500                             "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or "
1501                             "nx*nz (%d*%d)",
1502                             ddRankSetup.numRanksDoingPme, numDDCells[XX], numDDCells[YY],
1503                             numDDCells[XX], numDDCells[ZZ]);
1504             GMX_LOG(mdlog.info)
1505                     .appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1506         }
1507     }
1508
1509     if (cartSetup.bCartesianPP_PME)
1510     {
1511 #if GMX_MPI
1512         int  rank;
1513         ivec periods;
1514
1515         GMX_LOG(mdlog.info)
1516                 .appendTextFormatted(
1517                         "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1518                         cartSetup.ntot[XX], cartSetup.ntot[YY], cartSetup.ntot[ZZ]);
1519
1520         for (int i = 0; i < DIM; i++)
1521         {
1522             periods[i] = TRUE;
1523         }
1524         MPI_Comm comm_cart;
1525         MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder),
1526                         &comm_cart);
1527         MPI_Comm_rank(comm_cart, &rank);
1528         if (MASTER(cr) && rank != 0)
1529         {
1530             gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1531         }
1532
1533         /* With this assigment we loose the link to the original communicator
1534          * which will usually be MPI_COMM_WORLD, unless have multisim.
1535          */
1536         cr->mpi_comm_mysim = comm_cart;
1537         cr->sim_nodeid     = rank;
1538
1539         MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1540
1541         GMX_LOG(mdlog.info)
1542                 .appendTextFormatted("Cartesian rank %d, coordinates %d %d %d\n", cr->sim_nodeid,
1543                                      ddCellIndex[XX], ddCellIndex[YY], ddCellIndex[ZZ]);
1544
1545         if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1546         {
1547             cr->duty = DUTY_PP;
1548         }
1549         if (!ddRankSetup.usePmeOnlyRanks
1550             || ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1551         {
1552             cr->duty = DUTY_PME;
1553         }
1554
1555         /* Split the sim communicator into PP and PME only nodes */
1556         MPI_Comm_split(cr->mpi_comm_mysim, getThisRankDuties(cr),
1557                        dd_index(cartSetup.ntot, ddCellIndex), &cr->mpi_comm_mygroup);
1558 #else
1559         GMX_UNUSED_VALUE(ddCellIndex);
1560 #endif
1561     }
1562     else
1563     {
1564         switch (ddRankOrder)
1565         {
1566             case DdRankOrder::pp_pme:
1567                 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1568                 break;
1569             case DdRankOrder::interleave:
1570                 /* Interleave the PP-only and PME-only ranks */
1571                 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1572                 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1573                 break;
1574             case DdRankOrder::cartesian: break;
1575             default: gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1576         }
1577
1578         if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1579         {
1580             cr->duty = DUTY_PME;
1581         }
1582         else
1583         {
1584             cr->duty = DUTY_PP;
1585         }
1586 #if GMX_MPI
1587         /* Split the sim communicator into PP and PME only nodes */
1588         MPI_Comm_split(cr->mpi_comm_mysim, getThisRankDuties(cr), cr->nodeid, &cr->mpi_comm_mygroup);
1589         MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1590 #endif
1591     }
1592
1593     GMX_LOG(mdlog.info)
1594             .appendTextFormatted("This rank does only %s work.\n",
1595                                  thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1596
1597     return cartSetup;
1598 }
1599
1600 /*! \brief Makes the PP communicator and the PME communicator, when needed
1601  *
1602  * Returns the Cartesian rank setup.
1603  * Sets \p cr->mpi_comm_mygroup
1604  * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1605  * With separate PME ranks in interleaved order, set the PME ranks in \p pmeRanks.
1606  */
1607 static CartesianRankSetup makeGroupCommunicators(const gmx::MDLogger& mdlog,
1608                                                  const DDSettings&    ddSettings,
1609                                                  const DdRankOrder    ddRankOrder,
1610                                                  const DDRankSetup&   ddRankSetup,
1611                                                  t_commrec*           cr,
1612                                                  ivec                 ddCellIndex,
1613                                                  std::vector<int>*    pmeRanks)
1614 {
1615     CartesianRankSetup cartSetup;
1616
1617     if (ddRankSetup.usePmeOnlyRanks)
1618     {
1619         /* Split the communicator into a PP and PME part */
1620         cartSetup = split_communicator(mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder,
1621                                        ddRankSetup, ddCellIndex, pmeRanks);
1622     }
1623     else
1624     {
1625         /* All nodes do PP and PME */
1626         /* We do not require separate communicators */
1627         cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
1628
1629         cartSetup.bCartesianPP     = false;
1630         cartSetup.bCartesianPP_PME = false;
1631     }
1632
1633     return cartSetup;
1634 }
1635
1636 /*! \brief For PP ranks, sets or makes the communicator
1637  *
1638  * For PME ranks get the rank id.
1639  * For PP only ranks, sets the PME-only rank.
1640  */
1641 static void setupGroupCommunication(const gmx::MDLogger&     mdlog,
1642                                     const DDSettings&        ddSettings,
1643                                     gmx::ArrayRef<const int> pmeRanks,
1644                                     t_commrec*               cr,
1645                                     const int                numAtomsInSystem,
1646                                     gmx_domdec_t*            dd)
1647 {
1648     const DDRankSetup&        ddRankSetup = dd->comm->ddRankSetup;
1649     const CartesianRankSetup& cartSetup   = dd->comm->cartesianRankSetup;
1650
1651     if (thisRankHasDuty(cr, DUTY_PP))
1652     {
1653         /* Copy or make a new PP communicator */
1654
1655         /* We (possibly) reordered the nodes in split_communicator,
1656          * so it is no longer required in make_pp_communicator.
1657          */
1658         const bool useCartesianReorder = (ddSettings.useCartesianReorder && !cartSetup.bCartesianPP_PME);
1659
1660         make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1661     }
1662     else
1663     {
1664         receive_ddindex2simnodeid(dd, cr);
1665     }
1666
1667     if (!thisRankHasDuty(cr, DUTY_PME))
1668     {
1669         /* Set up the commnuication to our PME node */
1670         dd->pme_nodeid = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1671         dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1672         if (debug)
1673         {
1674             fprintf(debug, "My pme_nodeid %d receive ener %s\n", dd->pme_nodeid,
1675                     gmx::boolToString(dd->pme_receive_vir_ener));
1676         }
1677     }
1678     else
1679     {
1680         dd->pme_nodeid = -1;
1681     }
1682
1683     /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1684     if (MASTER(cr))
1685     {
1686         dd->ma = std::make_unique<AtomDistribution>(dd->numCells, numAtomsInSystem, numAtomsInSystem);
1687     }
1688 }
1689
1690 static real* get_slb_frac(const gmx::MDLogger& mdlog, const char* dir, int nc, const char* size_string)
1691 {
1692     real * slb_frac, tot;
1693     int    i, n;
1694     double dbl;
1695
1696     slb_frac = nullptr;
1697     if (nc > 1 && size_string != nullptr)
1698     {
1699         GMX_LOG(mdlog.info).appendTextFormatted("Using static load balancing for the %s direction", dir);
1700         snew(slb_frac, nc);
1701         tot = 0;
1702         for (i = 0; i < nc; i++)
1703         {
1704             dbl = 0;
1705             sscanf(size_string, "%20lf%n", &dbl, &n);
1706             if (dbl == 0)
1707             {
1708                 gmx_fatal(FARGS,
1709                           "Incorrect or not enough DD cell size entries for direction %s: '%s'",
1710                           dir, size_string);
1711             }
1712             slb_frac[i] = dbl;
1713             size_string += n;
1714             tot += slb_frac[i];
1715         }
1716         /* Normalize */
1717         std::string relativeCellSizes = "Relative cell sizes:";
1718         for (i = 0; i < nc; i++)
1719         {
1720             slb_frac[i] /= tot;
1721             relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1722         }
1723         GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1724     }
1725
1726     return slb_frac;
1727 }
1728
1729 static int multi_body_bondeds_count(const gmx_mtop_t* mtop)
1730 {
1731     int                  n     = 0;
1732     gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1733     int                  nmol;
1734     while (const InteractionLists* ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1735     {
1736         for (auto& ilist : extractILists(*ilists, IF_BOND))
1737         {
1738             if (NRAL(ilist.functionType) > 2)
1739             {
1740                 n += nmol * (ilist.iatoms.size() / ilistStride(ilist));
1741             }
1742         }
1743     }
1744
1745     return n;
1746 }
1747
1748 static int dd_getenv(const gmx::MDLogger& mdlog, const char* env_var, int def)
1749 {
1750     char* val;
1751     int   nst;
1752
1753     nst = def;
1754     val = getenv(env_var);
1755     if (val)
1756     {
1757         if (sscanf(val, "%20d", &nst) <= 0)
1758         {
1759             nst = 1;
1760         }
1761         GMX_LOG(mdlog.info).appendTextFormatted("Found env.var. %s = %s, using value %d", env_var, val, nst);
1762     }
1763
1764     return nst;
1765 }
1766
1767 static void check_dd_restrictions(const gmx_domdec_t* dd, const t_inputrec* ir, const gmx::MDLogger& mdlog)
1768 {
1769     if (ir->pbcType == PbcType::Screw
1770         && (dd->numCells[XX] == 1 || dd->numCells[YY] > 1 || dd->numCells[ZZ] > 1))
1771     {
1772         gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction",
1773                   c_pbcTypeNames[ir->pbcType].c_str());
1774     }
1775
1776     if (ir->nstlist == 0)
1777     {
1778         gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1779     }
1780
1781     if (ir->comm_mode == ecmANGULAR && ir->pbcType != PbcType::No)
1782     {
1783         GMX_LOG(mdlog.warning)
1784                 .appendText(
1785                         "comm-mode angular will give incorrect results when the comm group "
1786                         "partially crosses a periodic boundary");
1787     }
1788 }
1789
1790 static real average_cellsize_min(const gmx_ddbox_t& ddbox, const ivec numDomains)
1791 {
1792     real r = ddbox.box_size[XX];
1793     for (int d = 0; d < DIM; d++)
1794     {
1795         if (numDomains[d] > 1)
1796         {
1797             /* Check using the initial average cell size */
1798             r = std::min(r, ddbox.box_size[d] * ddbox.skew_fac[d] / numDomains[d]);
1799         }
1800     }
1801
1802     return r;
1803 }
1804
1805 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
1806  */
1807 static DlbState forceDlbOffOrBail(DlbState             cmdlineDlbState,
1808                                   const std::string&   reasonStr,
1809                                   const gmx::MDLogger& mdlog)
1810 {
1811     std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
1812     std::string dlbDisableNote     = "NOTE: disabling dynamic load balancing as ";
1813
1814     if (cmdlineDlbState == DlbState::onUser)
1815     {
1816         gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1817     }
1818     else if (cmdlineDlbState == DlbState::offCanTurnOn)
1819     {
1820         GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1821     }
1822     return DlbState::offForever;
1823 }
1824
1825 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1826  *
1827  * This function parses the parameters of "-dlb" command line option setting
1828  * corresponding state values. Then it checks the consistency of the determined
1829  * state with other run parameters and settings. As a result, the initial state
1830  * may be altered or an error may be thrown if incompatibility of options is detected.
1831  *
1832  * \param [in] mdlog       Logger.
1833  * \param [in] dlbOption   Enum value for the DLB option.
1834  * \param [in] bRecordLoad True if the load balancer is recording load information.
1835  * \param [in] mdrunOptions  Options for mdrun.
1836  * \param [in] ir          Pointer mdrun to input parameters.
1837  * \returns                DLB initial/startup state.
1838  */
1839 static DlbState determineInitialDlbState(const gmx::MDLogger&     mdlog,
1840                                          DlbOption                dlbOption,
1841                                          gmx_bool                 bRecordLoad,
1842                                          const gmx::MdrunOptions& mdrunOptions,
1843                                          const t_inputrec*        ir)
1844 {
1845     DlbState dlbState = DlbState::offCanTurnOn;
1846
1847     switch (dlbOption)
1848     {
1849         case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1850         case DlbOption::no: dlbState = DlbState::offUser; break;
1851         case DlbOption::yes: dlbState = DlbState::onUser; break;
1852         default: gmx_incons("Invalid dlbOption enum value");
1853     }
1854
1855     /* Reruns don't support DLB: bail or override auto mode */
1856     if (mdrunOptions.rerun)
1857     {
1858         std::string reasonStr = "it is not supported in reruns.";
1859         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1860     }
1861
1862     /* Unsupported integrators */
1863     if (!EI_DYNAMICS(ir->eI))
1864     {
1865         auto reasonStr = gmx::formatString(
1866                 "it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1867         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1868     }
1869
1870     /* Without cycle counters we can't time work to balance on */
1871     if (!bRecordLoad)
1872     {
1873         std::string reasonStr =
1874                 "cycle counters unsupported or not enabled in the operating system kernel.";
1875         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1876     }
1877
1878     if (mdrunOptions.reproducible)
1879     {
1880         std::string reasonStr = "you started a reproducible run.";
1881         switch (dlbState)
1882         {
1883             case DlbState::offUser: break;
1884             case DlbState::offForever:
1885                 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1886                 break;
1887             case DlbState::offCanTurnOn: return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1888             case DlbState::onCanTurnOff:
1889                 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1890                 break;
1891             case DlbState::onUser:
1892                 return forceDlbOffOrBail(
1893                         dlbState,
1894                         reasonStr
1895                                 + " In load balanced runs binary reproducibility cannot be "
1896                                   "ensured.",
1897                         mdlog);
1898             default:
1899                 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice",
1900                           static_cast<int>(dlbState));
1901         }
1902     }
1903
1904     return dlbState;
1905 }
1906
1907 static gmx_domdec_comm_t* init_dd_comm()
1908 {
1909     gmx_domdec_comm_t* comm = new gmx_domdec_comm_t;
1910
1911     comm->n_load_have    = 0;
1912     comm->n_load_collect = 0;
1913
1914     comm->haveTurnedOffDlb = false;
1915
1916     for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
1917     {
1918         comm->sum_nat[i] = 0;
1919     }
1920     comm->ndecomp   = 0;
1921     comm->nload     = 0;
1922     comm->load_step = 0;
1923     comm->load_sum  = 0;
1924     comm->load_max  = 0;
1925     clear_ivec(comm->load_lim);
1926     comm->load_mdf = 0;
1927     comm->load_pme = 0;
1928
1929     /* This should be replaced by a unique pointer */
1930     comm->balanceRegion = ddBalanceRegionAllocate();
1931
1932     return comm;
1933 }
1934
1935 /* Returns whether mtop contains constraints and/or vsites */
1936 static bool systemHasConstraintsOrVsites(const gmx_mtop_t& mtop)
1937 {
1938     auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
1939     int  nmol;
1940     while (const InteractionLists* ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
1941     {
1942         if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
1943         {
1944             return true;
1945         }
1946     }
1947
1948     return false;
1949 }
1950
1951 static void setupUpdateGroups(const gmx::MDLogger& mdlog,
1952                               const gmx_mtop_t&    mtop,
1953                               const t_inputrec&    inputrec,
1954                               const real           cutoffMargin,
1955                               DDSystemInfo*        systemInfo)
1956 {
1957     /* When we have constraints and/or vsites, it is beneficial to use
1958      * update groups (when possible) to allow independent update of groups.
1959      */
1960     if (!systemHasConstraintsOrVsites(mtop))
1961     {
1962         /* No constraints or vsites, atoms can be updated independently */
1963         return;
1964     }
1965
1966     systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
1967     systemInfo->useUpdateGroups               = (!systemInfo->updateGroupingPerMoleculetype.empty()
1968                                    && getenv("GMX_NO_UPDATEGROUPS") == nullptr);
1969
1970     if (systemInfo->useUpdateGroups)
1971     {
1972         int numUpdateGroups = 0;
1973         for (const auto& molblock : mtop.molblock)
1974         {
1975             numUpdateGroups += molblock.nmol
1976                                * systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
1977         }
1978
1979         systemInfo->maxUpdateGroupRadius = computeMaxUpdateGroupRadius(
1980                 mtop, systemInfo->updateGroupingPerMoleculetype, maxReferenceTemperature(inputrec));
1981
1982         /* To use update groups, the large domain-to-domain cutoff distance
1983          * should be compatible with the box size.
1984          */
1985         systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
1986
1987         if (systemInfo->useUpdateGroups)
1988         {
1989             GMX_LOG(mdlog.info)
1990                     .appendTextFormatted(
1991                             "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f "
1992                             "nm\n",
1993                             numUpdateGroups, mtop.natoms / static_cast<double>(numUpdateGroups),
1994                             systemInfo->maxUpdateGroupRadius);
1995         }
1996         else
1997         {
1998             GMX_LOG(mdlog.info)
1999                     .appendTextFormatted(
2000                             "The combination of rlist and box size prohibits the use of update "
2001                             "groups\n");
2002             systemInfo->updateGroupingPerMoleculetype.clear();
2003         }
2004     }
2005 }
2006
2007 UnitCellInfo::UnitCellInfo(const t_inputrec& ir) :
2008     npbcdim(numPbcDimensions(ir.pbcType)),
2009     numBoundedDimensions(inputrec2nboundeddim(&ir)),
2010     ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
2011     haveScrewPBC(ir.pbcType == PbcType::Screw)
2012 {
2013 }
2014
2015 /* Returns whether molecules are always whole, i.e. not broken by PBC */
2016 static bool moleculesAreAlwaysWhole(const gmx_mtop_t&                           mtop,
2017                                     const bool                                  useUpdateGroups,
2018                                     gmx::ArrayRef<const gmx::RangePartitioning> updateGroupingPerMoleculetype)
2019 {
2020     if (useUpdateGroups)
2021     {
2022         GMX_RELEASE_ASSERT(updateGroupingPerMoleculetype.size() == mtop.moltype.size(),
2023                            "Need one grouping per moltype");
2024         for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
2025         {
2026             if (updateGroupingPerMoleculetype[mol].numBlocks() > 1)
2027             {
2028                 return false;
2029             }
2030         }
2031     }
2032     else
2033     {
2034         for (const auto& moltype : mtop.moltype)
2035         {
2036             if (moltype.atoms.nr > 1)
2037             {
2038                 return false;
2039             }
2040         }
2041     }
2042
2043     return true;
2044 }
2045
2046 /*! \brief Generate the simulation system information */
2047 static DDSystemInfo getSystemInfo(const gmx::MDLogger&           mdlog,
2048                                   const t_commrec*               cr,
2049                                   const DomdecOptions&           options,
2050                                   const gmx_mtop_t&              mtop,
2051                                   const t_inputrec&              ir,
2052                                   const matrix                   box,
2053                                   gmx::ArrayRef<const gmx::RVec> xGlobal)
2054 {
2055     const real tenPercentMargin = 1.1;
2056
2057     DDSystemInfo systemInfo;
2058
2059     /* We need to decide on update groups early, as this affects communication distances */
2060     systemInfo.useUpdateGroups = false;
2061     if (ir.cutoff_scheme == ecutsVERLET)
2062     {
2063         real cutoffMargin = std::sqrt(max_cutoff2(ir.pbcType, box)) - ir.rlist;
2064         setupUpdateGroups(mdlog, mtop, ir, cutoffMargin, &systemInfo);
2065     }
2066
2067     systemInfo.moleculesAreAlwaysWhole = moleculesAreAlwaysWhole(
2068             mtop, systemInfo.useUpdateGroups, systemInfo.updateGroupingPerMoleculetype);
2069     systemInfo.haveInterDomainBondeds =
2070             (!systemInfo.moleculesAreAlwaysWhole || mtop.bIntermolecularInteractions);
2071     systemInfo.haveInterDomainMultiBodyBondeds =
2072             (systemInfo.haveInterDomainBondeds && multi_body_bondeds_count(&mtop) > 0);
2073
2074     if (systemInfo.useUpdateGroups)
2075     {
2076         systemInfo.haveSplitConstraints = false;
2077         systemInfo.haveSplitSettles     = false;
2078     }
2079     else
2080     {
2081         systemInfo.haveSplitConstraints = (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0
2082                                            || gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0);
2083         systemInfo.haveSplitSettles     = (gmx_mtop_ftype_count(mtop, F_SETTLE) > 0);
2084     }
2085
2086     if (ir.rlist == 0)
2087     {
2088         /* Set the cut-off to some very large value,
2089          * so we don't need if statements everywhere in the code.
2090          * We use sqrt, since the cut-off is squared in some places.
2091          */
2092         systemInfo.cutoff = GMX_CUTOFF_INF;
2093     }
2094     else
2095     {
2096         systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir.rlist);
2097     }
2098     systemInfo.minCutoffForMultiBody = 0;
2099
2100     /* Determine the minimum cell size limit, affected by many factors */
2101     systemInfo.cellsizeLimit             = 0;
2102     systemInfo.filterBondedCommunication = false;
2103
2104     /* We do not allow home atoms to move beyond the neighboring domain
2105      * between domain decomposition steps, which limits the cell size.
2106      * Get an estimate of cell size limit due to atom displacement.
2107      * In most cases this is a large overestimate, because it assumes
2108      * non-interaction atoms.
2109      * We set the chance to 1 in a trillion steps.
2110      */
2111     constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2112     const real     limitForAtomDisplacement          = minCellSizeForAtomDisplacement(
2113             mtop, ir, systemInfo.updateGroupingPerMoleculetype, c_chanceThatAtomMovesBeyondDomain);
2114     GMX_LOG(mdlog.info).appendTextFormatted("Minimum cell size due to atom displacement: %.3f nm", limitForAtomDisplacement);
2115
2116     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, limitForAtomDisplacement);
2117
2118     /* TODO: PME decomposition currently requires atoms not to be more than
2119      *       2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2120      *       In nearly all cases, limitForAtomDisplacement will be smaller
2121      *       than 2/3*rlist, so the PME requirement is satisfied.
2122      *       But it would be better for both correctness and performance
2123      *       to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2124      *       Note that we would need to improve the pairlist buffer case.
2125      */
2126
2127     if (systemInfo.haveInterDomainBondeds)
2128     {
2129         if (options.minimumCommunicationRange > 0)
2130         {
2131             systemInfo.minCutoffForMultiBody =
2132                     atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2133             if (options.useBondedCommunication)
2134             {
2135                 systemInfo.filterBondedCommunication =
2136                         (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2137             }
2138             else
2139             {
2140                 systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
2141             }
2142         }
2143         else if (ir.bPeriodicMols)
2144         {
2145             /* Can not easily determine the required cut-off */
2146             GMX_LOG(mdlog.warning)
2147                     .appendText(
2148                             "NOTE: Periodic molecules are present in this system. Because of this, "
2149                             "the domain decomposition algorithm cannot easily determine the "
2150                             "minimum cell size that it requires for treating bonded interactions. "
2151                             "Instead, domain decomposition will assume that half the non-bonded "
2152                             "cut-off will be a suitable lower bound.");
2153             systemInfo.minCutoffForMultiBody = systemInfo.cutoff / 2;
2154         }
2155         else
2156         {
2157             real r_2b, r_mb;
2158
2159             if (MASTER(cr))
2160             {
2161                 dd_bonded_cg_distance(mdlog, &mtop, &ir, as_rvec_array(xGlobal.data()), box,
2162                                       options.checkBondedInteractions, &r_2b, &r_mb);
2163             }
2164             gmx_bcast(sizeof(r_2b), &r_2b, cr->mpi_comm_mygroup);
2165             gmx_bcast(sizeof(r_mb), &r_mb, cr->mpi_comm_mygroup);
2166
2167             /* We use an initial margin of 10% for the minimum cell size,
2168              * except when we are just below the non-bonded cut-off.
2169              */
2170             if (options.useBondedCommunication)
2171             {
2172                 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2173                 {
2174                     const real r_bonded              = std::max(r_2b, r_mb);
2175                     systemInfo.minCutoffForMultiBody = tenPercentMargin * r_bonded;
2176                     /* This is the (only) place where we turn on the filtering */
2177                     systemInfo.filterBondedCommunication = true;
2178                 }
2179                 else
2180                 {
2181                     const real r_bonded = r_mb;
2182                     systemInfo.minCutoffForMultiBody =
2183                             std::min(tenPercentMargin * r_bonded, systemInfo.cutoff);
2184                 }
2185                 /* We determine cutoff_mbody later */
2186                 systemInfo.increaseMultiBodyCutoff = true;
2187             }
2188             else
2189             {
2190                 /* No special bonded communication,
2191                  * simply increase the DD cut-off.
2192                  */
2193                 systemInfo.minCutoffForMultiBody = tenPercentMargin * std::max(r_2b, r_mb);
2194                 systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
2195             }
2196         }
2197         GMX_LOG(mdlog.info)
2198                 .appendTextFormatted("Minimum cell size due to bonded interactions: %.3f nm",
2199                                      systemInfo.minCutoffForMultiBody);
2200
2201         systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.minCutoffForMultiBody);
2202     }
2203
2204     systemInfo.constraintCommunicationRange = 0;
2205     if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
2206     {
2207         /* There is a cell size limit due to the constraints (P-LINCS) */
2208         systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, &mtop, &ir);
2209         GMX_LOG(mdlog.info)
2210                 .appendTextFormatted("Estimated maximum distance required for P-LINCS: %.3f nm",
2211                                      systemInfo.constraintCommunicationRange);
2212         if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2213         {
2214             GMX_LOG(mdlog.info)
2215                     .appendText(
2216                             "This distance will limit the DD cell size, you can override this with "
2217                             "-rcon");
2218         }
2219     }
2220     else if (options.constraintCommunicationRange > 0)
2221     {
2222         /* Here we do not check for dd->splitConstraints.
2223          * because one can also set a cell size limit for virtual sites only
2224          * and at this point we don't know yet if there are intercg v-sites.
2225          */
2226         GMX_LOG(mdlog.info)
2227                 .appendTextFormatted("User supplied maximum distance required for P-LINCS: %.3f nm",
2228                                      options.constraintCommunicationRange);
2229         systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2230     }
2231     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.constraintCommunicationRange);
2232
2233     return systemInfo;
2234 }
2235
2236 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
2237  * implemented. */
2238 static void checkDDGridSetup(const DDGridSetup&   ddGridSetup,
2239                              const t_commrec*     cr,
2240                              const DomdecOptions& options,
2241                              const DDSettings&    ddSettings,
2242                              const DDSystemInfo&  systemInfo,
2243                              const real           cellsizeLimit,
2244                              const gmx_ddbox_t&   ddbox)
2245 {
2246     if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
2247     {
2248         char     buf[STRLEN];
2249         gmx_bool bC = (systemInfo.haveSplitConstraints
2250                        && systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2251         sprintf(buf, "Change the number of ranks or mdrun option %s%s%s", !bC ? "-rdd" : "-rcon",
2252                 ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2253                 bC ? " or your LINCS settings" : "");
2254
2255         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2256                              "There is no domain decomposition for %d ranks that is compatible "
2257                              "with the given box and a minimum cell size of %g nm\n"
2258                              "%s\n"
2259                              "Look in the log file for details on the domain decomposition",
2260                              cr->nnodes - ddGridSetup.numPmeOnlyRanks, cellsizeLimit, buf);
2261     }
2262
2263     const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
2264     if (acs < cellsizeLimit)
2265     {
2266         if (options.numCells[XX] <= 0)
2267         {
2268             GMX_RELEASE_ASSERT(
2269                     false,
2270                     "dd_choose_grid() should return a grid that satisfies the cell size limits");
2271         }
2272         else
2273         {
2274             gmx_fatal_collective(
2275                     FARGS, cr->mpi_comm_mysim, MASTER(cr),
2276                     "The initial cell size (%f) is smaller than the cell size limit (%f), change "
2277                     "options -dd, -rdd or -rcon, see the log file for details",
2278                     acs, cellsizeLimit);
2279         }
2280     }
2281
2282     const int numPPRanks =
2283             ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2284     if (cr->nnodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2285     {
2286         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2287                              "The size of the domain decomposition grid (%d) does not match the "
2288                              "number of PP ranks (%d). The total number of ranks is %d",
2289                              numPPRanks, cr->nnodes - ddGridSetup.numPmeOnlyRanks, cr->nnodes);
2290     }
2291     if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2292     {
2293         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2294                              "The number of separate PME ranks (%d) is larger than the number of "
2295                              "PP ranks (%d), this is not supported.",
2296                              ddGridSetup.numPmeOnlyRanks, numPPRanks);
2297     }
2298 }
2299
2300 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2301 static DDRankSetup getDDRankSetup(const gmx::MDLogger& mdlog,
2302                                   t_commrec*           cr,
2303                                   const DDGridSetup&   ddGridSetup,
2304                                   const t_inputrec&    ir)
2305 {
2306     GMX_LOG(mdlog.info)
2307             .appendTextFormatted("Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2308                                  ddGridSetup.numDomains[XX], ddGridSetup.numDomains[YY],
2309                                  ddGridSetup.numDomains[ZZ], ddGridSetup.numPmeOnlyRanks);
2310
2311     DDRankSetup ddRankSetup;
2312
2313     ddRankSetup.numPPRanks = cr->nnodes - ddGridSetup.numPmeOnlyRanks;
2314     copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2315
2316     ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2317     if (ddRankSetup.usePmeOnlyRanks)
2318     {
2319         ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2320     }
2321     else
2322     {
2323         ddRankSetup.numRanksDoingPme =
2324                 ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2325     }
2326
2327     if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2328     {
2329         /* The following choices should match those
2330          * in comm_cost_est in domdec_setup.c.
2331          * Note that here the checks have to take into account
2332          * that the decomposition might occur in a different order than xyz
2333          * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2334          * in which case they will not match those in comm_cost_est,
2335          * but since that is mainly for testing purposes that's fine.
2336          */
2337         if (ddGridSetup.numDDDimensions >= 2 && ddGridSetup.ddDimensions[0] == XX
2338             && ddGridSetup.ddDimensions[1] == YY
2339             && ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX]
2340             && ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0
2341             && getenv("GMX_PMEONEDD") == nullptr)
2342         {
2343             ddRankSetup.npmedecompdim = 2;
2344             ddRankSetup.npmenodes_x   = ddGridSetup.numDomains[XX];
2345             ddRankSetup.npmenodes_y   = ddRankSetup.numRanksDoingPme / ddRankSetup.npmenodes_x;
2346         }
2347         else
2348         {
2349             /* In case nc is 1 in both x and y we could still choose to
2350              * decompose pme in y instead of x, but we use x for simplicity.
2351              */
2352             ddRankSetup.npmedecompdim = 1;
2353             if (ddGridSetup.ddDimensions[0] == YY)
2354             {
2355                 ddRankSetup.npmenodes_x = 1;
2356                 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2357             }
2358             else
2359             {
2360                 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2361                 ddRankSetup.npmenodes_y = 1;
2362             }
2363         }
2364         GMX_LOG(mdlog.info)
2365                 .appendTextFormatted("PME domain decomposition: %d x %d x %d",
2366                                      ddRankSetup.npmenodes_x, ddRankSetup.npmenodes_y, 1);
2367     }
2368     else
2369     {
2370         ddRankSetup.npmedecompdim = 0;
2371         ddRankSetup.npmenodes_x   = 0;
2372         ddRankSetup.npmenodes_y   = 0;
2373     }
2374
2375     return ddRankSetup;
2376 }
2377
2378 /*! \brief Set the cell size and interaction limits */
2379 static void set_dd_limits(const gmx::MDLogger& mdlog,
2380                           t_commrec*           cr,
2381                           gmx_domdec_t*        dd,
2382                           const DomdecOptions& options,
2383                           const DDSettings&    ddSettings,
2384                           const DDSystemInfo&  systemInfo,
2385                           const DDGridSetup&   ddGridSetup,
2386                           const int            numPPRanks,
2387                           const gmx_mtop_t*    mtop,
2388                           const t_inputrec*    ir,
2389                           const gmx_ddbox_t&   ddbox)
2390 {
2391     gmx_domdec_comm_t* comm = dd->comm;
2392     comm->ddSettings        = ddSettings;
2393
2394     /* Initialize to GPU share count to 0, might change later */
2395     comm->nrank_gpu_shared = 0;
2396
2397     comm->dlbState = comm->ddSettings.initialDlbState;
2398     dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2399     /* To consider turning DLB on after 2*nstlist steps we need to check
2400      * at partitioning count 3. Thus we need to increase the first count by 2.
2401      */
2402     comm->ddPartioningCountFirstDlbOff += 2;
2403
2404     comm->bPMELoadBalDLBLimits = FALSE;
2405
2406     /* Allocate the charge group/atom sorting struct */
2407     comm->sort = std::make_unique<gmx_domdec_sort_t>();
2408
2409     comm->systemInfo = systemInfo;
2410
2411     if (systemInfo.useUpdateGroups)
2412     {
2413         /* Note: We would like to use dd->nnodes for the atom count estimate,
2414          *       but that is not yet available here. But this anyhow only
2415          *       affect performance up to the second dd_partition_system call.
2416          */
2417         const int homeAtomCountEstimate = mtop->natoms / numPPRanks;
2418         comm->updateGroupsCog           = std::make_unique<gmx::UpdateGroupsCog>(
2419                 *mtop, systemInfo.updateGroupingPerMoleculetype, maxReferenceTemperature(*ir),
2420                 homeAtomCountEstimate);
2421     }
2422
2423     /* Set the DD setup given by ddGridSetup */
2424     copy_ivec(ddGridSetup.numDomains, dd->numCells);
2425     dd->ndim = ddGridSetup.numDDDimensions;
2426     copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2427
2428     dd->nnodes = dd->numCells[XX] * dd->numCells[YY] * dd->numCells[ZZ];
2429
2430     snew(comm->slb_frac, DIM);
2431     if (isDlbDisabled(comm))
2432     {
2433         comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->numCells[XX], options.cellSizeX);
2434         comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->numCells[YY], options.cellSizeY);
2435         comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->numCells[ZZ], options.cellSizeZ);
2436     }
2437
2438     /* Set the multi-body cut-off and cellsize limit for DLB */
2439     comm->cutoff_mbody   = systemInfo.minCutoffForMultiBody;
2440     comm->cellsize_limit = systemInfo.cellsizeLimit;
2441     if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2442     {
2443         if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2444         {
2445             /* Set the bonded communication distance to halfway
2446              * the minimum and the maximum,
2447              * since the extra communication cost is nearly zero.
2448              */
2449             real acs           = average_cellsize_min(ddbox, dd->numCells);
2450             comm->cutoff_mbody = 0.5 * (systemInfo.minCutoffForMultiBody + acs);
2451             if (!isDlbDisabled(comm))
2452             {
2453                 /* Check if this does not limit the scaling */
2454                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, options.dlbScaling * acs);
2455             }
2456             if (!systemInfo.filterBondedCommunication)
2457             {
2458                 /* Without bBondComm do not go beyond the n.b. cut-off */
2459                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2460                 if (comm->cellsize_limit >= systemInfo.cutoff)
2461                 {
2462                     /* We don't loose a lot of efficieny
2463                      * when increasing it to the n.b. cut-off.
2464                      * It can even be slightly faster, because we need
2465                      * less checks for the communication setup.
2466                      */
2467                     comm->cutoff_mbody = systemInfo.cutoff;
2468                 }
2469             }
2470             /* Check if we did not end up below our original limit */
2471             comm->cutoff_mbody = std::max(comm->cutoff_mbody, systemInfo.minCutoffForMultiBody);
2472
2473             if (comm->cutoff_mbody > comm->cellsize_limit)
2474             {
2475                 comm->cellsize_limit = comm->cutoff_mbody;
2476             }
2477         }
2478         /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2479     }
2480
2481     if (debug)
2482     {
2483         fprintf(debug,
2484                 "Bonded atom communication beyond the cut-off: %s\n"
2485                 "cellsize limit %f\n",
2486                 gmx::boolToString(systemInfo.filterBondedCommunication), comm->cellsize_limit);
2487     }
2488
2489     if (MASTER(cr))
2490     {
2491         check_dd_restrictions(dd, ir, mdlog);
2492     }
2493 }
2494
2495 void dd_init_bondeds(FILE*                      fplog,
2496                      gmx_domdec_t*              dd,
2497                      const gmx_mtop_t&          mtop,
2498                      const gmx_vsite_t*         vsite,
2499                      const t_inputrec*          ir,
2500                      gmx_bool                   bBCheck,
2501                      gmx::ArrayRef<cginfo_mb_t> cginfo_mb)
2502 {
2503     gmx_domdec_comm_t* comm;
2504
2505     dd_make_reverse_top(fplog, dd, &mtop, vsite, ir, bBCheck);
2506
2507     comm = dd->comm;
2508
2509     if (comm->systemInfo.filterBondedCommunication)
2510     {
2511         /* Communicate atoms beyond the cut-off for bonded interactions */
2512         comm->bondedLinks = makeBondedLinks(mtop, cginfo_mb);
2513     }
2514     else
2515     {
2516         /* Only communicate atoms based on cut-off */
2517         comm->bondedLinks = nullptr;
2518     }
2519 }
2520
2521 static void writeSettings(gmx::TextWriter*   log,
2522                           gmx_domdec_t*      dd,
2523                           const gmx_mtop_t*  mtop,
2524                           const t_inputrec*  ir,
2525                           gmx_bool           bDynLoadBal,
2526                           real               dlb_scale,
2527                           const gmx_ddbox_t* ddbox)
2528 {
2529     gmx_domdec_comm_t* comm;
2530     int                d;
2531     ivec               np;
2532     real               limit, shrink;
2533
2534     comm = dd->comm;
2535
2536     if (bDynLoadBal)
2537     {
2538         log->writeString("The maximum number of communication pulses is:");
2539         for (d = 0; d < dd->ndim; d++)
2540         {
2541             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2542         }
2543         log->ensureLineBreak();
2544         log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm",
2545                                 comm->cellsize_limit);
2546         log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2547         log->writeString("The allowed shrink of domain decomposition cells is:");
2548         for (d = 0; d < DIM; d++)
2549         {
2550             if (dd->numCells[d] > 1)
2551             {
2552                 if (d >= ddbox->npbcdim && dd->numCells[d] == 2)
2553                 {
2554                     shrink = 0;
2555                 }
2556                 else
2557                 {
2558                     shrink = comm->cellsize_min_dlb[d]
2559                              / (ddbox->box_size[d] * ddbox->skew_fac[d] / dd->numCells[d]);
2560                 }
2561                 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2562             }
2563         }
2564         log->ensureLineBreak();
2565     }
2566     else
2567     {
2568         set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2569         log->writeString("The initial number of communication pulses is:");
2570         for (d = 0; d < dd->ndim; d++)
2571         {
2572             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2573         }
2574         log->ensureLineBreak();
2575         log->writeString("The initial domain decomposition cell size is:");
2576         for (d = 0; d < DIM; d++)
2577         {
2578             if (dd->numCells[d] > 1)
2579             {
2580                 log->writeStringFormatted(" %c %.2f nm", dim2char(d), dd->comm->cellsize_min[d]);
2581             }
2582         }
2583         log->ensureLineBreak();
2584         log->writeLine();
2585     }
2586
2587     const bool haveInterDomainVsites =
2588             (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
2589
2590     if (comm->systemInfo.haveInterDomainBondeds || haveInterDomainVsites
2591         || comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2592     {
2593         std::string decompUnits;
2594         if (comm->systemInfo.useUpdateGroups)
2595         {
2596             decompUnits = "atom groups";
2597         }
2598         else
2599         {
2600             decompUnits = "atoms";
2601         }
2602
2603         log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:",
2604                                 decompUnits.c_str());
2605         log->writeLineFormatted("%40s  %-7s %6.3f nm", "non-bonded interactions", "",
2606                                 comm->systemInfo.cutoff);
2607
2608         if (bDynLoadBal)
2609         {
2610             limit = dd->comm->cellsize_limit;
2611         }
2612         else
2613         {
2614             if (dd->unitCellInfo.ddBoxIsDynamic)
2615             {
2616                 log->writeLine(
2617                         "(the following are initial values, they could change due to box "
2618                         "deformation)");
2619             }
2620             limit = dd->comm->cellsize_min[XX];
2621             for (d = 1; d < DIM; d++)
2622             {
2623                 limit = std::min(limit, dd->comm->cellsize_min[d]);
2624             }
2625         }
2626
2627         if (comm->systemInfo.haveInterDomainBondeds)
2628         {
2629             log->writeLineFormatted("%40s  %-7s %6.3f nm", "two-body bonded interactions", "(-rdd)",
2630                                     std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2631             log->writeLineFormatted("%40s  %-7s %6.3f nm", "multi-body bonded interactions",
2632                                     "(-rdd)",
2633                                     (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm))
2634                                             ? comm->cutoff_mbody
2635                                             : std::min(comm->systemInfo.cutoff, limit));
2636         }
2637         if (haveInterDomainVsites)
2638         {
2639             log->writeLineFormatted("%40s  %-7s %6.3f nm", "virtual site constructions", "(-rcon)", limit);
2640         }
2641         if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2642         {
2643             std::string separation =
2644                     gmx::formatString("atoms separated by up to %d constraints", 1 + ir->nProjOrder);
2645             log->writeLineFormatted("%40s  %-7s %6.3f nm\n", separation.c_str(), "(-rcon)", limit);
2646         }
2647         log->ensureLineBreak();
2648     }
2649 }
2650
2651 static void logSettings(const gmx::MDLogger& mdlog,
2652                         gmx_domdec_t*        dd,
2653                         const gmx_mtop_t*    mtop,
2654                         const t_inputrec*    ir,
2655                         real                 dlb_scale,
2656                         const gmx_ddbox_t*   ddbox)
2657 {
2658     gmx::StringOutputStream stream;
2659     gmx::TextWriter         log(&stream);
2660     writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2661     if (dd->comm->dlbState == DlbState::offCanTurnOn)
2662     {
2663         {
2664             log.ensureEmptyLine();
2665             log.writeLine(
2666                     "When dynamic load balancing gets turned on, these settings will change to:");
2667         }
2668         writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2669     }
2670     GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2671 }
2672
2673 static void set_cell_limits_dlb(const gmx::MDLogger& mdlog,
2674                                 gmx_domdec_t*        dd,
2675                                 real                 dlb_scale,
2676                                 const t_inputrec*    ir,
2677                                 const gmx_ddbox_t*   ddbox)
2678 {
2679     gmx_domdec_comm_t* comm;
2680     int                d, dim, npulse, npulse_d_max, npulse_d;
2681     gmx_bool           bNoCutOff;
2682
2683     comm = dd->comm;
2684
2685     bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2686
2687     /* Determine the maximum number of comm. pulses in one dimension */
2688
2689     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2690
2691     /* Determine the maximum required number of grid pulses */
2692     if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2693     {
2694         /* Only a single pulse is required */
2695         npulse = 1;
2696     }
2697     else if (!bNoCutOff && comm->cellsize_limit > 0)
2698     {
2699         /* We round down slightly here to avoid overhead due to the latency
2700          * of extra communication calls when the cut-off
2701          * would be only slightly longer than the cell size.
2702          * Later cellsize_limit is redetermined,
2703          * so we can not miss interactions due to this rounding.
2704          */
2705         npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff / comm->cellsize_limit);
2706     }
2707     else
2708     {
2709         /* There is no cell size limit */
2710         npulse = std::max(dd->numCells[XX] - 1, std::max(dd->numCells[YY] - 1, dd->numCells[ZZ] - 1));
2711     }
2712
2713     if (!bNoCutOff && npulse > 1)
2714     {
2715         /* See if we can do with less pulses, based on dlb_scale */
2716         npulse_d_max = 0;
2717         for (d = 0; d < dd->ndim; d++)
2718         {
2719             dim      = dd->dim[d];
2720             npulse_d = static_cast<int>(
2721                     1
2722                     + dd->numCells[dim] * comm->systemInfo.cutoff
2723                               / (ddbox->box_size[dim] * ddbox->skew_fac[dim] * dlb_scale));
2724             npulse_d_max = std::max(npulse_d_max, npulse_d);
2725         }
2726         npulse = std::min(npulse, npulse_d_max);
2727     }
2728
2729     /* This env var can override npulse */
2730     d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2731     if (d > 0)
2732     {
2733         npulse = d;
2734     }
2735
2736     comm->maxpulse       = 1;
2737     comm->bVacDLBNoLimit = (ir->pbcType == PbcType::No);
2738     for (d = 0; d < dd->ndim; d++)
2739     {
2740         comm->cd[d].np_dlb = std::min(npulse, dd->numCells[dd->dim[d]] - 1);
2741         comm->maxpulse     = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2742         if (comm->cd[d].np_dlb < dd->numCells[dd->dim[d]] - 1)
2743         {
2744             comm->bVacDLBNoLimit = FALSE;
2745         }
2746     }
2747
2748     /* cellsize_limit is set for LINCS in init_domain_decomposition */
2749     if (!comm->bVacDLBNoLimit)
2750     {
2751         comm->cellsize_limit = std::max(comm->cellsize_limit, comm->systemInfo.cutoff / comm->maxpulse);
2752     }
2753     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2754     /* Set the minimum cell size for each DD dimension */
2755     for (d = 0; d < dd->ndim; d++)
2756     {
2757         if (comm->bVacDLBNoLimit || comm->cd[d].np_dlb * comm->cellsize_limit >= comm->systemInfo.cutoff)
2758         {
2759             comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2760         }
2761         else
2762         {
2763             comm->cellsize_min_dlb[dd->dim[d]] = comm->systemInfo.cutoff / comm->cd[d].np_dlb;
2764         }
2765     }
2766     if (comm->cutoff_mbody <= 0)
2767     {
2768         comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2769     }
2770     if (isDlbOn(comm))
2771     {
2772         set_dlb_limits(dd);
2773     }
2774 }
2775
2776 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t& dd)
2777 {
2778     return dd.comm->systemInfo.moleculesAreAlwaysWhole;
2779 }
2780
2781 gmx_bool dd_bonded_molpbc(const gmx_domdec_t* dd, PbcType pbcType)
2782 {
2783     /* If each molecule is a single charge group
2784      * or we use domain decomposition for each periodic dimension,
2785      * we do not need to take pbc into account for the bonded interactions.
2786      */
2787     return (pbcType != PbcType::No && dd->comm->systemInfo.haveInterDomainBondeds
2788             && !(dd->numCells[XX] > 1 && dd->numCells[YY] > 1
2789                  && (dd->numCells[ZZ] > 1 || pbcType == PbcType::XY)));
2790 }
2791
2792 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2793 static void set_ddgrid_parameters(const gmx::MDLogger& mdlog,
2794                                   gmx_domdec_t*        dd,
2795                                   real                 dlb_scale,
2796                                   const gmx_mtop_t*    mtop,
2797                                   const t_inputrec*    ir,
2798                                   const gmx_ddbox_t*   ddbox)
2799 {
2800     gmx_domdec_comm_t* comm        = dd->comm;
2801     DDRankSetup&       ddRankSetup = comm->ddRankSetup;
2802
2803     if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2804     {
2805         init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2806         if (ddRankSetup.npmedecompdim >= 2)
2807         {
2808             init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2809         }
2810     }
2811     else
2812     {
2813         ddRankSetup.numRanksDoingPme = 0;
2814         if (dd->pme_nodeid >= 0)
2815         {
2816             gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2817                                  "Can not have separate PME ranks without PME electrostatics");
2818         }
2819     }
2820
2821     if (debug)
2822     {
2823         fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2824     }
2825     if (!isDlbDisabled(comm))
2826     {
2827         set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2828     }
2829
2830     logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
2831
2832     real vol_frac;
2833     if (ir->pbcType == PbcType::No)
2834     {
2835         vol_frac = 1 - 1 / static_cast<double>(dd->nnodes);
2836     }
2837     else
2838     {
2839         vol_frac = (1 + comm_box_frac(dd->numCells, comm->systemInfo.cutoff, *ddbox))
2840                    / static_cast<double>(dd->nnodes);
2841     }
2842     if (debug)
2843     {
2844         fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2845     }
2846     int natoms_tot = mtop->natoms;
2847
2848     dd->ga2la = new gmx_ga2la_t(natoms_tot, static_cast<int>(vol_frac * natoms_tot));
2849 }
2850
2851 /*! \brief Get some important DD parameters which can be modified by env.vars */
2852 static DDSettings getDDSettings(const gmx::MDLogger&     mdlog,
2853                                 const DomdecOptions&     options,
2854                                 const gmx::MdrunOptions& mdrunOptions,
2855                                 const t_inputrec&        ir)
2856 {
2857     DDSettings ddSettings;
2858
2859     ddSettings.useSendRecv2        = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2860     ddSettings.dlb_scale_lim       = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2861     ddSettings.request1D           = bool(dd_getenv(mdlog, "GMX_DD_1D", 0));
2862     ddSettings.useDDOrderZYX       = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
2863     ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
2864     ddSettings.eFlop               = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2865     const int recload              = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2866     ddSettings.nstDDDump           = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2867     ddSettings.nstDDDumpGrid       = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2868     ddSettings.DD_debug            = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
2869
2870     if (ddSettings.useSendRecv2)
2871     {
2872         GMX_LOG(mdlog.info)
2873                 .appendText(
2874                         "Will use two sequential MPI_Sendrecv calls instead of two simultaneous "
2875                         "non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite "
2876                         "communication");
2877     }
2878
2879     if (ddSettings.eFlop)
2880     {
2881         GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2882         ddSettings.recordLoad = true;
2883     }
2884     else
2885     {
2886         ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
2887     }
2888
2889     ddSettings.initialDlbState = determineInitialDlbState(mdlog, options.dlbOption,
2890                                                           ddSettings.recordLoad, mdrunOptions, &ir);
2891     GMX_LOG(mdlog.info)
2892             .appendTextFormatted("Dynamic load balancing: %s",
2893                                  edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);
2894
2895     return ddSettings;
2896 }
2897
2898 gmx_domdec_t::gmx_domdec_t(const t_inputrec& ir) : unitCellInfo(ir) {}
2899
2900 /*! \brief Return whether the simulation described can run a 1D DD.
2901  *
2902  * The GPU halo exchange code requires 1D DD. Such a DD
2903  * generally requires a larger box than other possible decompositions
2904  * with the same rank count, so the calling code might need to decide
2905  * what is the most appropriate way to run the simulation based on
2906  * whether such a DD is possible.
2907  *
2908  * This function works like init_domain_decomposition(), but will not
2909  * give a fatal error, and only uses \c cr for communicating between
2910  * ranks.
2911  *
2912  * It is safe to call before thread-MPI spawns ranks, so that
2913  * thread-MPI can decide whether and how to trigger the GPU halo
2914  * exchange code path. The number of PME ranks, if any, should be set
2915  * in \c options.numPmeRanks.
2916  */
2917 static bool canMake1DDomainDecomposition(const DDSettings&              ddSettingsOriginal,
2918                                          const t_commrec*               cr,
2919                                          const int                      numRanksRequested,
2920                                          const DomdecOptions&           options,
2921                                          const gmx_mtop_t&              mtop,
2922                                          const t_inputrec&              ir,
2923                                          const matrix                   box,
2924                                          gmx::ArrayRef<const gmx::RVec> xGlobal)
2925 {
2926     // Ensure we don't write any output from this checking routine
2927     gmx::MDLogger dummyLogger;
2928
2929     DDSystemInfo systemInfo = getSystemInfo(dummyLogger, cr, options, mtop, ir, box, xGlobal);
2930
2931     DDSettings ddSettings = ddSettingsOriginal;
2932     ddSettings.request1D  = true;
2933     const real gridSetupCellsizeLimit =
2934             getDDGridSetupCellSizeLimit(dummyLogger, !isDlbDisabled(ddSettings.initialDlbState),
2935                                         options.dlbScaling, ir, systemInfo.cellsizeLimit);
2936     gmx_ddbox_t ddbox = { 0 };
2937     DDGridSetup ddGridSetup =
2938             getDDGridSetup(dummyLogger, cr, numRanksRequested, options, ddSettings, systemInfo,
2939                            gridSetupCellsizeLimit, mtop, ir, box, xGlobal, &ddbox);
2940
2941     const bool canMake1DDD = (ddGridSetup.numDomains[XX] != 0);
2942
2943     return canMake1DDD;
2944 }
2945
2946 bool is1D(const gmx_domdec_t& dd)
2947 {
2948     const int maxDimensionSize = std::max(dd.numCells[XX], std::max(dd.numCells[YY], dd.numCells[ZZ]));
2949     const int  productOfDimensionSizes      = dd.numCells[XX] * dd.numCells[YY] * dd.numCells[ZZ];
2950     const bool decompositionHasOneDimension = (maxDimensionSize == productOfDimensionSizes);
2951
2952     return decompositionHasOneDimension;
2953 }
2954
2955 namespace gmx
2956 {
2957
2958 // TODO once the functionality stablizes, move this class and
2959 // supporting functionality into builder.cpp
2960 /*! \brief Impl class for DD builder */
2961 class DomainDecompositionBuilder::Impl
2962 {
2963 public:
2964     //! Constructor
2965     Impl(const MDLogger&      mdlog,
2966          t_commrec*           cr,
2967          const DomdecOptions& options,
2968          const MdrunOptions&  mdrunOptions,
2969          bool                 prefer1D,
2970          const gmx_mtop_t&    mtop,
2971          const t_inputrec&    ir,
2972          const matrix         box,
2973          ArrayRef<const RVec> xGlobal);
2974
2975     //! Build the resulting DD manager
2976     gmx_domdec_t* build(LocalAtomSetManager* atomSets);
2977
2978     //! Objects used in constructing and configuring DD
2979     //! {
2980     //! Logging object
2981     const MDLogger& mdlog_;
2982     //! Communication object
2983     t_commrec* cr_;
2984     //! User-supplied options configuring DD behavior
2985     const DomdecOptions options_;
2986     //! Global system topology
2987     const gmx_mtop_t& mtop_;
2988     //! User input values from the tpr file
2989     const t_inputrec& ir_;
2990     //! }
2991
2992     //! Internal objects used in constructing DD
2993     //! {
2994     //! Settings combined from the user input
2995     DDSettings ddSettings_;
2996     //! Information derived from the simulation system
2997     DDSystemInfo systemInfo_;
2998     //! Box structure
2999     gmx_ddbox_t ddbox_ = { 0 };
3000     //! Organization of the DD grids
3001     DDGridSetup ddGridSetup_;
3002     //! Organzation of the DD ranks
3003     DDRankSetup ddRankSetup_;
3004     //! Number of DD cells in each dimension
3005     ivec ddCellIndex_ = { 0, 0, 0 };
3006     //! IDs of PME-only ranks
3007     std::vector<int> pmeRanks_;
3008     //! Contains a valid Cartesian-communicator-based setup, or defaults.
3009     CartesianRankSetup cartSetup_;
3010     //! }
3011 };
3012
3013 DomainDecompositionBuilder::Impl::Impl(const MDLogger&      mdlog,
3014                                        t_commrec*           cr,
3015                                        const DomdecOptions& options,
3016                                        const MdrunOptions&  mdrunOptions,
3017                                        const bool           prefer1D,
3018                                        const gmx_mtop_t&    mtop,
3019                                        const t_inputrec&    ir,
3020                                        const matrix         box,
3021                                        ArrayRef<const RVec> xGlobal) :
3022     mdlog_(mdlog),
3023     cr_(cr),
3024     options_(options),
3025     mtop_(mtop),
3026     ir_(ir)
3027 {
3028     GMX_LOG(mdlog_.info).appendTextFormatted("\nInitializing Domain Decomposition on %d ranks", cr_->nnodes);
3029
3030     ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_);
3031
3032     if (prefer1D
3033         && canMake1DDomainDecomposition(ddSettings_, cr_, cr_->nnodes, options_, mtop_, ir_, box, xGlobal))
3034     {
3035         ddSettings_.request1D = true;
3036     }
3037
3038     if (ddSettings_.eFlop > 1)
3039     {
3040         /* Ensure that we have different random flop counts on different ranks */
3041         srand(1 + cr_->nodeid);
3042     }
3043
3044     systemInfo_ = getSystemInfo(mdlog_, cr_, options_, mtop_, ir_, box, xGlobal);
3045
3046     const int  numRanksRequested         = cr_->nnodes;
3047     const bool checkForLargePrimeFactors = (options_.numCells[0] <= 0);
3048     checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir_.coulombtype),
3049                                    options_.numPmeRanks, checkForLargePrimeFactors);
3050
3051     // DD grid setup uses a more different cell size limit for
3052     // automated setup than the one in systemInfo_. The latter is used
3053     // in set_dd_limits() to configure DLB, for example.
3054     const real gridSetupCellsizeLimit =
3055             getDDGridSetupCellSizeLimit(mdlog_, !isDlbDisabled(ddSettings_.initialDlbState),
3056                                         options_.dlbScaling, ir_, systemInfo_.cellsizeLimit);
3057     ddGridSetup_ = getDDGridSetup(mdlog_, cr_, numRanksRequested, options_, ddSettings_, systemInfo_,
3058                                   gridSetupCellsizeLimit, mtop_, ir_, box, xGlobal, &ddbox_);
3059     checkDDGridSetup(ddGridSetup_, cr_, options_, ddSettings_, systemInfo_, gridSetupCellsizeLimit, ddbox_);
3060
3061     cr_->npmenodes = ddGridSetup_.numPmeOnlyRanks;
3062
3063     ddRankSetup_ = getDDRankSetup(mdlog_, cr_, ddGridSetup_, ir_);
3064
3065     /* Generate the group communicator, also decides the duty of each rank */
3066     cartSetup_ = makeGroupCommunicators(mdlog_, ddSettings_, options_.rankOrder, ddRankSetup_, cr_,
3067                                         ddCellIndex_, &pmeRanks_);
3068 }
3069
3070 gmx_domdec_t* DomainDecompositionBuilder::Impl::build(LocalAtomSetManager* atomSets)
3071 {
3072     gmx_domdec_t* dd = new gmx_domdec_t(ir_);
3073
3074     copy_ivec(ddCellIndex_, dd->ci);
3075
3076     dd->comm = init_dd_comm();
3077
3078     dd->comm->ddRankSetup        = ddRankSetup_;
3079     dd->comm->cartesianRankSetup = cartSetup_;
3080
3081     set_dd_limits(mdlog_, cr_, dd, options_, ddSettings_, systemInfo_, ddGridSetup_,
3082                   ddRankSetup_.numPPRanks, &mtop_, &ir_, ddbox_);
3083
3084     setupGroupCommunication(mdlog_, ddSettings_, pmeRanks_, cr_, mtop_.natoms, dd);
3085
3086     if (thisRankHasDuty(cr_, DUTY_PP))
3087     {
3088         set_ddgrid_parameters(mdlog_, dd, options_.dlbScaling, &mtop_, &ir_, &ddbox_);
3089
3090         setup_neighbor_relations(dd);
3091     }
3092
3093     /* Set overallocation to avoid frequent reallocation of arrays */
3094     set_over_alloc_dd(TRUE);
3095
3096     dd->atomSets = atomSets;
3097
3098     return dd;
3099 }
3100
3101 DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger&      mdlog,
3102                                                        t_commrec*           cr,
3103                                                        const DomdecOptions& options,
3104                                                        const MdrunOptions&  mdrunOptions,
3105                                                        const bool           prefer1D,
3106                                                        const gmx_mtop_t&    mtop,
3107                                                        const t_inputrec&    ir,
3108                                                        const matrix         box,
3109                                                        ArrayRef<const RVec> xGlobal) :
3110     impl_(new Impl(mdlog, cr, options, mdrunOptions, prefer1D, mtop, ir, box, xGlobal))
3111 {
3112 }
3113
3114 gmx_domdec_t* DomainDecompositionBuilder::build(LocalAtomSetManager* atomSets)
3115 {
3116     return impl_->build(atomSets);
3117 }
3118
3119 DomainDecompositionBuilder::~DomainDecompositionBuilder() = default;
3120
3121 } // namespace gmx
3122
3123 static gmx_bool test_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3124 {
3125     gmx_domdec_t* dd;
3126     gmx_ddbox_t   ddbox;
3127     int           d, dim, np;
3128     real          inv_cell_size;
3129     int           LocallyLimited;
3130
3131     dd = cr->dd;
3132
3133     set_ddbox(*dd, false, box, true, x, &ddbox);
3134
3135     LocallyLimited = 0;
3136
3137     for (d = 0; d < dd->ndim; d++)
3138     {
3139         dim = dd->dim[d];
3140
3141         inv_cell_size = DD_CELL_MARGIN * dd->numCells[dim] / ddbox.box_size[dim];
3142         if (dd->unitCellInfo.ddBoxIsDynamic)
3143         {
3144             inv_cell_size *= DD_PRES_SCALE_MARGIN;
3145         }
3146
3147         np = 1 + static_cast<int>(cutoffRequested * inv_cell_size * ddbox.skew_fac[dim]);
3148
3149         if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3150         {
3151             if (np > dd->comm->cd[d].np_dlb)
3152             {
3153                 return FALSE;
3154             }
3155
3156             /* If a current local cell size is smaller than the requested
3157              * cut-off, we could still fix it, but this gets very complicated.
3158              * Without fixing here, we might actually need more checks.
3159              */
3160             real cellSizeAlongDim =
3161                     (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim]) * ddbox.skew_fac[dim];
3162             if (cellSizeAlongDim * dd->comm->cd[d].np_dlb < cutoffRequested)
3163             {
3164                 LocallyLimited = 1;
3165             }
3166         }
3167     }
3168
3169     if (!isDlbDisabled(dd->comm))
3170     {
3171         /* If DLB is not active yet, we don't need to check the grid jumps.
3172          * Actually we shouldn't, because then the grid jump data is not set.
3173          */
3174         if (isDlbOn(dd->comm) && check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3175         {
3176             LocallyLimited = 1;
3177         }
3178
3179         gmx_sumi(1, &LocallyLimited, cr);
3180
3181         if (LocallyLimited > 0)
3182         {
3183             return FALSE;
3184         }
3185     }
3186
3187     return TRUE;
3188 }
3189
3190 gmx_bool change_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3191 {
3192     gmx_bool bCutoffAllowed;
3193
3194     bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3195
3196     if (bCutoffAllowed)
3197     {
3198         cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3199     }
3200
3201     return bCutoffAllowed;
3202 }
3203
3204 void constructGpuHaloExchange(const gmx::MDLogger&            mdlog,
3205                               const t_commrec&                cr,
3206                               const gmx::DeviceStreamManager& deviceStreamManager)
3207 {
3208     GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedLocal),
3209                        "Local non-bonded stream should be valid when using"
3210                        "GPU halo exchange.");
3211     GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedNonLocal),
3212                        "Non-local non-bonded stream should be valid when using "
3213                        "GPU halo exchange.");
3214     int gpuHaloExchangeSize = 0;
3215     int pulseStart          = 0;
3216     if (cr.dd->gpuHaloExchange.empty())
3217     {
3218         GMX_LOG(mdlog.warning)
3219                 .asParagraph()
3220                 .appendTextFormatted(
3221                         "NOTE: Activating the 'GPU halo exchange' feature, enabled "
3222                         "by the "
3223                         "GMX_GPU_DD_COMMS environment variable.");
3224     }
3225     else
3226     {
3227         gpuHaloExchangeSize = static_cast<int>(cr.dd->gpuHaloExchange.size());
3228         pulseStart          = gpuHaloExchangeSize - 1;
3229     }
3230     if (cr.dd->comm->cd[0].numPulses() > gpuHaloExchangeSize)
3231     {
3232         for (int pulse = pulseStart; pulse < cr.dd->comm->cd[0].numPulses(); pulse++)
3233         {
3234             cr.dd->gpuHaloExchange.push_back(std::make_unique<gmx::GpuHaloExchange>(
3235                     cr.dd, cr.mpi_comm_mysim, deviceStreamManager.context(),
3236                     deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedLocal),
3237                     deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedNonLocal), pulse));
3238         }
3239     }
3240 }
3241
3242 void reinitGpuHaloExchange(const t_commrec&              cr,
3243                            const DeviceBuffer<gmx::RVec> d_coordinatesBuffer,
3244                            const DeviceBuffer<gmx::RVec> d_forcesBuffer)
3245 {
3246     for (int pulse = 0; pulse < cr.dd->comm->cd[0].numPulses(); pulse++)
3247     {
3248         cr.dd->gpuHaloExchange[pulse]->reinitHalo(d_coordinatesBuffer, d_forcesBuffer);
3249     }
3250 }
3251
3252 void communicateGpuHaloCoordinates(const t_commrec&      cr,
3253                                    const matrix          box,
3254                                    GpuEventSynchronizer* coordinatesReadyOnDeviceEvent)
3255 {
3256     for (int pulse = 0; pulse < cr.dd->comm->cd[0].numPulses(); pulse++)
3257     {
3258         cr.dd->gpuHaloExchange[pulse]->communicateHaloCoordinates(box, coordinatesReadyOnDeviceEvent);
3259     }
3260 }
3261
3262 void communicateGpuHaloForces(const t_commrec& cr, bool accumulateForces)
3263 {
3264     for (int pulse = cr.dd->comm->cd[0].numPulses() - 1; pulse >= 0; pulse--)
3265     {
3266         cr.dd->gpuHaloExchange[pulse]->communicateHaloForces(accumulateForces);
3267     }
3268 }