1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2005,2006,2007,2008,2009 by the GROMACS development team.
5  * Copyright (c) 2010,2011,2012,2013,2014 by the GROMACS development team.
6  * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team.
7  * Copyright (c) 2020,2021, by the GROMACS development team, led by
8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9  * and including many others, as listed in the AUTHORS file in the
10  * top-level source directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38
39 #include "gmxpre.h"
40
41 #include "domdec.h"
42
43 #include "config.h"
44
45 #include <cassert>
46 #include <cinttypes>
47 #include <climits>
48 #include <cmath>
49 #include <cstdio>
50 #include <cstdlib>
51 #include <cstring>
52
53 #include <algorithm>
54 #include <memory>
55
56 #include "gromacs/domdec/builder.h"
57 #include "gromacs/domdec/collect.h"
58 #include "gromacs/domdec/dlb.h"
59 #include "gromacs/domdec/dlbtiming.h"
60 #include "gromacs/domdec/domdec_network.h"
61 #include "gromacs/domdec/ga2la.h"
62 #include "gromacs/domdec/gpuhaloexchange.h"
63 #include "gromacs/domdec/localtopologychecker.h"
64 #include "gromacs/domdec/options.h"
65 #include "gromacs/domdec/partition.h"
66 #include "gromacs/domdec/reversetopology.h"
67 #include "gromacs/gmxlib/network.h"
68 #include "gromacs/gmxlib/nrnb.h"
69 #include "gromacs/gpu_utils/device_stream_manager.h"
70 #include "gromacs/gpu_utils/gpu_utils.h"
71 #include "gromacs/hardware/hw_info.h"
72 #include "gromacs/math/vec.h"
73 #include "gromacs/math/vectypes.h"
74 #include "gromacs/mdlib/calc_verletbuf.h"
75 #include "gromacs/mdlib/constr.h"
76 #include "gromacs/mdlib/constraintrange.h"
77 #include "gromacs/mdlib/updategroups.h"
78 #include "gromacs/mdlib/vcm.h"
79 #include "gromacs/mdlib/vsite.h"
80 #include "gromacs/mdtypes/commrec.h"
81 #include "gromacs/mdtypes/forceoutput.h"
82 #include "gromacs/mdtypes/inputrec.h"
83 #include "gromacs/mdtypes/mdrunoptions.h"
84 #include "gromacs/mdtypes/state.h"
85 #include "gromacs/pbcutil/ishift.h"
86 #include "gromacs/pbcutil/pbc.h"
87 #include "gromacs/pulling/pull.h"
88 #include "gromacs/timing/wallcycle.h"
89 #include "gromacs/topology/block.h"
90 #include "gromacs/topology/idef.h"
91 #include "gromacs/topology/ifunc.h"
92 #include "gromacs/topology/mtop_lookup.h"
93 #include "gromacs/topology/mtop_util.h"
94 #include "gromacs/topology/topology.h"
95 #include "gromacs/utility/basedefinitions.h"
96 #include "gromacs/utility/basenetwork.h"
97 #include "gromacs/utility/cstringutil.h"
98 #include "gromacs/utility/enumerationhelpers.h"
99 #include "gromacs/utility/exceptions.h"
100 #include "gromacs/utility/fatalerror.h"
101 #include "gromacs/utility/gmxmpi.h"
102 #include "gromacs/utility/logger.h"
103 #include "gromacs/utility/real.h"
104 #include "gromacs/utility/smalloc.h"
105 #include "gromacs/utility/strconvert.h"
106 #include "gromacs/utility/stringstream.h"
107 #include "gromacs/utility/stringutil.h"
108 #include "gromacs/utility/textwriter.h"
109
110 #include "atomdistribution.h"
111 #include "box.h"
112 #include "cellsizes.h"
113 #include "distribute.h"
114 #include "domdec_constraints.h"
115 #include "domdec_internal.h"
116 #include "domdec_setup.h"
117 #include "domdec_vsite.h"
118 #include "redistribute.h"
119 #include "utility.h"
120
121 // TODO remove this when moving domdec into gmx namespace
122 using gmx::ArrayRef;
123 using gmx::DdRankOrder;
124 using gmx::DlbOption;
125 using gmx::DomdecOptions;
126 using gmx::RangePartitioning;
127
128 static const char* enumValueToString(DlbState enumValue)
129 {
130     static constexpr gmx::EnumerationArray<DlbState, const char*> dlbStateNames = {
131         "off", "off", "auto", "locked", "on", "on"
132     };
133     return dlbStateNames[enumValue];
134 }
135
136 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
137 #define DD_CGIBS 2
138
139 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
140 #define DD_FLAG_NRCG 65535
141 #define DD_FLAG_FW(d) (1 << (16 + (d)*2))
142 #define DD_FLAG_BW(d) (1 << (16 + (d)*2 + 1))
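/* Illustrative note (not part of the original source): bits 0-15 of a
 * cggl_flag entry are masked with DD_FLAG_NRCG, while each decomposition
 * dimension d uses two move-flag bits above bit 16. For example, for d = 1,
 * DD_FLAG_FW(1) is bit 18 (0x40000) and DD_FLAG_BW(1) is bit 19 (0x80000),
 * marking a move in the forward or backward direction along that dimension.
 */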
143
144 /* The DD zone order */
145 static const ivec dd_zo[DD_MAXZONE] = { { 0, 0, 0 }, { 1, 0, 0 }, { 1, 1, 0 }, { 0, 1, 0 },
146                                         { 0, 1, 1 }, { 0, 0, 1 }, { 1, 0, 1 }, { 1, 1, 1 } };
147
148 /* The non-bonded zone-pair setup for domain decomposition
149  * The first number is the i-zone, the second number the first j-zone seen by
150  * this i-zone, the third number the last+1 j-zone seen by this i-zone.
151  * As is, this is for 3D decomposition, where there are 4 i-zones.
152  * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
153  * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
154  */
155 static const int ddNonbondedZonePairRanges[DD_MAXIZONE][3] = { { 0, 0, 8 },
156                                                                { 1, 3, 6 },
157                                                                { 2, 5, 6 },
158                                                                { 3, 5, 7 } };
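/* Example of reading the table above (illustrative, not part of the original
 * source): the row { 1, 3, 6 } means that i-zone 1 sees j-zones 3, 4 and 5.
 * With 2D decomposition, where nzone = 4, the ranges are clipped to nzone in
 * setup_neighbor_relations(), so i-zone 1 then only sees j-zone 3.
 */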
159
160 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
161 {
162     xyz[XX] = ind / (nc[YY] * nc[ZZ]);
163     xyz[YY] = (ind / nc[ZZ]) % nc[YY];
164     xyz[ZZ] = ind % nc[ZZ];
165 }
166
167 static int ddcoord2ddnodeid(gmx_domdec_t* dd, ivec c)
168 {
169     int ddnodeid = -1;
170
171     const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
172     const int                 ddindex   = dd_index(dd->numCells, c);
173     if (cartSetup.bCartesianPP_PME)
174     {
175         ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
176     }
177     else if (cartSetup.bCartesianPP)
178     {
179 #if GMX_MPI
180         MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
181 #endif
182     }
183     else
184     {
185         ddnodeid = ddindex;
186     }
187
188     return ddnodeid;
189 }
190
191 int ddglatnr(const gmx_domdec_t* dd, int i)
192 {
193     int atnr = 0;
194
195     if (dd == nullptr)
196     {
197         atnr = i + 1;
198     }
199     else
200     {
201         if (i >= dd->comm->atomRanges.numAtomsTotal())
202         {
203             gmx_fatal(FARGS,
204                       "glatnr called with %d, which is not smaller than the local number of atoms (%d)",
205                       i,
206                       dd->comm->atomRanges.numAtomsTotal());
207         }
208         atnr = dd->globalAtomIndices[i] + 1;
209     }
210
211     return atnr;
212 }
213
214 void dd_store_state(const gmx_domdec_t& dd, t_state* state)
215 {
216     if (state->ddp_count != dd.ddp_count)
217     {
218         gmx_incons("The MD state does not match the domain decomposition state");
219     }
220
221     state->cg_gl.resize(dd.ncg_home);
222     for (int i = 0; i < dd.ncg_home; i++)
223     {
224         state->cg_gl[i] = dd.globalAtomGroupIndices[i];
225     }
226
227     state->ddp_count_cg_gl = dd.ddp_count;
228 }
229
230 gmx_domdec_zones_t* domdec_zones(gmx_domdec_t* dd)
231 {
232     return &dd->comm->zones;
233 }
234
235 int dd_numAtomsZones(const gmx_domdec_t& dd)
236 {
237     return dd.comm->atomRanges.end(DDAtomRanges::Type::Zones);
238 }
239
240 int dd_numHomeAtoms(const gmx_domdec_t& dd)
241 {
242     return dd.comm->atomRanges.numHomeAtoms();
243 }
244
245 int dd_natoms_mdatoms(const gmx_domdec_t& dd)
246 {
247     /* We currently set mdatoms entries for all atoms:
248      * local + non-local + communicated for vsite + constraints
249      */
250
251     return dd.comm->atomRanges.numAtomsTotal();
252 }
253
254 int dd_natoms_vsite(const gmx_domdec_t& dd)
255 {
256     return dd.comm->atomRanges.end(DDAtomRanges::Type::Vsites);
257 }
258
259 void dd_get_constraint_range(const gmx_domdec_t& dd, int* at_start, int* at_end)
260 {
261     *at_start = dd.comm->atomRanges.start(DDAtomRanges::Type::Constraints);
262     *at_end   = dd.comm->atomRanges.end(DDAtomRanges::Type::Constraints);
263 }
264
265 void dd_move_x(gmx_domdec_t* dd, const matrix box, gmx::ArrayRef<gmx::RVec> x, gmx_wallcycle* wcycle)
266 {
267     wallcycle_start(wcycle, WallCycleCounter::MoveX);
268
269     rvec shift = { 0, 0, 0 };
270
271     gmx_domdec_comm_t* comm = dd->comm;
272
273     int nzone   = 1;
274     int nat_tot = comm->atomRanges.numHomeAtoms();
275     for (int d = 0; d < dd->ndim; d++)
276     {
277         const bool bPBC   = (dd->ci[dd->dim[d]] == 0);
278         const bool bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
279         if (bPBC)
280         {
281             copy_rvec(box[dd->dim[d]], shift);
282         }
283         gmx_domdec_comm_dim_t* cd = &comm->cd[d];
284         for (const gmx_domdec_ind_t& ind : cd->ind)
285         {
286             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
287             gmx::ArrayRef<gmx::RVec>& sendBuffer = sendBufferAccess.buffer;
288             int                       n          = 0;
289             if (!bPBC)
290             {
291                 for (int j : ind.index)
292                 {
293                     sendBuffer[n] = x[j];
294                     n++;
295                 }
296             }
297             else if (!bScrew)
298             {
299                 for (int j : ind.index)
300                 {
301                     /* We need to shift the coordinates */
302                     for (int d = 0; d < DIM; d++)
303                     {
304                         sendBuffer[n][d] = x[j][d] + shift[d];
305                     }
306                     n++;
307                 }
308             }
309             else
310             {
311                 for (int j : ind.index)
312                 {
313                     /* Shift x */
314                     sendBuffer[n][XX] = x[j][XX] + shift[XX];
315                     /* Rotate y and z.
316                      * This operation requires a special shift force
317                      * treatment, which is performed in calc_vir.
318                      */
319                     sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
320                     sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
321                     n++;
322                 }
323             }
324
325             DDBufferAccess<gmx::RVec> receiveBufferAccess(
326                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
327
328             gmx::ArrayRef<gmx::RVec> receiveBuffer;
329             if (cd->receiveInPlace)
330             {
331                 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
332             }
333             else
334             {
335                 receiveBuffer = receiveBufferAccess.buffer;
336             }
337             /* Send and receive the coordinates */
338             ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
339
340             if (!cd->receiveInPlace)
341             {
342                 int j = 0;
343                 for (int zone = 0; zone < nzone; zone++)
344                 {
345                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
346                     {
347                         x[i] = receiveBuffer[j++];
348                     }
349                 }
350             }
351             nat_tot += ind.nrecv[nzone + 1];
352         }
353         nzone += nzone;
354     }
355
356     wallcycle_stop(wcycle, WallCycleCounter::MoveX);
357 }
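/* Usage sketch (illustrative only; the caller types and variable names are
 * assumptions, not taken from this file): after a (re)partitioning step, a
 * typical MD iteration first communicates the halo coordinates and, after the
 * force evaluation, reduces the halo forces back onto the home atoms:
 *
 *     dd_move_x(cr->dd, box, state->x, wcycle);          // before forces
 *     // ... compute forces ...
 *     dd_move_f(cr->dd, &forceWithShiftForces, wcycle);  // after forces
 */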
358
359 void dd_move_f(gmx_domdec_t* dd, gmx::ForceWithShiftForces* forceWithShiftForces, gmx_wallcycle* wcycle)
360 {
361     wallcycle_start(wcycle, WallCycleCounter::MoveF);
362
363     gmx::ArrayRef<gmx::RVec> f      = forceWithShiftForces->force();
364     gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();
365
366     gmx_domdec_comm_t& comm    = *dd->comm;
367     int                nzone   = comm.zones.n / 2;
368     int                nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
369     for (int d = dd->ndim - 1; d >= 0; d--)
370     {
371         /* Only forces in domains near the PBC boundaries need to
372            consider PBC in the treatment of fshift */
373         const bool shiftForcesNeedPbc =
374                 (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
375         const bool applyScrewPbc =
376                 (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
377         /* Determine which shift vector we need */
378         ivec vis        = { 0, 0, 0 };
379         vis[dd->dim[d]] = 1;
380         const int is    = gmx::ivecToShiftIndex(vis);
381
382         /* Loop over the pulses */
383         const gmx_domdec_comm_dim_t& cd = comm.cd[d];
384         for (int p = cd.numPulses() - 1; p >= 0; p--)
385         {
386             const gmx_domdec_ind_t&   ind = cd.ind[p];
387             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
388             gmx::ArrayRef<gmx::RVec>& receiveBuffer = receiveBufferAccess.buffer;
389
390             nat_tot -= ind.nrecv[nzone + 1];
391
392             DDBufferAccess<gmx::RVec> sendBufferAccess(
393                     comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
394
395             gmx::ArrayRef<gmx::RVec> sendBuffer;
396             if (cd.receiveInPlace)
397             {
398                 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
399             }
400             else
401             {
402                 sendBuffer = sendBufferAccess.buffer;
403                 int j      = 0;
404                 for (int zone = 0; zone < nzone; zone++)
405                 {
406                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
407                     {
408                         sendBuffer[j++] = f[i];
409                     }
410                 }
411             }
412             /* Communicate the forces */
413             ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
414             /* Add the received forces */
415             int n = 0;
416             if (!shiftForcesNeedPbc)
417             {
418                 for (int j : ind.index)
419                 {
420                     for (int d = 0; d < DIM; d++)
421                     {
422                         f[j][d] += receiveBuffer[n][d];
423                     }
424                     n++;
425                 }
426             }
427             else if (!applyScrewPbc)
428             {
429                 for (int j : ind.index)
430                 {
431                     for (int d = 0; d < DIM; d++)
432                     {
433                         f[j][d] += receiveBuffer[n][d];
434                     }
435                     /* Add this force to the shift force */
436                     for (int d = 0; d < DIM; d++)
437                     {
438                         fshift[is][d] += receiveBuffer[n][d];
439                     }
440                     n++;
441                 }
442             }
443             else
444             {
445                 for (int j : ind.index)
446                 {
447                     /* Rotate the force */
448                     f[j][XX] += receiveBuffer[n][XX];
449                     f[j][YY] -= receiveBuffer[n][YY];
450                     f[j][ZZ] -= receiveBuffer[n][ZZ];
451                     if (shiftForcesNeedPbc)
452                     {
453                         /* Add this force to the shift force */
454                         for (int d = 0; d < DIM; d++)
455                         {
456                             fshift[is][d] += receiveBuffer[n][d];
457                         }
458                     }
459                     n++;
460                 }
461             }
462         }
463         nzone /= 2;
464     }
465     wallcycle_stop(wcycle, WallCycleCounter::MoveF);
466 }
467
468 /* Convenience function for extracting a real buffer from an rvec buffer
469  *
470  * To reduce the number of temporary communication buffers and avoid
471  * cache pollution, we reuse gmx::RVec buffers for storing reals.
472  * This function returns a real buffer reference with the same number
473  * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
474  */
475 static gmx::ArrayRef<real> realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
476 {
477     return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0], arrayRef.size());
478 }
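/* Illustrative example (not part of the original source): for an
 * ArrayRef<gmx::RVec> of 10 elements (30 reals of storage), the returned
 * ArrayRef<real> also has 10 elements, i.e. it views only the first third
 * of the underlying memory.
 */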
479
480 void dd_atom_spread_real(gmx_domdec_t* dd, real v[])
481 {
482     gmx_domdec_comm_t* comm = dd->comm;
483
484     int nzone   = 1;
485     int nat_tot = comm->atomRanges.numHomeAtoms();
486     for (int d = 0; d < dd->ndim; d++)
487     {
488         gmx_domdec_comm_dim_t* cd = &comm->cd[d];
489         for (const gmx_domdec_ind_t& ind : cd->ind)
490         {
491             /* Note: We provision for RVec instead of real, so a factor of 3
492              * more than needed. The buffer actually already has this size
493              * and we pass a plain pointer below, so this does not matter.
494              */
495             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
496             gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
497             int                 n          = 0;
498             for (int j : ind.index)
499             {
500                 sendBuffer[n++] = v[j];
501             }
502
503             DDBufferAccess<gmx::RVec> receiveBufferAccess(
504                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
505
506             gmx::ArrayRef<real> receiveBuffer;
507             if (cd->receiveInPlace)
508             {
509                 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
510             }
511             else
512             {
513                 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
514             }
515             /* Send and receive the data */
516             ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
517             if (!cd->receiveInPlace)
518             {
519                 int j = 0;
520                 for (int zone = 0; zone < nzone; zone++)
521                 {
522                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
523                     {
524                         v[i] = receiveBuffer[j++];
525                     }
526                 }
527             }
528             nat_tot += ind.nrecv[nzone + 1];
529         }
530         nzone += nzone;
531     }
532 }
533
534 void dd_atom_sum_real(gmx_domdec_t* dd, real v[])
535 {
536     gmx_domdec_comm_t* comm = dd->comm;
537
538     int nzone   = comm->zones.n / 2;
539     int nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
540     for (int d = dd->ndim - 1; d >= 0; d--)
541     {
542         gmx_domdec_comm_dim_t* cd = &comm->cd[d];
543         for (int p = cd->numPulses() - 1; p >= 0; p--)
544         {
545             const gmx_domdec_ind_t& ind = cd->ind[p];
546
547             /* Note: We provision for RVec instead of real, so a factor of 3
548              * more than needed. The buffer actually already has this size
549              * and we typecast, so this works as intended.
550              */
551             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
552             gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
553             nat_tot -= ind.nrecv[nzone + 1];
554
555             DDBufferAccess<gmx::RVec> sendBufferAccess(
556                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
557
558             gmx::ArrayRef<real> sendBuffer;
559             if (cd->receiveInPlace)
560             {
561                 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
562             }
563             else
564             {
565                 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
566                 int j      = 0;
567                 for (int zone = 0; zone < nzone; zone++)
568                 {
569                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
570                     {
571                         sendBuffer[j++] = v[i];
572                     }
573                 }
574             }
575             /* Communicate the data */
576             ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
577             /* Add the received values */
578             int n = 0;
579             for (int j : ind.index)
580             {
581                 v[j] += receiveBuffer[n];
582                 n++;
583             }
584         }
585         nzone /= 2;
586     }
587 }
588
589 real dd_cutoff_multibody(const gmx_domdec_t* dd)
590 {
591     const gmx_domdec_comm_t& comm       = *dd->comm;
592     const DDSystemInfo&      systemInfo = comm.systemInfo;
593
594     real r = -1;
595     if (systemInfo.haveInterDomainMultiBodyBondeds)
596     {
597         if (comm.cutoff_mbody > 0)
598         {
599             r = comm.cutoff_mbody;
600         }
601         else
602         {
603             /* cutoff_mbody=0 means we do not have DLB */
604             r = comm.cellsize_min[dd->dim[0]];
605             for (int di = 1; di < dd->ndim; di++)
606             {
607                 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
608             }
609             if (comm.systemInfo.filterBondedCommunication)
610             {
611                 r = std::max(r, comm.cutoff_mbody);
612             }
613             else
614             {
615                 r = std::min(r, systemInfo.cutoff);
616             }
617         }
618     }
619
620     return r;
621 }
622
623 real dd_cutoff_twobody(const gmx_domdec_t* dd)
624 {
625     const real r_mb = dd_cutoff_multibody(dd);
626
627     return std::max(dd->comm->systemInfo.cutoff, r_mb);
628 }
629
630
631 static void dd_cart_coord2pmecoord(const DDRankSetup&        ddRankSetup,
632                                    const CartesianRankSetup& cartSetup,
633                                    const ivec                coord,
634                                    ivec                      coord_pme)
635 {
636     const int nc   = ddRankSetup.numPPCells[cartSetup.cartpmedim];
637     const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
638     copy_ivec(coord, coord_pme);
639     coord_pme[cartSetup.cartpmedim] =
640             nc + (coord[cartSetup.cartpmedim] * (ntot - nc) + (ntot - nc) / 2) / nc;
641 }
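/* Worked example (illustrative, not part of the original source): with
 * nc = 4 PP cells and ntot = 6 Cartesian cells along cartpmedim (so 2 PME-only
 * slots), coord_pme = 4 + (coord*2 + 1)/4, giving
 *     coord 0 -> 4,  coord 1 -> 4,  coord 2 -> 5,  coord 3 -> 5,
 * i.e. the first two PP cells map to PME coordinate 4 and the last two to 5.
 */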
642
643 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
644 static int ddindex2pmeindex(const DDRankSetup& ddRankSetup, const int ddCellIndex)
645 {
646     const int npp  = ddRankSetup.numPPRanks;
647     const int npme = ddRankSetup.numRanksDoingPme;
648
649     /* Here we assign a PME node to communicate with this DD node
650      * by assuming that the major index of both is x.
651      * We add npme/2 to obtain an even distribution.
652      */
653     return (ddCellIndex * npme + npme / 2) / npp;
654 }
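/* Worked example (illustrative, not part of the original source): with
 * npp = 6 and npme = 2, the mapping (ddCellIndex*2 + 1)/6 yields PME index 0
 * for PP cells 0-2 and PME index 1 for PP cells 3-5, i.e. an even split of
 * the PP ranks over the PME ranks.
 */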
655
656 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup& ddRankSetup)
657 {
658     std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
659
660     int n = 0;
661     for (int i = 0; i < ddRankSetup.numPPRanks; i++)
662     {
663         const int p0 = ddindex2pmeindex(ddRankSetup, i);
664         const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
665         if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
666         {
667             if (debug)
668             {
669                 fprintf(debug, "pme_rank[%d] = %d\n", n, i + 1 + n);
670             }
671             pmeRanks[n] = i + 1 + n;
672             n++;
673         }
674     }
675
676     return pmeRanks;
677 }
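/* Worked example (illustrative, not part of the original source): continuing
 * the case of 6 PP ranks and 2 PME ranks, ddindex2pmeindex() increases after
 * PP cells 2 and 5, so this function returns pmeRanks = { 3, 7 }, i.e. the
 * interleaved global rank order is PP PP PP PME PP PP PP PME.
 */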
678
679 static int gmx_ddcoord2pmeindex(const gmx_domdec_t& dd, int x, int y, int z)
680 {
681     ivec coords;
682
683     coords[XX]     = x;
684     coords[YY]     = y;
685     coords[ZZ]     = z;
686     const int slab = ddindex2pmeindex(dd.comm->ddRankSetup, dd_index(dd.numCells, coords));
687
688     return slab;
689 }
690
691 static int ddcoord2simnodeid(const t_commrec* cr, int x, int y, int z)
692 {
693     const CartesianRankSetup& cartSetup = cr->dd->comm->cartesianRankSetup;
694     ivec                      coords    = { x, y, z };
695     int                       nodeid    = -1;
696
697     if (cartSetup.bCartesianPP_PME)
698     {
699 #if GMX_MPI
700         MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
701 #endif
702     }
703     else
704     {
705         int ddindex = dd_index(cr->dd->numCells, coords);
706         if (cartSetup.bCartesianPP)
707         {
708             nodeid = cartSetup.ddindex2simnodeid[ddindex];
709         }
710         else
711         {
712             if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
713             {
714                 nodeid = ddindex + gmx_ddcoord2pmeindex(*cr->dd, x, y, z);
715             }
716             else
717             {
718                 nodeid = ddindex;
719             }
720         }
721     }
722
723     return nodeid;
724 }
725
726 static int dd_simnode2pmenode(const DDRankSetup&        ddRankSetup,
727                               const CartesianRankSetup& cartSetup,
728                               gmx::ArrayRef<const int>  pmeRanks,
729                               const t_commrec gmx_unused* cr,
730                               const int                   sim_nodeid)
731 {
732     int pmenode = -1;
733
734     /* This assumes a uniform x domain decomposition grid cell size */
735     if (cartSetup.bCartesianPP_PME)
736     {
737 #if GMX_MPI
738         ivec coord, coord_pme;
739         MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
740         if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
741         {
742             /* This is a PP rank */
743             dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
744             MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
745         }
746 #endif
747     }
748     else if (cartSetup.bCartesianPP)
749     {
750         if (sim_nodeid < ddRankSetup.numPPRanks)
751         {
752             pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
753         }
754     }
755     else
756     {
757         /* This assumes DD cells with identical x coordinates
758          * are numbered sequentially.
759          */
760         if (pmeRanks.empty())
761         {
762             if (sim_nodeid < ddRankSetup.numPPRanks)
763             {
764                 /* The DD index equals the nodeid */
765                 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
766             }
767         }
768         else
769         {
770             int i = 0;
771             while (sim_nodeid > pmeRanks[i])
772             {
773                 i++;
774             }
775             if (sim_nodeid < pmeRanks[i])
776             {
777                 pmenode = pmeRanks[i];
778             }
779         }
780     }
781
782     return pmenode;
783 }
784
785 NumPmeDomains getNumPmeDomains(const gmx_domdec_t* dd)
786 {
787     if (dd != nullptr)
788     {
789         return { dd->comm->ddRankSetup.npmenodes_x, dd->comm->ddRankSetup.npmenodes_y };
790     }
791     else
792     {
793         return { 1, 1 };
794     }
795 }
796
797 std::vector<int> get_pme_ddranks(const t_commrec* cr, const int pmenodeid)
798 {
799     const DDRankSetup&        ddRankSetup = cr->dd->comm->ddRankSetup;
800     const CartesianRankSetup& cartSetup   = cr->dd->comm->cartesianRankSetup;
801     GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks,
802                        "This function should only be called when PME-only ranks are in use");
803     std::vector<int> ddranks;
804     ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1) / ddRankSetup.numRanksDoingPme);
805
806     for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
807     {
808         for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
809         {
810             for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
811             {
812                 if (cartSetup.bCartesianPP_PME)
813                 {
814                     ivec coord = { x, y, z };
815                     ivec coord_pme;
816                     dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
817                     if (cr->dd->ci[XX] == coord_pme[XX] && cr->dd->ci[YY] == coord_pme[YY]
818                         && cr->dd->ci[ZZ] == coord_pme[ZZ])
819                     {
820                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
821                     }
822                 }
823                 else
824                 {
825                     /* The slab corresponds to the nodeid in the PME group */
826                     if (gmx_ddcoord2pmeindex(*cr->dd, x, y, z) == pmenodeid)
827                     {
828                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
829                     }
830                 }
831             }
832         }
833     }
834     return ddranks;
835 }
836
837 static gmx_bool receive_vir_ener(const gmx_domdec_t* dd, gmx::ArrayRef<const int> pmeRanks, const t_commrec* cr)
838 {
839     bool bReceive = true;
840
841     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
842     if (ddRankSetup.usePmeOnlyRanks)
843     {
844         const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
845         if (cartSetup.bCartesianPP_PME)
846         {
847 #if GMX_MPI
848             int  pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
849             ivec coords;
850             MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
851             coords[cartSetup.cartpmedim]++;
852             if (coords[cartSetup.cartpmedim] < dd->numCells[cartSetup.cartpmedim])
853             {
854                 int rank = 0;
855                 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
856                 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
857                 {
858                     /* This is not the last PP node for pmenode */
859                     bReceive = FALSE;
860                 }
861             }
862 #else
863             GMX_RELEASE_ASSERT(
864                     false,
865                     "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
866 #endif
867         }
868         else
869         {
870             int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
871             if (cr->sim_nodeid + 1 < cr->nnodes
872                 && dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
873             {
874                 /* This is not the last PP node for pmenode */
875                 bReceive = FALSE;
876             }
877         }
878     }
879
880     return bReceive;
881 }
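/* Illustrative note (not part of the original source): with the interleaved
 * layout sketched above, sim ranks 0, 1 and 2 all map to PME rank 3, but only
 * rank 2, the last PP rank for that PME rank, returns true here and thus
 * receives the virial and energy contributions from PME rank 3.
 */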
882
883 static void set_slb_pme_dim_f(gmx_domdec_t* dd, int dim, real** dim_f)
884 {
885     gmx_domdec_comm_t* comm = dd->comm;
886
887     snew(*dim_f, dd->numCells[dim] + 1);
888     (*dim_f)[0] = 0;
889     for (int i = 1; i < dd->numCells[dim]; i++)
890     {
891         if (comm->slb_frac[dim])
892         {
893             (*dim_f)[i] = (*dim_f)[i - 1] + comm->slb_frac[dim][i - 1];
894         }
895         else
896         {
897             (*dim_f)[i] = static_cast<real>(i) / static_cast<real>(dd->numCells[dim]);
898         }
899     }
900     (*dim_f)[dd->numCells[dim]] = 1;
901 }
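/* Illustrative example (not part of the original source): with 4 cells along
 * dim and no static load-balancing fractions (slb_frac[dim] == nullptr), the
 * boundaries become dim_f = { 0, 0.25, 0.5, 0.75, 1 }; with
 * slb_frac[dim] = { 0.4, 0.2, 0.2, 0.2 } they become { 0, 0.4, 0.6, 0.8, 1 }.
 */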
902
903 static void init_ddpme(gmx_domdec_t* dd, gmx_ddpme_t* ddpme, int dimind)
904 {
905     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
906
907     if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
908     {
909         ddpme->dim = YY;
910     }
911     else
912     {
913         ddpme->dim = dimind;
914     }
915     ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
916
917     ddpme->nslab = (ddpme->dim == 0 ? ddRankSetup.npmenodes_x : ddRankSetup.npmenodes_y);
918
919     if (ddpme->nslab <= 1)
920     {
921         return;
922     }
923
924     const int nso = ddRankSetup.numRanksDoingPme / ddpme->nslab;
925     /* Determine for each PME slab the PP location range for dimension dim */
926     snew(ddpme->pp_min, ddpme->nslab);
927     snew(ddpme->pp_max, ddpme->nslab);
928     for (int slab = 0; slab < ddpme->nslab; slab++)
929     {
930         ddpme->pp_min[slab] = dd->numCells[dd->dim[dimind]] - 1;
931         ddpme->pp_max[slab] = 0;
932     }
933     for (int i = 0; i < dd->nnodes; i++)
934     {
935         ivec xyz;
936         ddindex2xyz(dd->numCells, i, xyz);
937         /* For y only use our y/z slab.
938          * This assumes that the PME x grid size matches the DD grid size.
939          */
940         if (dimind == 0 || xyz[XX] == dd->ci[XX])
941         {
942             const int pmeindex  = ddindex2pmeindex(ddRankSetup, i);
943             const int slab      = (dimind == 0) ? (pmeindex / nso) : (pmeindex % ddpme->nslab);
944             ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
945             ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
946         }
947     }
948
949     set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
950 }
951
952 int dd_pme_maxshift_x(const gmx_domdec_t& dd)
953 {
954     const DDRankSetup& ddRankSetup = dd.comm->ddRankSetup;
955
956     if (ddRankSetup.ddpme[0].dim == XX)
957     {
958         return ddRankSetup.ddpme[0].maxshift;
959     }
960     else
961     {
962         return 0;
963     }
964 }
965
966 int dd_pme_maxshift_y(const gmx_domdec_t& dd)
967 {
968     const DDRankSetup& ddRankSetup = dd.comm->ddRankSetup;
969
970     if (ddRankSetup.ddpme[0].dim == YY)
971     {
972         return ddRankSetup.ddpme[0].maxshift;
973     }
974     else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
975     {
976         return ddRankSetup.ddpme[1].maxshift;
977     }
978     else
979     {
980         return 0;
981     }
982 }
983
984 bool ddUsesUpdateGroups(const gmx_domdec_t& dd)
985 {
986     return dd.comm->systemInfo.useUpdateGroups;
987 }
988
989 void dd_cycles_add(const gmx_domdec_t* dd, float cycles, int ddCycl)
990 {
991     /* Note that the cycles value can be incorrect, either 0 or some
992      * extremely large value, when our thread migrated to another core
993      * with an unsynchronized cycle counter. If this happens less often
994      * than once per nstlist steps, this will not cause issues, since
995      * we later subtract the maximum value from the sum over nstlist steps.
996      * A zero count will slightly lower the total, but that's a small effect.
997      * Note that the main purpose of the subtraction of the maximum value
998      * is to avoid throwing off the load balancing when stalls occur due
999      * to e.g. system activity or network congestion.
1000      */
1001     dd->comm->cycl[ddCycl] += cycles;
1002     dd->comm->cycl_n[ddCycl]++;
1003     if (cycles > dd->comm->cycl_max[ddCycl])
1004     {
1005         dd->comm->cycl_max[ddCycl] = cycles;
1006     }
1007 }
1008
1009 #if GMX_MPI
1010 static void make_load_communicator(gmx_domdec_t* dd, int dim_ind, ivec loc)
1011 {
1012     MPI_Comm c_row = MPI_COMM_NULL;
1013     ivec     loc_c;
1014     bool     bPartOfGroup = false;
1015
1016     const int dim = dd->dim[dim_ind];
1017     copy_ivec(loc, loc_c);
1018     for (int i = 0; i < dd->numCells[dim]; i++)
1019     {
1020         loc_c[dim]     = i;
1021         const int rank = dd_index(dd->numCells, loc_c);
1022         if (rank == dd->rank)
1023         {
1024             /* This process is part of the group */
1025             bPartOfGroup = TRUE;
1026         }
1027     }
1028     MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank, &c_row);
1029     if (bPartOfGroup)
1030     {
1031         dd->comm->mpi_comm_load[dim_ind] = c_row;
1032         if (!isDlbDisabled(dd->comm))
1033         {
1034             DDCellsizesWithDlb& cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1035
1036             if (dd->ci[dim] == dd->master_ci[dim])
1037             {
1038                 /* This is the root process of this row */
1039                 cellsizes.rowMaster = std::make_unique<RowMaster>();
1040
1041                 RowMaster& rowMaster = *cellsizes.rowMaster;
1042                 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1043                 rowMaster.oldCellFrac.resize(dd->numCells[dim] + 1);
1044                 rowMaster.isCellMin.resize(dd->numCells[dim]);
1045                 if (dim_ind > 0)
1046                 {
1047                     rowMaster.bounds.resize(dd->numCells[dim]);
1048                 }
1049                 rowMaster.buf_ncd.resize(dd->numCells[dim]);
1050             }
1051             else
1052             {
1053                 /* This is not a root process, we only need to receive cell_f */
1054                 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1055             }
1056         }
1057         if (dd->ci[dim] == dd->master_ci[dim])
1058         {
1059             snew(dd->comm->load[dim_ind].load, dd->numCells[dim] * DD_NLOAD_MAX);
1060         }
1061     }
1062 }
1063 #endif
1064
1065 void dd_setup_dlb_resource_sharing(const t_commrec* cr, int gpu_id)
1066 {
1067 #if GMX_MPI
1068     MPI_Comm mpi_comm_pp_physicalnode = MPI_COMM_NULL;
1069
1070     if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1071     {
1072         /* Only ranks with short-ranged tasks (currently) use GPUs.
1073          * If we don't have GPUs assigned, there are no resources to share.
1074          */
1075         return;
1076     }
1077
1078     const int physicalnode_id_hash = gmx_physicalnode_id_hash();
1079
1080     gmx_domdec_t* dd = cr->dd;
1081
1082     if (debug)
1083     {
1084         fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1085         fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n", dd->rank, physicalnode_id_hash, gpu_id);
1086     }
1087     /* Split the PP communicator over the physical nodes */
1088     /* TODO: See if we should store this (before), as it's also used
1089      * for the nodecomm summation.
1090      */
1091     // TODO PhysicalNodeCommunicator could be extended/used to handle
1092     // the need for per-node per-group communicators.
1093     MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank, &mpi_comm_pp_physicalnode);
1094     MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank, &dd->comm->mpi_comm_gpu_shared);
1095     MPI_Comm_free(&mpi_comm_pp_physicalnode);
1096     MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1097
1098     if (debug)
1099     {
1100         fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1101     }
1102
1103     /* Note that some ranks could share a GPU, while others don't */
1104
1105     if (dd->comm->nrank_gpu_shared == 1)
1106     {
1107         MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1108     }
1109 #else
1110     GMX_UNUSED_VALUE(cr);
1111     GMX_UNUSED_VALUE(gpu_id);
1112 #endif
1113 }
1114
1115 static void make_load_communicators(gmx_domdec_t gmx_unused* dd)
1116 {
1117 #if GMX_MPI
1118     ivec loc;
1119
1120     if (debug)
1121     {
1122         fprintf(debug, "Making load communicators\n");
1123     }
1124
1125     dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1126     snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1127
1128     if (dd->ndim == 0)
1129     {
1130         return;
1131     }
1132
1133     clear_ivec(loc);
1134     make_load_communicator(dd, 0, loc);
1135     if (dd->ndim > 1)
1136     {
1137         const int dim0 = dd->dim[0];
1138         for (int i = 0; i < dd->numCells[dim0]; i++)
1139         {
1140             loc[dim0] = i;
1141             make_load_communicator(dd, 1, loc);
1142         }
1143     }
1144     if (dd->ndim > 2)
1145     {
1146         const int dim0 = dd->dim[0];
1147         for (int i = 0; i < dd->numCells[dim0]; i++)
1148         {
1149             loc[dim0]      = i;
1150             const int dim1 = dd->dim[1];
1151             for (int j = 0; j < dd->numCells[dim1]; j++)
1152             {
1153                 loc[dim1] = j;
1154                 make_load_communicator(dd, 2, loc);
1155             }
1156         }
1157     }
1158
1159     if (debug)
1160     {
1161         fprintf(debug, "Finished making load communicators\n");
1162     }
1163 #endif
1164 }
1165
1166 /*! \brief Sets up the relation between neighboring domains and zones */
1167 static void setup_neighbor_relations(gmx_domdec_t* dd)
1168 {
1169     ivec tmp, s;
1170     GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
1171
1172     for (int d = 0; d < dd->ndim; d++)
1173     {
1174         const int dim = dd->dim[d];
1175         copy_ivec(dd->ci, tmp);
1176         tmp[dim]           = (tmp[dim] + 1) % dd->numCells[dim];
1177         dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1178         copy_ivec(dd->ci, tmp);
1179         tmp[dim]           = (tmp[dim] - 1 + dd->numCells[dim]) % dd->numCells[dim];
1180         dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1181         if (debug)
1182         {
1183             fprintf(debug,
1184                     "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
1185                     dd->rank,
1186                     dim,
1187                     dd->neighbor[d][0],
1188                     dd->neighbor[d][1]);
1189         }
1190     }
1191
1192     int nzone  = (1 << dd->ndim);
1193     int nizone = (1 << std::max(dd->ndim - 1, 0));
1194     assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1195
1196     gmx_domdec_zones_t* zones = &dd->comm->zones;
1197
1198     for (int i = 0; i < nzone; i++)
1199     {
1200         int m = 0;
1201         clear_ivec(zones->shift[i]);
1202         for (int d = 0; d < dd->ndim; d++)
1203         {
1204             zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1205         }
1206     }
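    /* Illustrative example (not part of the original source): for a 2D
     * decomposition over x and y this loop yields nzone = 4 zones with shifts
     * (0,0,0), (1,0,0), (1,1,0) and (0,1,0): the home zone plus the
     * neighboring zones in +x, +x+y and +y.
     */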
1207
1208     zones->n = nzone;
1209     for (int i = 0; i < nzone; i++)
1210     {
1211         for (int d = 0; d < DIM; d++)
1212         {
1213             s[d] = dd->ci[d] - zones->shift[i][d];
1214             if (s[d] < 0)
1215             {
1216                 s[d] += dd->numCells[d];
1217             }
1218             else if (s[d] >= dd->numCells[d])
1219             {
1220                 s[d] -= dd->numCells[d];
1221             }
1222         }
1223     }
1224     for (int iZoneIndex = 0; iZoneIndex < nizone; iZoneIndex++)
1225     {
1226         GMX_RELEASE_ASSERT(
1227                 ddNonbondedZonePairRanges[iZoneIndex][0] == iZoneIndex,
1228                 "The first element for each ddNonbondedZonePairRanges should match its index");
1229
1230         DDPairInteractionRanges iZone;
1231         iZone.iZoneIndex = iZoneIndex;
1232         /* The zone-pair ranges above are for 3D decomposition; for fewer
1233          * dimensions use only j-zones up to nzone.
1234          */
1235         iZone.jZoneRange = gmx::Range<int>(std::min(ddNonbondedZonePairRanges[iZoneIndex][1], nzone),
1236                                            std::min(ddNonbondedZonePairRanges[iZoneIndex][2], nzone));
1237         for (int dim = 0; dim < DIM; dim++)
1238         {
1239             if (dd->numCells[dim] == 1)
1240             {
1241                 /* All shifts should be allowed */
1242                 iZone.shift0[dim] = -1;
1243                 iZone.shift1[dim] = 1;
1244             }
1245             else
1246             {
1247                 /* Determine the min/max j-zone shift wrt the i-zone */
1248                 iZone.shift0[dim] = 1;
1249                 iZone.shift1[dim] = -1;
1250                 for (int jZone : iZone.jZoneRange)
1251                 {
1252                     int shift_diff = zones->shift[jZone][dim] - zones->shift[iZoneIndex][dim];
1253                     if (shift_diff < iZone.shift0[dim])
1254                     {
1255                         iZone.shift0[dim] = shift_diff;
1256                     }
1257                     if (shift_diff > iZone.shift1[dim])
1258                     {
1259                         iZone.shift1[dim] = shift_diff;
1260                     }
1261                 }
1262             }
1263         }
1264
1265         zones->iZones.push_back(iZone);
1266     }
1267
1268     if (!isDlbDisabled(dd->comm))
1269     {
1270         dd->comm->cellsizesWithDlb.resize(dd->ndim);
1271     }
1272
1273     if (dd->comm->ddSettings.recordLoad)
1274     {
1275         make_load_communicators(dd);
1276     }
1277 }
1278
1279 static void make_pp_communicator(const gmx::MDLogger& mdlog,
1280                                  gmx_domdec_t*        dd,
1281                                  t_commrec gmx_unused* cr,
1282                                  bool gmx_unused       reorder)
1283 {
1284 #if GMX_MPI
1285     gmx_domdec_comm_t*  comm      = dd->comm;
1286     CartesianRankSetup& cartSetup = comm->cartesianRankSetup;
1287
1288     if (cartSetup.bCartesianPP)
1289     {
1290         /* Set up cartesian communication for the particle-particle part */
1291         GMX_LOG(mdlog.info)
1292                 .appendTextFormatted("Will use a Cartesian communicator: %d x %d x %d",
1293                                      dd->numCells[XX],
1294                                      dd->numCells[YY],
1295                                      dd->numCells[ZZ]);
1296
1297         ivec periods;
1298         for (int i = 0; i < DIM; i++)
1299         {
1300             periods[i] = TRUE;
1301         }
1302         MPI_Comm comm_cart = MPI_COMM_NULL;
1303         MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->numCells, periods, static_cast<int>(reorder), &comm_cart);
1304         /* We overwrite the old communicator with the new cartesian one */
1305         cr->mpi_comm_mygroup = comm_cart;
1306     }
1307
1308     dd->mpi_comm_all = cr->mpi_comm_mygroup;
1309     MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1310
1311     if (cartSetup.bCartesianPP_PME)
1312     {
1313         /* Since we want to use the original cartesian setup for sim,
1314          * and not the one after split, we need to make an index.
1315          */
1316         cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1317         cartSetup.ddindex2ddnodeid[dd_index(dd->numCells, dd->ci)] = dd->rank;
1318         gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1319         /* Get the rank of the DD master;
1320          * above we made sure that the master node is a PP node.
1321          */
1322         int rank = MASTER(cr) ? dd->rank : 0;
1323         MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1324     }
1325     else if (cartSetup.bCartesianPP)
1326     {
1327         if (!comm->ddRankSetup.usePmeOnlyRanks)
1328         {
1329             /* The PP communicator is also
1330              * the communicator for this simulation
1331              */
1332             cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1333         }
1334         cr->nodeid = dd->rank;
1335
1336         MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1337
1338         /* We need to make an index to go from the coordinates
1339          * to the nodeid of this simulation.
1340          */
1341         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1342         std::vector<int> buf(dd->nnodes);
1343         if (thisRankHasDuty(cr, DUTY_PP))
1344         {
1345             buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
1346         }
1347         /* Communicate the ddindex to simulation nodeid index */
1348         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM, cr->mpi_comm_mysim);
1349
1350         /* Determine the master coordinates and rank.
1351          * The DD master should be the same node as the master of this sim.
1352          */
1353         for (int i = 0; i < dd->nnodes; i++)
1354         {
1355             if (cartSetup.ddindex2simnodeid[i] == 0)
1356             {
1357                 ddindex2xyz(dd->numCells, i, dd->master_ci);
1358                 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1359             }
1360         }
1361         if (debug)
1362         {
1363             fprintf(debug, "The master rank is %d\n", dd->masterrank);
1364         }
1365     }
1366     else
1367     {
1368         /* No Cartesian communicators */
1369         /* We use the rank in dd->comm->all as DD index */
1370         ddindex2xyz(dd->numCells, dd->rank, dd->ci);
1371         /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1372         dd->masterrank = 0;
1373         clear_ivec(dd->master_ci);
1374     }
1375 #endif
1376
1377     GMX_LOG(mdlog.info)
1378             .appendTextFormatted("Domain decomposition rank %d, coordinates %d %d %d\n",
1379                                  dd->rank,
1380                                  dd->ci[XX],
1381                                  dd->ci[YY],
1382                                  dd->ci[ZZ]);
1383     if (debug)
1384     {
1385         fprintf(debug,
1386                 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
1387                 dd->rank,
1388                 dd->ci[XX],
1389                 dd->ci[YY],
1390                 dd->ci[ZZ]);
1391     }
1392 }
1393
1394 static void receive_ddindex2simnodeid(gmx_domdec_t* dd, t_commrec* cr)
1395 {
1396 #if GMX_MPI
1397     CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
1398
1399     if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1400     {
1401         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1402         std::vector<int> buf(dd->nnodes);
1403         if (thisRankHasDuty(cr, DUTY_PP))
1404         {
1405             buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
1406         }
1407         /* Communicate the ddindex to simulation nodeid index */
1408         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM, cr->mpi_comm_mysim);
1409     }
1410 #else
1411     GMX_UNUSED_VALUE(dd);
1412     GMX_UNUSED_VALUE(cr);
1413 #endif
1414 }
1415
1416 static CartesianRankSetup split_communicator(const gmx::MDLogger& mdlog,
1417                                              t_commrec*           cr,
1418                                              const DdRankOrder    ddRankOrder,
1419                                              bool gmx_unused      reorder,
1420                                              const DDRankSetup&   ddRankSetup,
1421                                              ivec                 ddCellIndex,
1422                                              std::vector<int>*    pmeRanks)
1423 {
1424     CartesianRankSetup cartSetup;
1425
1426     cartSetup.bCartesianPP     = (ddRankOrder == DdRankOrder::cartesian);
1427     cartSetup.bCartesianPP_PME = false;
1428
1429     const ivec& numDDCells = ddRankSetup.numPPCells;
1430     /* Initially we set ntot to the number of PP cells */
1431     copy_ivec(numDDCells, cartSetup.ntot);
1432
1433     if (cartSetup.bCartesianPP)
1434     {
1435         const int numDDCellsTot = ddRankSetup.numPPRanks;
1436         bool      bDiv[DIM];
1437         for (int i = 1; i < DIM; i++)
1438         {
1439             bDiv[i] = ((ddRankSetup.numRanksDoingPme * numDDCells[i]) % numDDCellsTot == 0);
1440         }
1441         if (bDiv[YY] || bDiv[ZZ])
1442         {
1443             cartSetup.bCartesianPP_PME = TRUE;
1444             /* If we have 2D PME decomposition, which is always in x+y,
1445              * we stack the PME only nodes in z.
1446              * Otherwise we choose the direction that provides the thinnest slab
1447              * of PME only nodes as this will have the least effect
1448              * on the PP communication.
1449              * But for the PME communication the opposite might be better.
1450              */
1451             if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 || !bDiv[YY] || numDDCells[YY] > numDDCells[ZZ]))
1452             {
1453                 cartSetup.cartpmedim = ZZ;
1454             }
1455             else
1456             {
1457                 cartSetup.cartpmedim = YY;
1458             }
1459             cartSetup.ntot[cartSetup.cartpmedim] +=
1460                     (ddRankSetup.numRanksDoingPme * numDDCells[cartSetup.cartpmedim]) / numDDCellsTot;
1461         }
1462         else
1463         {
1464             GMX_LOG(mdlog.info)
1465                     .appendTextFormatted(
1466                             "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or "
1467                             "nx*nz (%d*%d)",
1468                             ddRankSetup.numRanksDoingPme,
1469                             numDDCells[XX],
1470                             numDDCells[YY],
1471                             numDDCells[XX],
1472                             numDDCells[ZZ]);
1473             GMX_LOG(mdlog.info)
1474                     .appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1475         }
1476     }
1477
1478     if (cartSetup.bCartesianPP_PME)
1479     {
1480 #if GMX_MPI
1481         int  rank = 0;
1482         ivec periods;
1483
1484         GMX_LOG(mdlog.info)
1485                 .appendTextFormatted(
1486                         "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1487                         cartSetup.ntot[XX],
1488                         cartSetup.ntot[YY],
1489                         cartSetup.ntot[ZZ]);
1490
1491         for (int i = 0; i < DIM; i++)
1492         {
1493             periods[i] = TRUE;
1494         }
1495         MPI_Comm comm_cart = MPI_COMM_NULL;
1496         MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder), &comm_cart);
1497         MPI_Comm_rank(comm_cart, &rank);
1498         if (MASTER(cr) && rank != 0)
1499         {
1500             gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1501         }
1502
1503         /* With this assignment we lose the link to the original communicator,
1504          * which will usually be MPI_COMM_WORLD, unless we have multisim.
1505          */
1506         cr->mpi_comm_mysim = comm_cart;
1507         cr->sim_nodeid     = rank;
1508
1509         MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1510
1511         GMX_LOG(mdlog.info)
1512                 .appendTextFormatted("Cartesian rank %d, coordinates %d %d %d\n",
1513                                      cr->sim_nodeid,
1514                                      ddCellIndex[XX],
1515                                      ddCellIndex[YY],
1516                                      ddCellIndex[ZZ]);
1517
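        /* Ranks whose Cartesian coordinate along the PME stacking dimension lies
         * within the PP grid do PP work; the remaining ranks do PME-only work.
         */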
1518         if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1519         {
1520             cr->duty = DUTY_PP;
1521         }
1522         if (!ddRankSetup.usePmeOnlyRanks
1523             || ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1524         {
1525             cr->duty = DUTY_PME;
1526         }
1527
1528         /* Split the sim communicator into PP and PME only nodes */
1529         MPI_Comm_split(cr->mpi_comm_mysim,
1530                        getThisRankDuties(cr),
1531                        dd_index(cartSetup.ntot, ddCellIndex),
1532                        &cr->mpi_comm_mygroup);
1533 #else
1534         GMX_UNUSED_VALUE(ddCellIndex);
1535 #endif
1536     }
1537     else
1538     {
1539         switch (ddRankOrder)
1540         {
1541             case DdRankOrder::pp_pme:
1542                 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1543                 break;
1544             case DdRankOrder::interleave:
1545                 /* Interleave the PP-only and PME-only ranks */
1546                 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1547                 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1548                 break;
1549             case DdRankOrder::cartesian: break;
1550             default: gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1551         }
1552
1553         if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1554         {
1555             cr->duty = DUTY_PME;
1556         }
1557         else
1558         {
1559             cr->duty = DUTY_PP;
1560         }
1561 #if GMX_MPI
1562         /* Split the sim communicator into PP and PME only nodes */
1563         MPI_Comm_split(cr->mpi_comm_mysim, getThisRankDuties(cr), cr->nodeid, &cr->mpi_comm_mygroup);
1564         MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1565 #endif
1566     }
1567
1568     GMX_LOG(mdlog.info)
1569             .appendTextFormatted("This rank does only %s work.\n",
1570                                  thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1571
1572     return cartSetup;
1573 }
1574
1575 /*! \brief Makes the PP communicator and the PME communicator, when needed
1576  *
1577  * Returns the Cartesian rank setup.
1578  * Sets \p cr->mpi_comm_mygroup.
1579  * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1580  * With separate PME ranks in interleaved order, sets the PME ranks in \p pmeRanks.
1581  */
1582 static CartesianRankSetup makeGroupCommunicators(const gmx::MDLogger& mdlog,
1583                                                  const DDSettings&    ddSettings,
1584                                                  const DdRankOrder    ddRankOrder,
1585                                                  const DDRankSetup&   ddRankSetup,
1586                                                  t_commrec*           cr,
1587                                                  ivec                 ddCellIndex,
1588                                                  std::vector<int>*    pmeRanks)
1589 {
1590     CartesianRankSetup cartSetup;
1591
1592     // As a default, both group and sim communicators are equal to the default communicator
1593     cr->mpi_comm_mygroup = cr->mpiDefaultCommunicator;
1594     cr->mpi_comm_mysim   = cr->mpiDefaultCommunicator;
1595     cr->nnodes           = cr->sizeOfDefaultCommunicator;
1596     cr->nodeid           = cr->rankInDefaultCommunicator;
1597     cr->sim_nodeid       = cr->rankInDefaultCommunicator;
1598
1599     if (ddRankSetup.usePmeOnlyRanks)
1600     {
1601         /* Split the communicator into a PP and PME part */
1602         cartSetup = split_communicator(
1603                 mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder, ddRankSetup, ddCellIndex, pmeRanks);
1604     }
1605     else
1606     {
1607         /* All nodes do PP and PME */
1608         /* We do not require separate communicators */
1609         cartSetup.bCartesianPP     = false;
1610         cartSetup.bCartesianPP_PME = false;
1611     }
1612
1613     return cartSetup;
1614 }
1615
1616 /*! \brief For PP ranks, sets or makes the PP communicator
1617  *
1618  * For PME-only ranks, receives the DD index to simulation-rank mapping.
1619  * For PP-only ranks, sets the PME-only rank that serves them.
1620  */
1621 static void setupGroupCommunication(const gmx::MDLogger&     mdlog,
1622                                     const DDSettings&        ddSettings,
1623                                     gmx::ArrayRef<const int> pmeRanks,
1624                                     t_commrec*               cr,
1625                                     const int                numAtomsInSystem,
1626                                     gmx_domdec_t*            dd)
1627 {
1628     const DDRankSetup&        ddRankSetup = dd->comm->ddRankSetup;
1629     const CartesianRankSetup& cartSetup   = dd->comm->cartesianRankSetup;
1630
1631     if (thisRankHasDuty(cr, DUTY_PP))
1632     {
1633         /* Copy or make a new PP communicator */
1634
1635         /* We (possibly) reordered the nodes in split_communicator,
1636          * so it is no longer required in make_pp_communicator.
1637          */
1638         const bool useCartesianReorder = (ddSettings.useCartesianReorder && !cartSetup.bCartesianPP_PME);
1639
1640         make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1641     }
1642     else
1643     {
1644         receive_ddindex2simnodeid(dd, cr);
1645     }
1646
1647     if (!thisRankHasDuty(cr, DUTY_PME))
1648     {
1649         /* Set up the communication to our PME node */
1650         dd->pme_nodeid = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1651         dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1652         if (debug)
1653         {
1654             fprintf(debug,
1655                     "My pme_nodeid %d receive ener %s\n",
1656                     dd->pme_nodeid,
1657                     gmx::boolToString(dd->pme_receive_vir_ener));
1658         }
1659     }
1660     else
1661     {
1662         dd->pme_nodeid = -1;
1663     }
1664
1665     /* We cannot use DDMASTER(dd), because dd->masterrank is set later */
1666     if (MASTER(cr))
1667     {
1668         dd->ma = std::make_unique<AtomDistribution>(dd->numCells, numAtomsInSystem, numAtomsInSystem);
1669     }
1670 }
1671
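/*! \brief Parses a user-supplied string of relative cell sizes for static load balancing
 *
 * Returns an array of \p nc normalized fractions for direction \p dir,
 * or nullptr when no string is given or only one cell is used.
 */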
1672 static real* get_slb_frac(const gmx::MDLogger& mdlog, const char* dir, int nc, const char* size_string)
1673 {
1674     real* slb_frac = nullptr;
1675     if (nc > 1 && size_string != nullptr)
1676     {
1677         GMX_LOG(mdlog.info).appendTextFormatted("Using static load balancing for the %s direction", dir);
1678         snew(slb_frac, nc);
1679         real tot = 0;
1680         for (int i = 0; i < nc; i++)
1681         {
1682             double dbl = 0;
1683             int    n   = 0;
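            /* %n records how many characters were consumed, so we can advance size_string */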
1684             sscanf(size_string, "%20lf%n", &dbl, &n);
1685             if (dbl == 0)
1686             {
1687                 gmx_fatal(FARGS,
1688                           "Incorrect or not enough DD cell size entries for direction %s: '%s'",
1689                           dir,
1690                           size_string);
1691             }
1692             slb_frac[i] = dbl;
1693             size_string += n;
1694             tot += slb_frac[i];
1695         }
1696         /* Normalize */
1697         std::string relativeCellSizes = "Relative cell sizes:";
1698         for (int i = 0; i < nc; i++)
1699         {
1700             slb_frac[i] /= tot;
1701             relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1702         }
1703         GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1704     }
1705
1706     return slb_frac;
1707 }
1708
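/*! \brief Returns the number of bonded interactions in the system that involve more than two atoms */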
1709 static int multi_body_bondeds_count(const gmx_mtop_t& mtop)
1710 {
1711     int n = 0;
1712     for (const auto ilists : IListRange(mtop))
1713     {
1714         for (auto& ilist : extractILists(ilists.list(), IF_BOND))
1715         {
1716             if (NRAL(ilist.functionType) > 2)
1717             {
1718                 n += ilists.nmol() * (ilist.iatoms.size() / ilistStride(ilist));
1719             }
1720         }
1721     }
1722
1723     return n;
1724 }
1725
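/*! \brief Returns the integer value of environment variable \p env_var, \p def when it is not set, or 1 when the value cannot be parsed */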
1726 static int dd_getenv(const gmx::MDLogger& mdlog, const char* env_var, int def)
1727 {
1728     int   nst = def;
1729     char* val = getenv(env_var);
1730     if (val)
1731     {
1732         if (sscanf(val, "%20d", &nst) <= 0)
1733         {
1734             nst = 1;
1735         }
1736         GMX_LOG(mdlog.info).appendTextFormatted("Found env.var. %s = %s, using value %d", env_var, val, nst);
1737     }
1738
1739     return nst;
1740 }
1741
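/*! \brief Issues errors or warnings for input settings that are incompatible with this domain decomposition */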
1742 static void check_dd_restrictions(const gmx_domdec_t* dd, const t_inputrec& inputrec, const gmx::MDLogger& mdlog)
1743 {
1744     if (inputrec.pbcType == PbcType::Screw
1745         && (dd->numCells[XX] == 1 || dd->numCells[YY] > 1 || dd->numCells[ZZ] > 1))
1746     {
1747         gmx_fatal(FARGS,
1748                   "With pbc=%s can only do domain decomposition in the x-direction",
1749                   c_pbcTypeNames[inputrec.pbcType].c_str());
1750     }
1751
1752     if (inputrec.nstlist == 0)
1753     {
1754         gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1755     }
1756
1757     if (inputrec.comm_mode == ComRemovalAlgorithm::Angular && inputrec.pbcType != PbcType::No)
1758     {
1759         GMX_LOG(mdlog.warning)
1760                 .appendText(
1761                         "comm-mode angular will give incorrect results when the comm group "
1762                         "partially crosses a periodic boundary");
1763     }
1764 }
1765
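/*! \brief Returns the smallest initial average cell size over the dimensions with more than one domain */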
1766 static real average_cellsize_min(const gmx_ddbox_t& ddbox, const ivec numDomains)
1767 {
1768     real r = ddbox.box_size[XX];
1769     for (int d = 0; d < DIM; d++)
1770     {
1771         if (numDomains[d] > 1)
1772         {
1773             /* Check using the initial average cell size */
1774             r = std::min(r, ddbox.box_size[d] * ddbox.skew_fac[d] / numDomains[d]);
1775         }
1776     }
1777
1778     return r;
1779 }
1780
1781 /*! \brief Depending on the initial DLB state, returns the DLB switched-off state or issues an error.
1782  */
1783 static DlbState forceDlbOffOrBail(DlbState             cmdlineDlbState,
1784                                   const std::string&   reasonStr,
1785                                   const gmx::MDLogger& mdlog)
1786 {
1787     std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
1788     std::string dlbDisableNote     = "NOTE: disabling dynamic load balancing as ";
1789
1790     if (cmdlineDlbState == DlbState::onUser)
1791     {
1792         gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1793     }
1794     else if (cmdlineDlbState == DlbState::offCanTurnOn)
1795     {
1796         GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1797     }
1798     return DlbState::offForever;
1799 }
1800
1801 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1802  *
1803  * This function parses the parameters of the "-dlb" command-line option, setting
1804  * the corresponding state values. Then it checks the consistency of the determined
1805  * state with other run parameters and settings. As a result, the initial state
1806  * may be altered, or an error may be thrown when incompatible options are detected.
1807  *
1808  * \param [in] mdlog       Logger.
1809  * \param [in] dlbOption   Enum value for the DLB option.
1810  * \param [in] bRecordLoad True if the load balancer is recording load information.
1811  * \param [in] mdrunOptions  Options for mdrun.
1812  * \param [in] inputrec    The mdrun input parameters.
1813  * \returns                DLB initial/startup state.
1814  */
1815 static DlbState determineInitialDlbState(const gmx::MDLogger&     mdlog,
1816                                          DlbOption                dlbOption,
1817                                          gmx_bool                 bRecordLoad,
1818                                          const gmx::MdrunOptions& mdrunOptions,
1819                                          const t_inputrec&        inputrec)
1820 {
1821     DlbState dlbState = DlbState::offCanTurnOn;
1822
1823     switch (dlbOption)
1824     {
1825         case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1826         case DlbOption::no: dlbState = DlbState::offUser; break;
1827         case DlbOption::yes: dlbState = DlbState::onUser; break;
1828         default: gmx_incons("Invalid dlbOption enum value");
1829     }
1830
1831     /* Reruns don't support DLB: bail or override auto mode */
1832     if (mdrunOptions.rerun)
1833     {
1834         std::string reasonStr = "it is not supported in reruns.";
1835         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1836     }
1837
1838     /* Unsupported integrators */
1839     if (!EI_DYNAMICS(inputrec.eI))
1840     {
1841         auto reasonStr =
1842                 gmx::formatString("it is only supported with dynamics, not with integrator '%s'.",
1843                                   enumValueToString(inputrec.eI));
1844         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1845     }
1846
1847     /* Without cycle counters we can't time work to balance on */
1848     if (!bRecordLoad)
1849     {
1850         std::string reasonStr =
1851                 "cycle counters unsupported or not enabled in the operating system kernel.";
1852         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1853     }
1854
1855     if (mdrunOptions.reproducible)
1856     {
1857         std::string reasonStr = "you started a reproducible run.";
1858         switch (dlbState)
1859         {
1860             case DlbState::offUser: break;
1861             case DlbState::offForever:
1862                 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1863                 break;
1864             case DlbState::offCanTurnOn: return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1865             case DlbState::onCanTurnOff:
1866                 GMX_RELEASE_ASSERT(false, "DlbState::onCanTurnOff is not a valid initial state");
1867                 break;
1868             case DlbState::onUser:
1869                 return forceDlbOffOrBail(
1870                         dlbState,
1871                         reasonStr
1872                                 + " In load balanced runs binary reproducibility cannot be "
1873                                   "ensured.",
1874                         mdlog);
1875             default:
1876                 gmx_fatal(FARGS,
1877                           "Death horror: undefined case (%d) for load balancing choice",
1878                           static_cast<int>(dlbState));
1879         }
1880     }
1881
1882     return dlbState;
1883 }
1884
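/*! \brief Allocates and zero-initializes the domain decomposition communication struct */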
1885 static gmx_domdec_comm_t* init_dd_comm()
1886 {
1887     gmx_domdec_comm_t* comm = new gmx_domdec_comm_t;
1888
1889     comm->n_load_have    = 0;
1890     comm->n_load_collect = 0;
1891
1892     comm->haveTurnedOffDlb = false;
1893
1894     for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
1895     {
1896         comm->sum_nat[i] = 0;
1897     }
1898     comm->ndecomp   = 0;
1899     comm->nload     = 0;
1900     comm->load_step = 0;
1901     comm->load_sum  = 0;
1902     comm->load_max  = 0;
1903     clear_ivec(comm->load_lim);
1904     comm->load_mdf = 0;
1905     comm->load_pme = 0;
1906
1907     /* This should be replaced by a unique pointer */
1908     comm->balanceRegion = ddBalanceRegionAllocate();
1909
1910     return comm;
1911 }
1912
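/*! \brief Stores the update-group information in \p systemInfo and logs a summary when update groups are used */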
1913 static void setupUpdateGroups(const gmx::MDLogger&              mdlog,
1914                               const gmx_mtop_t&                 mtop,
1915                               ArrayRef<const RangePartitioning> updateGroupingsPerMoleculeType,
1916                               const bool                        useUpdateGroups,
1917                               const real                        maxUpdateGroupRadius,
1918                               DDSystemInfo*                     systemInfo)
1919 {
1920     systemInfo->updateGroupingsPerMoleculeType = updateGroupingsPerMoleculeType;
1921     systemInfo->useUpdateGroups                = useUpdateGroups;
1922     systemInfo->maxUpdateGroupRadius           = maxUpdateGroupRadius;
1923
1924     if (systemInfo->useUpdateGroups)
1925     {
1926         int numUpdateGroups = 0;
1927         for (const auto& molblock : mtop.molblock)
1928         {
1929             numUpdateGroups += molblock.nmol
1930                                * systemInfo->updateGroupingsPerMoleculeType[molblock.type].numBlocks();
1931         }
1932
1933         GMX_LOG(mdlog.info)
1934                 .appendTextFormatted(
1935                         "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f "
1936                         "nm\n",
1937                         numUpdateGroups,
1938                         mtop.natoms / static_cast<double>(numUpdateGroups),
1939                         systemInfo->maxUpdateGroupRadius);
1940     }
1941 }
1942
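/*! \brief Collects the unit-cell and PBC properties that domain decomposition needs */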
1943 UnitCellInfo::UnitCellInfo(const t_inputrec& ir) :
1944     npbcdim(numPbcDimensions(ir.pbcType)),
1945     numBoundedDimensions(inputrec2nboundeddim(&ir)),
1946     ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
1947     haveScrewPBC(ir.pbcType == PbcType::Screw)
1948 {
1949 }
1950
1951 /*! \brief Returns whether molecules are always whole, i.e. not broken by PBC */
1952 static bool moleculesAreAlwaysWhole(const gmx_mtop_t&                           mtop,
1953                                     const bool                                  useUpdateGroups,
1954                                     gmx::ArrayRef<const gmx::RangePartitioning> updateGroupingsPerMoleculeType)
1955 {
1956     if (useUpdateGroups)
1957     {
1958         GMX_RELEASE_ASSERT(updateGroupingsPerMoleculeType.size() == mtop.moltype.size(),
1959                            "Need one grouping per moltype");
1960         for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
1961         {
1962             if (updateGroupingsPerMoleculeType[mol].numBlocks() > 1)
1963             {
1964                 return false;
1965             }
1966         }
1967     }
1968     else
1969     {
1970         for (const auto& moltype : mtop.moltype)
1971         {
1972             if (moltype.atoms.nr > 1)
1973             {
1974                 return false;
1975             }
1976         }
1977     }
1978
1979     return true;
1980 }
1981
1982 /*! \brief Generate the simulation system information */
1983 static DDSystemInfo getSystemInfo(const gmx::MDLogger&              mdlog,
1984                                   DDRole                            ddRole,
1985                                   MPI_Comm                          communicator,
1986                                   const DomdecOptions&              options,
1987                                   const gmx_mtop_t&                 mtop,
1988                                   const t_inputrec&                 ir,
1989                                   const matrix                      box,
1990                                   ArrayRef<const RangePartitioning> updateGroupingPerMoleculeType,
1991                                   const bool                        useUpdateGroups,
1992                                   const real                        maxUpdateGroupRadius,
1993                                   gmx::ArrayRef<const gmx::RVec>    xGlobal)
1994 {
1995     const real tenPercentMargin = 1.1;
1996
1997     DDSystemInfo systemInfo;
1998
1999     setupUpdateGroups(
2000             mdlog, mtop, updateGroupingPerMoleculeType, useUpdateGroups, maxUpdateGroupRadius, &systemInfo);
2001
2002     systemInfo.moleculesAreAlwaysWhole = moleculesAreAlwaysWhole(
2003             mtop, systemInfo.useUpdateGroups, systemInfo.updateGroupingsPerMoleculeType);
2004     systemInfo.haveInterDomainBondeds =
2005             (!systemInfo.moleculesAreAlwaysWhole || mtop.bIntermolecularInteractions);
2006     systemInfo.haveInterDomainMultiBodyBondeds =
2007             (systemInfo.haveInterDomainBondeds && multi_body_bondeds_count(mtop) > 0);
2008
2009     if (systemInfo.useUpdateGroups)
2010     {
2011         systemInfo.mayHaveSplitConstraints = false;
2012         systemInfo.mayHaveSplitSettles     = false;
2013     }
2014     else
2015     {
2016         systemInfo.mayHaveSplitConstraints = (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0
2017                                               || gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0);
2018         systemInfo.mayHaveSplitSettles     = (gmx_mtop_ftype_count(mtop, F_SETTLE) > 0);
2019     }
2020
2021     if (ir.rlist == 0)
2022     {
2023         /* Set the cut-off to some very large value,
2024          * so we don't need if statements everywhere in the code.
2025          * We use sqrt, since the cut-off is squared in some places.
2026          */
2027         systemInfo.cutoff = GMX_CUTOFF_INF;
2028     }
2029     else
2030     {
2031         systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir.rlist);
2032     }
2033     systemInfo.minCutoffForMultiBody = 0;
2034
2035     /* Determine the minimum cell size limit, affected by many factors */
2036     systemInfo.cellsizeLimit             = 0;
2037     systemInfo.filterBondedCommunication = false;
2038
2039     /* We do not allow home atoms to move beyond the neighboring domain
2040      * between domain decomposition steps, which limits the cell size.
2041      * Get an estimate of cell size limit due to atom displacement.
2042      * In most cases this is a large overestimate, because it assumes
2043      * non-interacting atoms.
2044      * We set the chance to 1 in a trillion steps.
2045      */
2046     constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2047     const real     limitForAtomDisplacement          = minCellSizeForAtomDisplacement(
2048             mtop, ir, systemInfo.updateGroupingsPerMoleculeType, c_chanceThatAtomMovesBeyondDomain);
2049     GMX_LOG(mdlog.info).appendTextFormatted("Minimum cell size due to atom displacement: %.3f nm", limitForAtomDisplacement);
2050
2051     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, limitForAtomDisplacement);
2052
2053     /* TODO: PME decomposition currently requires atoms not to be more than
2054      *       2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2055      *       In nearly all cases, limitForAtomDisplacement will be smaller
2056      *       than 2/3*rlist, so the PME requirement is satisfied.
2057      *       But it would be better for both correctness and performance
2058      *       to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2059      *       Note that we would need to improve the pairlist buffer case.
2060      */
2061
2062     if (systemInfo.haveInterDomainBondeds)
2063     {
2064         if (options.minimumCommunicationRange > 0)
2065         {
2066             systemInfo.minCutoffForMultiBody =
2067                     atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2068             if (options.useBondedCommunication)
2069             {
2070                 systemInfo.filterBondedCommunication =
2071                         (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2072             }
2073             else
2074             {
2075                 systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
2076             }
2077         }
2078         else if (ir.bPeriodicMols)
2079         {
2080             /* Cannot easily determine the required cut-off */
2081             GMX_LOG(mdlog.warning)
2082                     .appendText(
2083                             "NOTE: Periodic molecules are present in this system. Because of this, "
2084                             "the domain decomposition algorithm cannot easily determine the "
2085                             "minimum cell size that it requires for treating bonded interactions. "
2086                             "Instead, domain decomposition will assume that half the non-bonded "
2087                             "cut-off will be a suitable lower bound.");
2088             systemInfo.minCutoffForMultiBody = systemInfo.cutoff / 2;
2089         }
2090         else
2091         {
2092             real r_2b = 0;
2093             real r_mb = 0;
2094
2095             if (ddRole == DDRole::Master)
2096             {
2097                 dd_bonded_cg_distance(mdlog, mtop, ir, xGlobal, box, options.ddBondedChecking, &r_2b, &r_mb);
2098             }
2099             gmx_bcast(sizeof(r_2b), &r_2b, communicator);
2100             gmx_bcast(sizeof(r_mb), &r_mb, communicator);
2101
2102             /* We use an initial margin of 10% for the minimum cell size,
2103              * except when we are just below the non-bonded cut-off.
2104              */
2105             if (options.useBondedCommunication)
2106             {
2107                 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2108                 {
2109                     const real r_bonded              = std::max(r_2b, r_mb);
2110                     systemInfo.minCutoffForMultiBody = tenPercentMargin * r_bonded;
2111                     /* This is the (only) place where we turn on the filtering */
2112                     systemInfo.filterBondedCommunication = true;
2113                 }
2114                 else
2115                 {
2116                     const real r_bonded = r_mb;
2117                     systemInfo.minCutoffForMultiBody =
2118                             std::min(tenPercentMargin * r_bonded, systemInfo.cutoff);
2119                 }
2120                 /* We determine cutoff_mbody later */
2121                 systemInfo.increaseMultiBodyCutoff = true;
2122             }
2123             else
2124             {
2125                 /* No special bonded communication,
2126                  * simply increase the DD cut-off.
2127                  */
2128                 systemInfo.minCutoffForMultiBody = tenPercentMargin * std::max(r_2b, r_mb);
2129                 systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
2130             }
2131         }
2132         GMX_LOG(mdlog.info)
2133                 .appendTextFormatted("Minimum cell size due to bonded interactions: %.3f nm",
2134                                      systemInfo.minCutoffForMultiBody);
2135
2136         systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.minCutoffForMultiBody);
2137     }
2138
2139     systemInfo.constraintCommunicationRange = 0;
2140     if (systemInfo.mayHaveSplitConstraints && options.constraintCommunicationRange <= 0)
2141     {
2142         /* There is a cell size limit due to the constraints (P-LINCS) */
2143         systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, &mtop, &ir);
2144         GMX_LOG(mdlog.info)
2145                 .appendTextFormatted("Estimated maximum distance required for P-LINCS: %.3f nm",
2146                                      systemInfo.constraintCommunicationRange);
2147         if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2148         {
2149             GMX_LOG(mdlog.info)
2150                     .appendText(
2151                             "This distance will limit the DD cell size, you can override this with "
2152                             "-rcon");
2153         }
2154     }
2155     else if (options.constraintCommunicationRange > 0)
2156     {
2157         /* Here we do not check for dd->splitConstraints,
2158          * because one can also set a cell size limit for virtual sites only,
2159          * and at this point we don't know yet if there are inter-update-group v-sites.
2160          */
2161         GMX_LOG(mdlog.info)
2162                 .appendTextFormatted("User supplied maximum distance required for P-LINCS: %.3f nm",
2163                                      options.constraintCommunicationRange);
2164         systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2165     }
2166     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.constraintCommunicationRange);
2167
2168     return systemInfo;
2169 }
2170
2171 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
2172  * implemented. */
2173 static void checkDDGridSetup(const DDGridSetup&   ddGridSetup,
2174                              DDRole               ddRole,
2175                              MPI_Comm             communicator,
2176                              int                  numNodes,
2177                              const DomdecOptions& options,
2178                              const DDSettings&    ddSettings,
2179                              const DDSystemInfo&  systemInfo,
2180                              const real           cellsizeLimit,
2181                              const gmx_ddbox_t&   ddbox)
2182 {
2183     if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
2184     {
2185         const bool  bC = (systemInfo.mayHaveSplitConstraints
2186                          && systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2187         std::string message =
2188                 gmx::formatString("Change the number of ranks or mdrun option %s%s%s",
2189                                   !bC ? "-rdd" : "-rcon",
2190                                   ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2191                                   bC ? " or your LINCS settings" : "");
2192
2193         gmx_fatal_collective(FARGS,
2194                              communicator,
2195                              ddRole == DDRole::Master,
2196                              "There is no domain decomposition for %d ranks that is compatible "
2197                              "with the given box and a minimum cell size of %g nm\n"
2198                              "%s\n"
2199                              "Look in the log file for details on the domain decomposition",
2200                              numNodes - ddGridSetup.numPmeOnlyRanks,
2201                              cellsizeLimit,
2202                              message.c_str());
2203     }
2204
2205     const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
2206     if (acs < cellsizeLimit)
2207     {
2208         if (options.numCells[XX] <= 0)
2209         {
2210             GMX_RELEASE_ASSERT(
2211                     false,
2212                     "dd_choose_grid() should return a grid that satisfies the cell size limits");
2213         }
2214         else
2215         {
2216             gmx_fatal_collective(
2217                     FARGS,
2218                     communicator,
2219                     ddRole == DDRole::Master,
2220                     "The initial cell size (%f) is smaller than the cell size limit (%f), change "
2221                     "options -dd, -rdd or -rcon, see the log file for details",
2222                     acs,
2223                     cellsizeLimit);
2224         }
2225     }
2226
2227     const int numPPRanks =
2228             ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2229     if (numNodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2230     {
2231         gmx_fatal_collective(FARGS,
2232                              communicator,
2233                              ddRole == DDRole::Master,
2234                              "The size of the domain decomposition grid (%d) does not match the "
2235                              "number of PP ranks (%d). The total number of ranks is %d",
2236                              numPPRanks,
2237                              numNodes - ddGridSetup.numPmeOnlyRanks,
2238                              numNodes);
2239     }
2240     if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2241     {
2242         gmx_fatal_collective(FARGS,
2243                              communicator,
2244                              ddRole == DDRole::Master,
2245                              "The number of separate PME ranks (%d) is larger than the number of "
2246                              "PP ranks (%d), this is not supported.",
2247                              ddGridSetup.numPmeOnlyRanks,
2248                              numPPRanks);
2249     }
2250 }
2251
2252 /*! \brief Sets up the PP and PME rank counts and the PME decomposition, based on the DD grid setup */
2253 static DDRankSetup getDDRankSetup(const gmx::MDLogger& mdlog,
2254                                   int                  numNodes,
2255                                   const DDGridSetup&   ddGridSetup,
2256                                   const t_inputrec&    ir)
2257 {
2258     GMX_LOG(mdlog.info)
2259             .appendTextFormatted("Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2260                                  ddGridSetup.numDomains[XX],
2261                                  ddGridSetup.numDomains[YY],
2262                                  ddGridSetup.numDomains[ZZ],
2263                                  ddGridSetup.numPmeOnlyRanks);
2264
2265     DDRankSetup ddRankSetup;
2266
2267     ddRankSetup.numPPRanks = numNodes - ddGridSetup.numPmeOnlyRanks;
2268     copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2269
2270     ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2271     if (ddRankSetup.usePmeOnlyRanks)
2272     {
2273         ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2274     }
2275     else
2276     {
2277         ddRankSetup.numRanksDoingPme =
2278                 ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2279     }
2280
2281     if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2282     {
2283         /* The following choices should match those
2284          * in comm_cost_est in domdec_setup.c.
2285          * Note that here the checks have to take into account
2286          * that the decomposition might occur in a different order than xyz
2287          * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2288          * in which case they will not match those in comm_cost_est,
2289          * but since that is mainly for testing purposes that's fine.
2290          */
2291         if (ddGridSetup.numDDDimensions >= 2 && ddGridSetup.ddDimensions[0] == XX
2292             && ddGridSetup.ddDimensions[1] == YY
2293             && ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX]
2294             && ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0
2295             && getenv("GMX_PMEONEDD") == nullptr)
2296         {
2297             ddRankSetup.npmedecompdim = 2;
2298             ddRankSetup.npmenodes_x   = ddGridSetup.numDomains[XX];
2299             ddRankSetup.npmenodes_y   = ddRankSetup.numRanksDoingPme / ddRankSetup.npmenodes_x;
2300         }
2301         else
2302         {
2303             /* In case nc is 1 in both x and y we could still choose to
2304              * decompose pme in y instead of x, but we use x for simplicity.
2305              */
2306             ddRankSetup.npmedecompdim = 1;
2307             if (ddGridSetup.ddDimensions[0] == YY)
2308             {
2309                 ddRankSetup.npmenodes_x = 1;
2310                 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2311             }
2312             else
2313             {
2314                 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2315                 ddRankSetup.npmenodes_y = 1;
2316             }
2317         }
2318         GMX_LOG(mdlog.info)
2319                 .appendTextFormatted("PME domain decomposition: %d x %d x %d",
2320                                      ddRankSetup.npmenodes_x,
2321                                      ddRankSetup.npmenodes_y,
2322                                      1);
2323     }
2324     else
2325     {
2326         ddRankSetup.npmedecompdim = 0;
2327         ddRankSetup.npmenodes_x   = 0;
2328         ddRankSetup.npmenodes_y   = 0;
2329     }
2330
2331     return ddRankSetup;
2332 }
2333
2334 /*! \brief Set the cell size and interaction limits */
2335 static void set_dd_limits(const gmx::MDLogger& mdlog,
2336                           DDRole               ddRole,
2337                           gmx_domdec_t*        dd,
2338                           const DomdecOptions& options,
2339                           const DDSettings&    ddSettings,
2340                           const DDSystemInfo&  systemInfo,
2341                           const DDGridSetup&   ddGridSetup,
2342                           const int            numPPRanks,
2343                           const gmx_mtop_t&    mtop,
2344                           const t_inputrec&    ir,
2345                           const gmx_ddbox_t&   ddbox)
2346 {
2347     gmx_domdec_comm_t* comm = dd->comm;
2348     comm->ddSettings        = ddSettings;
2349
2350     /* Initialize the GPU share count to 0; it might change later */
2351     comm->nrank_gpu_shared = 0;
2352
2353     comm->dlbState = comm->ddSettings.initialDlbState;
2354     dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2355     /* To consider turning DLB on after 2*nstlist steps we need to check
2356      * at partitioning count 3. Thus we need to increase the first count by 2.
2357      */
2358     comm->ddPartioningCountFirstDlbOff += 2;
2359
2360     comm->bPMELoadBalDLBLimits = FALSE;
2361
2362     /* Allocate the charge group/atom sorting struct */
2363     comm->sort = std::make_unique<gmx_domdec_sort_t>();
2364
2365     comm->systemInfo = systemInfo;
2366
2367     if (systemInfo.useUpdateGroups)
2368     {
2369         /* Note: We would like to use dd->nnodes for the atom count estimate,
2370          *       but that is not yet available here. But this anyhow only
2371          *       affects performance up to the second dd_partition_system call.
2372          */
2373         const int homeAtomCountEstimate = mtop.natoms / numPPRanks;
2374         comm->updateGroupsCog           = std::make_unique<gmx::UpdateGroupsCog>(
2375                 mtop, systemInfo.updateGroupingsPerMoleculeType, maxReferenceTemperature(ir), homeAtomCountEstimate);
2376     }
2377
2378     /* Set the DD setup given by ddGridSetup */
2379     copy_ivec(ddGridSetup.numDomains, dd->numCells);
2380     dd->ndim = ddGridSetup.numDDDimensions;
2381     copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2382
2383     dd->nnodes = dd->numCells[XX] * dd->numCells[YY] * dd->numCells[ZZ];
2384
2385     snew(comm->slb_frac, DIM);
2386     if (isDlbDisabled(comm))
2387     {
2388         comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->numCells[XX], options.cellSizeX);
2389         comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->numCells[YY], options.cellSizeY);
2390         comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->numCells[ZZ], options.cellSizeZ);
2391     }
2392
2393     /* Set the multi-body cut-off and cellsize limit for DLB */
2394     comm->cutoff_mbody   = systemInfo.minCutoffForMultiBody;
2395     comm->cellsize_limit = systemInfo.cellsizeLimit;
2396     if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2397     {
2398         if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2399         {
2400             /* Set the bonded communication distance to halfway
2401              * the minimum and the maximum,
2402              * since the extra communication cost is nearly zero.
2403              */
2404             real acs           = average_cellsize_min(ddbox, dd->numCells);
2405             comm->cutoff_mbody = 0.5 * (systemInfo.minCutoffForMultiBody + acs);
2406             if (!isDlbDisabled(comm))
2407             {
2408                 /* Check if this does not limit the scaling */
2409                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, options.dlbScaling * acs);
2410             }
2411             if (!systemInfo.filterBondedCommunication)
2412             {
2413                 /* Without bBondComm do not go beyond the n.b. cut-off */
2414                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2415                 if (comm->cellsize_limit >= systemInfo.cutoff)
2416                 {
2417                     /* We don't lose a lot of efficiency
2418                      * when increasing it to the n.b. cut-off.
2419                      * It can even be slightly faster, because we need
2420                      * less checks for the communication setup.
2421                      */
2422                     comm->cutoff_mbody = systemInfo.cutoff;
2423                 }
2424             }
2425             /* Check if we did not end up below our original limit */
2426             comm->cutoff_mbody = std::max(comm->cutoff_mbody, systemInfo.minCutoffForMultiBody);
2427
2428             if (comm->cutoff_mbody > comm->cellsize_limit)
2429             {
2430                 comm->cellsize_limit = comm->cutoff_mbody;
2431             }
2432         }
2433         /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2434     }
2435
2436     if (debug)
2437     {
2438         fprintf(debug,
2439                 "Bonded atom communication beyond the cut-off: %s\n"
2440                 "cellsize limit %f\n",
2441                 gmx::boolToString(systemInfo.filterBondedCommunication),
2442                 comm->cellsize_limit);
2443     }
2444
2445     if (ddRole == DDRole::Master)
2446     {
2447         check_dd_restrictions(dd, ir, mdlog);
2448     }
2449 }
2450
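/*! \brief Writes the DD cell size and communication limits to \p log */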
2451 static void writeSettings(gmx::TextWriter*   log,
2452                           gmx_domdec_t*      dd,
2453                           const gmx_mtop_t&  mtop,
2454                           const t_inputrec&  ir,
2455                           gmx_bool           bDynLoadBal,
2456                           real               dlb_scale,
2457                           const gmx_ddbox_t* ddbox)
2458 {
2459     gmx_domdec_comm_t* comm = dd->comm;
2460
2461     if (bDynLoadBal)
2462     {
2463         log->writeString("The maximum number of communication pulses is:");
2464         for (int d = 0; d < dd->ndim; d++)
2465         {
2466             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2467         }
2468         log->ensureLineBreak();
2469         log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm",
2470                                 comm->cellsize_limit);
2471         log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2472         log->writeString("The allowed shrink of domain decomposition cells is:");
2473         for (int d = 0; d < DIM; d++)
2474         {
2475             if (dd->numCells[d] > 1)
2476             {
2477                 const real shrink =
2478                         (d >= ddbox->npbcdim && dd->numCells[d] == 2)
2479                                 ? 0
2480                                 : comm->cellsize_min_dlb[d]
2481                                           / (ddbox->box_size[d] * ddbox->skew_fac[d] / dd->numCells[d]);
2482                 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2483             }
2484         }
2485         log->ensureLineBreak();
2486     }
2487     else
2488     {
2489         ivec np;
2490         set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2491         log->writeString("The initial number of communication pulses is:");
2492         for (int d = 0; d < dd->ndim; d++)
2493         {
2494             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2495         }
2496         log->ensureLineBreak();
2497         log->writeString("The initial domain decomposition cell size is:");
2498         for (int d = 0; d < DIM; d++)
2499         {
2500             if (dd->numCells[d] > 1)
2501             {
2502                 log->writeStringFormatted(" %c %.2f nm", dim2char(d), dd->comm->cellsize_min[d]);
2503             }
2504         }
2505         log->ensureLineBreak();
2506         log->writeLine();
2507     }
2508
2509     const bool haveInterDomainVsites =
2510             (countInterUpdategroupVsites(mtop, comm->systemInfo.updateGroupingsPerMoleculeType) != 0);
2511
2512     if (comm->systemInfo.haveInterDomainBondeds || haveInterDomainVsites
2513         || comm->systemInfo.mayHaveSplitConstraints || comm->systemInfo.mayHaveSplitSettles)
2514     {
2515         std::string decompUnits;
2516         if (comm->systemInfo.useUpdateGroups)
2517         {
2518             decompUnits = "atom groups";
2519         }
2520         else
2521         {
2522             decompUnits = "atoms";
2523         }
2524
2525         log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:",
2526                                 decompUnits.c_str());
2527         log->writeLineFormatted(
2528                 "%40s  %-7s %6.3f nm", "non-bonded interactions", "", comm->systemInfo.cutoff);
2529
2530         real limit = 0;
2531         if (bDynLoadBal)
2532         {
2533             limit = dd->comm->cellsize_limit;
2534         }
2535         else
2536         {
2537             if (dd->unitCellInfo.ddBoxIsDynamic)
2538             {
2539                 log->writeLine(
2540                         "(the following are initial values, they could change due to box "
2541                         "deformation)");
2542             }
2543             limit = dd->comm->cellsize_min[XX];
2544             for (int d = 1; d < DIM; d++)
2545             {
2546                 limit = std::min(limit, dd->comm->cellsize_min[d]);
2547             }
2548         }
2549
2550         if (comm->systemInfo.haveInterDomainBondeds)
2551         {
2552             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2553                                     "two-body bonded interactions",
2554                                     "(-rdd)",
2555                                     std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2556             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2557                                     "multi-body bonded interactions",
2558                                     "(-rdd)",
2559                                     (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm))
2560                                             ? comm->cutoff_mbody
2561                                             : std::min(comm->systemInfo.cutoff, limit));
2562         }
2563         if (haveInterDomainVsites)
2564         {
2565             log->writeLineFormatted("%40s  %-7s %6.3f nm", "virtual site constructions", "(-rcon)", limit);
2566         }
2567         if (comm->systemInfo.mayHaveSplitConstraints || comm->systemInfo.mayHaveSplitSettles)
2568         {
2569             std::string separation =
2570                     gmx::formatString("atoms separated by up to %d constraints", 1 + ir.nProjOrder);
2571             log->writeLineFormatted("%40s  %-7s %6.3f nm\n", separation.c_str(), "(-rcon)", limit);
2572         }
2573         log->ensureLineBreak();
2574     }
2575 }
2576
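/*! \brief Logs the DD settings, including the values that would apply if DLB gets turned on */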
2577 static void logSettings(const gmx::MDLogger& mdlog,
2578                         gmx_domdec_t*        dd,
2579                         const gmx_mtop_t&    mtop,
2580                         const t_inputrec&    ir,
2581                         real                 dlb_scale,
2582                         const gmx_ddbox_t*   ddbox)
2583 {
2584     gmx::StringOutputStream stream;
2585     gmx::TextWriter         log(&stream);
2586     writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2587     if (dd->comm->dlbState == DlbState::offCanTurnOn)
2588     {
2589         {
2590             log.ensureEmptyLine();
2591             log.writeLine(
2592                     "When dynamic load balancing gets turned on, these settings will change to:");
2593         }
2594         writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2595     }
2596     GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2597 }
2598
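/*! \brief Sets the cell size limits and the maximum number of communication pulses to use with DLB */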
2599 static void set_cell_limits_dlb(const gmx::MDLogger& mdlog,
2600                                 gmx_domdec_t*        dd,
2601                                 real                 dlb_scale,
2602                                 const t_inputrec&    inputrec,
2603                                 const gmx_ddbox_t*   ddbox)
2604 {
2605     int npulse       = 0;
2606     int npulse_d_max = 0;
2607     int npulse_d     = 0;
2608
2609     gmx_domdec_comm_t* comm = dd->comm;
2610
2611     bool bNoCutOff = (inputrec.rvdw == 0 || inputrec.rcoulomb == 0);
2612
2613     /* Determine the maximum number of comm. pulses in one dimension */
2614
2615     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2616
2617     /* Determine the maximum required number of grid pulses */
2618     if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2619     {
2620         /* Only a single pulse is required */
2621         npulse = 1;
2622     }
2623     else if (!bNoCutOff && comm->cellsize_limit > 0)
2624     {
2625         /* We round down slightly here to avoid overhead due to the latency
2626          * of extra communication calls when the cut-off
2627          * would be only slightly longer than the cell size.
2628          * Later cellsize_limit is redetermined,
2629          * so we cannot miss interactions due to this rounding.
2630          */
2631         npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff / comm->cellsize_limit);
2632     }
2633     else
2634     {
2635         /* There is no cell size limit */
2636         npulse = std::max(dd->numCells[XX] - 1, std::max(dd->numCells[YY] - 1, dd->numCells[ZZ] - 1));
2637     }
2638
2639     if (!bNoCutOff && npulse > 1)
2640     {
2641         /* See if we can do with less pulses, based on dlb_scale */
2642         npulse_d_max = 0;
2643         for (int d = 0; d < dd->ndim; d++)
2644         {
2645             int dim  = dd->dim[d];
2646             npulse_d = static_cast<int>(
2647                     1
2648                     + dd->numCells[dim] * comm->systemInfo.cutoff
2649                               / (ddbox->box_size[dim] * ddbox->skew_fac[dim] * dlb_scale));
2650             npulse_d_max = std::max(npulse_d_max, npulse_d);
2651         }
2652         npulse = std::min(npulse, npulse_d_max);
2653     }
2654
2655     /* This env var can override npulse */
2656     const int ddPulseEnv = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2657     if (ddPulseEnv > 0)
2658     {
2659         npulse = ddPulseEnv;
2660     }
2661
2662     comm->maxpulse       = 1;
2663     comm->bVacDLBNoLimit = (inputrec.pbcType == PbcType::No);
2664     for (int d = 0; d < dd->ndim; d++)
2665     {
2666         comm->cd[d].np_dlb = std::min(npulse, dd->numCells[dd->dim[d]] - 1);
2667         comm->maxpulse     = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2668         if (comm->cd[d].np_dlb < dd->numCells[dd->dim[d]] - 1)
2669         {
2670             comm->bVacDLBNoLimit = FALSE;
2671         }
2672     }
2673
2674     /* cellsize_limit is set for LINCS in init_domain_decomposition */
2675     if (!comm->bVacDLBNoLimit)
2676     {
2677         comm->cellsize_limit = std::max(comm->cellsize_limit, comm->systemInfo.cutoff / comm->maxpulse);
2678     }
2679     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2680     /* Set the minimum cell size for each DD dimension */
2681     for (int d = 0; d < dd->ndim; d++)
2682     {
2683         if (comm->bVacDLBNoLimit || comm->cd[d].np_dlb * comm->cellsize_limit >= comm->systemInfo.cutoff)
2684         {
2685             comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2686         }
2687         else
2688         {
2689             comm->cellsize_min_dlb[dd->dim[d]] = comm->systemInfo.cutoff / comm->cd[d].np_dlb;
2690         }
2691     }
2692     if (comm->cutoff_mbody <= 0)
2693     {
2694         comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2695     }
2696     if (isDlbOn(comm))
2697     {
2698         set_dlb_limits(dd);
2699     }
2700 }
2701
2702 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t& dd)
2703 {
2704     return dd.comm->systemInfo.moleculesAreAlwaysWhole;
2705 }
2706
2707 bool dd_bonded_molpbc(const gmx_domdec_t& dd, PbcType pbcType)
2708 {
2709     /* If each molecule is a single charge group
2710      * or we use domain decomposition for each periodic dimension,
2711      * we do not need to take pbc into account for the bonded interactions.
2712      */
2713     return (pbcType != PbcType::No && dd.comm->systemInfo.haveInterDomainBondeds
2714             && !(dd.numCells[XX] > 1 && dd.numCells[YY] > 1
2715                  && (dd.numCells[ZZ] > 1 || pbcType == PbcType::XY)));
2716 }
2717
2718 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2719 static void set_ddgrid_parameters(const gmx::MDLogger& mdlog,
2720                                   gmx_domdec_t*        dd,
2721                                   real                 dlb_scale,
2722                                   const gmx_mtop_t&    mtop,
2723                                   const t_inputrec&    inputrec,
2724                                   const gmx_ddbox_t*   ddbox)
2725 {
2726     gmx_domdec_comm_t* comm        = dd->comm;
2727     DDRankSetup&       ddRankSetup = comm->ddRankSetup;
2728
2729     if (EEL_PME(inputrec.coulombtype) || EVDW_PME(inputrec.vdwtype))
2730     {
2731         init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2732         if (ddRankSetup.npmedecompdim >= 2)
2733         {
2734             init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2735         }
2736     }
2737     else
2738     {
2739         ddRankSetup.numRanksDoingPme = 0;
2740         if (dd->pme_nodeid >= 0)
2741         {
2742             gmx_fatal_collective(FARGS,
2743                                  dd->mpi_comm_all,
2744                                  DDMASTER(dd),
2745                                  "Cannot have separate PME ranks without PME electrostatics");
2746         }
2747     }
2748
2749     if (debug)
2750     {
2751         fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2752     }
2753     if (!isDlbDisabled(comm))
2754     {
2755         set_cell_limits_dlb(mdlog, dd, dlb_scale, inputrec, ddbox);
2756     }
2757
2758     logSettings(mdlog, dd, mtop, inputrec, dlb_scale, ddbox);
2759
2760     const real vol_frac = (inputrec.pbcType == PbcType::No)
2761                                   ? (1 - 1 / static_cast<double>(dd->nnodes))
2762                                   : ((1 + comm_box_frac(dd->numCells, comm->systemInfo.cutoff, *ddbox))
2763                                      / static_cast<double>(dd->nnodes));
2764     if (debug)
2765     {
2766         fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2767     }
2768     int natoms_tot = mtop.natoms;
2769
2770     dd->ga2la = new gmx_ga2la_t(natoms_tot, static_cast<int>(vol_frac * natoms_tot));
2771 }
2772
2773 /*! \brief Get some important DD parameters which can be modified by env.vars */
2774 static DDSettings getDDSettings(const gmx::MDLogger&     mdlog,
2775                                 const DomdecOptions&     options,
2776                                 const gmx::MdrunOptions& mdrunOptions,
2777                                 const t_inputrec&        ir)
2778 {
2779     DDSettings ddSettings;
2780
2781     ddSettings.useSendRecv2        = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2782     ddSettings.dlb_scale_lim       = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2783     ddSettings.useDDOrderZYX       = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
2784     ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
2785     ddSettings.eFlop               = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2786     const int recload              = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2787     ddSettings.nstDDDump           = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2788     ddSettings.nstDDDumpGrid       = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2789     ddSettings.DD_debug            = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
2790
2791     if (ddSettings.useSendRecv2)
2792     {
2793         GMX_LOG(mdlog.info)
2794                 .appendText(
2795                         "Will use two sequential MPI_Sendrecv calls instead of two simultaneous "
2796                         "non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite "
2797                         "communication");
2798     }
2799
2800     if (ddSettings.eFlop)
2801     {
2802         GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2803         ddSettings.recordLoad = true;
2804     }
2805     else
2806     {
2807         ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
2808     }
2809
2810     ddSettings.initialDlbState =
2811             determineInitialDlbState(mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, ir);
2812     GMX_LOG(mdlog.info)
2813             .appendTextFormatted("Dynamic load balancing: %s",
2814                                  enumValueToString(ddSettings.initialDlbState));
2815
2816     return ddSettings;
2817 }
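
/* A minimal usage sketch (shell invocation assumed, not part of this file):
 * the environment variables read above are set when launching mdrun, e.g.
 *
 *     GMX_DD_DEBUG=1 GMX_DD_NST_DUMP=100 gmx mdrun -ntmpi 8 -deffnm topol
 *
 * which makes getDDSettings() return ddSettings.DD_debug = 1 and
 * ddSettings.nstDDDump = 100 for this run. */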
2818
2819 gmx_domdec_t::gmx_domdec_t(const t_inputrec& ir) : unitCellInfo(ir) {}
2820
2821 gmx_domdec_t::~gmx_domdec_t() = default;
2822
2823 namespace gmx
2824 {
2825
2826 // TODO once the functionality stabilizes, move this class and
2827 // supporting functionality into builder.cpp
2828 /*! \brief Impl class for DD builder */
2829 class DomainDecompositionBuilder::Impl
2830 {
2831 public:
2832     //! Constructor
2833     Impl(const MDLogger&                   mdlog,
2834          t_commrec*                        cr,
2835          const DomdecOptions&              options,
2836          const MdrunOptions&               mdrunOptions,
2837          const gmx_mtop_t&                 mtop,
2838          const t_inputrec&                 ir,
2839          const matrix                      box,
2840          ArrayRef<const RangePartitioning> updateGroupingPerMoleculeType,
2841          bool                              useUpdateGroups,
2842          real                              maxUpdateGroupRadius,
2843          ArrayRef<const RVec>              xGlobal);
2844
2845     //! Build the resulting DD manager
2846     gmx_domdec_t* build(LocalAtomSetManager* atomSets);
2847
2848     //! Objects used in constructing and configuring DD
2849     //! {
2850     //! Logging object
2851     const MDLogger& mdlog_;
2852     //! Communication object
2853     t_commrec* cr_;
2854     //! User-supplied options configuring DD behavior
2855     const DomdecOptions options_;
2856     //! Global system topology
2857     const gmx_mtop_t& mtop_;
2858     //! User input values from the tpr file
2859     const t_inputrec& ir_;
2860     //! }
2861
2862     //! Internal objects used in constructing DD
2863     //! {
2864     //! Settings combined from the user input
2865     DDSettings ddSettings_;
2866     //! Information derived from the simulation system
2867     DDSystemInfo systemInfo_;
2868     //! Box structure
2869     gmx_ddbox_t ddbox_ = { 0 };
2870     //! Organization of the DD grids
2871     DDGridSetup ddGridSetup_;
2872     //! Organization of the DD ranks
2873     DDRankSetup ddRankSetup_;
2874     //! Index of this rank's DD cell in each dimension
2875     ivec ddCellIndex_ = { 0, 0, 0 };
2876     //! IDs of PME-only ranks
2877     std::vector<int> pmeRanks_;
2878     //! Contains a valid Cartesian-communicator-based setup, or defaults.
2879     CartesianRankSetup cartSetup_;
2880     //! }
2881 };
2882
2883 DomainDecompositionBuilder::Impl::Impl(const MDLogger&                   mdlog,
2884                                        t_commrec*                        cr,
2885                                        const DomdecOptions&              options,
2886                                        const MdrunOptions&               mdrunOptions,
2887                                        const gmx_mtop_t&                 mtop,
2888                                        const t_inputrec&                 ir,
2889                                        const matrix                      box,
2890                                        ArrayRef<const RangePartitioning> updateGroupingPerMoleculeType,
2891                                        const bool                        useUpdateGroups,
2892                                        const real                        maxUpdateGroupRadius,
2893                                        ArrayRef<const RVec>              xGlobal) :
2894     mdlog_(mdlog), cr_(cr), options_(options), mtop_(mtop), ir_(ir)
2895 {
2896     GMX_LOG(mdlog_.info).appendTextFormatted("\nInitializing Domain Decomposition on %d ranks", cr_->sizeOfDefaultCommunicator);
2897
2898     ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_);
2899
2900     if (ddSettings_.eFlop > 1)
2901     {
2902         /* Ensure that we have different random flop counts on different ranks */
2903         srand(1 + cr_->rankInDefaultCommunicator);
2904     }
2905
2906     systemInfo_ = getSystemInfo(mdlog_,
2907                                 MASTER(cr_) ? DDRole::Master : DDRole::Agent,
2908                                 cr->mpiDefaultCommunicator,
2909                                 options_,
2910                                 mtop_,
2911                                 ir_,
2912                                 box,
2913                                 updateGroupingPerMoleculeType,
2914                                 useUpdateGroups,
2915                                 maxUpdateGroupRadius,
2916                                 xGlobal);
2917
2918     const int  numRanksRequested         = cr_->sizeOfDefaultCommunicator;
2919     const bool checkForLargePrimeFactors = (options_.numCells[0] <= 0);
2920     checkForValidRankCountRequests(
2921             numRanksRequested, EEL_PME(ir_.coulombtype), options_.numPmeRanks, checkForLargePrimeFactors);
2922
2923     // DD grid setup uses a different cell size limit for
2924     // automated setup than the one in systemInfo_. The latter is used
2925     // in set_dd_limits() to configure DLB, for example.
2926     const real gridSetupCellsizeLimit =
2927             getDDGridSetupCellSizeLimit(mdlog_,
2928                                         !isDlbDisabled(ddSettings_.initialDlbState),
2929                                         options_.dlbScaling,
2930                                         ir_,
2931                                         systemInfo_.cellsizeLimit);
2932     ddGridSetup_ = getDDGridSetup(mdlog_,
2933                                   MASTER(cr_) ? DDRole::Master : DDRole::Agent,
2934                                   cr->mpiDefaultCommunicator,
2935                                   numRanksRequested,
2936                                   options_,
2937                                   ddSettings_,
2938                                   systemInfo_,
2939                                   gridSetupCellsizeLimit,
2940                                   mtop_,
2941                                   ir_,
2942                                   box,
2943                                   xGlobal,
2944                                   &ddbox_);
2945     checkDDGridSetup(ddGridSetup_,
2946                      MASTER(cr_) ? DDRole::Master : DDRole::Agent,
2947                      cr->mpiDefaultCommunicator,
2948                      cr->sizeOfDefaultCommunicator,
2949                      options_,
2950                      ddSettings_,
2951                      systemInfo_,
2952                      gridSetupCellsizeLimit,
2953                      ddbox_);
2954
2955     cr_->npmenodes = ddGridSetup_.numPmeOnlyRanks;
2956
2957     ddRankSetup_ = getDDRankSetup(mdlog_, cr_->sizeOfDefaultCommunicator, ddGridSetup_, ir_);
2958
2959     /* Generate the group communicator, also decides the duty of each rank */
2960     cartSetup_ = makeGroupCommunicators(
2961             mdlog_, ddSettings_, options_.rankOrder, ddRankSetup_, cr_, ddCellIndex_, &pmeRanks_);
2962 }
2963
2964 gmx_domdec_t* DomainDecompositionBuilder::Impl::build(LocalAtomSetManager* atomSets)
2965 {
2966     gmx_domdec_t* dd = new gmx_domdec_t(ir_);
2967
2968     copy_ivec(ddCellIndex_, dd->ci);
2969
2970     dd->comm = init_dd_comm();
2971
2972     dd->comm->ddRankSetup        = ddRankSetup_;
2973     dd->comm->cartesianRankSetup = cartSetup_;
2974
2975     set_dd_limits(mdlog_,
2976                   MASTER(cr_) ? DDRole::Master : DDRole::Agent,
2977                   dd,
2978                   options_,
2979                   ddSettings_,
2980                   systemInfo_,
2981                   ddGridSetup_,
2982                   ddRankSetup_.numPPRanks,
2983                   mtop_,
2984                   ir_,
2985                   ddbox_);
2986
2987     setupGroupCommunication(mdlog_, ddSettings_, pmeRanks_, cr_, mtop_.natoms, dd);
2988
2989     if (thisRankHasDuty(cr_, DUTY_PP))
2990     {
2991         set_ddgrid_parameters(mdlog_, dd, options_.dlbScaling, mtop_, ir_, &ddbox_);
2992
2993         setup_neighbor_relations(dd);
2994     }
2995
2996     /* Set overallocation to avoid frequent reallocation of arrays */
2997     set_over_alloc_dd(true);
2998
2999     dd->atomSets = atomSets;
3000
3001     dd->localTopologyChecker = std::make_unique<LocalTopologyChecker>();
3002
3003     return dd;
3004 }
3005
3006 DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger&      mdlog,
3007                                                        t_commrec*           cr,
3008                                                        const DomdecOptions& options,
3009                                                        const MdrunOptions&  mdrunOptions,
3010                                                        const gmx_mtop_t&    mtop,
3011                                                        const t_inputrec&    ir,
3012                                                        const matrix         box,
3013                                                        ArrayRef<const RangePartitioning> updateGroupingPerMoleculeType,
3014                                                        const bool           useUpdateGroups,
3015                                                        const real           maxUpdateGroupRadius,
3016                                                        ArrayRef<const RVec> xGlobal) :
3017     impl_(new Impl(mdlog, cr, options, mdrunOptions, mtop, ir, box, updateGroupingPerMoleculeType, useUpdateGroups, maxUpdateGroupRadius, xGlobal))
3018 {
3019 }
3020
3021 gmx_domdec_t* DomainDecompositionBuilder::build(LocalAtomSetManager* atomSets)
3022 {
3023     return impl_->build(atomSets);
3024 }
3025
3026 DomainDecompositionBuilder::~DomainDecompositionBuilder() = default;
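
/* A hedged usage sketch of the builder above; the caller-side names (mdlog,
 * cr, ddOptions, mdrunOptions, mtop, inputrec, box, updateGroupings,
 * useUpdateGroups, maxUpdateGroupRadius, xGlobal, atomSetManager) are
 * assumptions for illustration, not identifiers defined in this file:
 *
 *     DomainDecompositionBuilder builder(mdlog, cr, ddOptions, mdrunOptions,
 *                                        mtop, inputrec, box, updateGroupings,
 *                                        useUpdateGroups, maxUpdateGroupRadius,
 *                                        xGlobal);
 *     gmx_domdec_t* dd = builder.build(&atomSetManager);
 *
 * Construction performs the collective checks and communicator setup; build()
 * then creates the gmx_domdec_t that the rest of mdrun uses for the run. */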
3027
3028 } // namespace gmx
3029
3030 static gmx_bool test_dd_cutoff(const t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3031 {
3032     gmx_ddbox_t ddbox;
3033     int         LocallyLimited = 0;
3034
3035     const auto* dd = cr->dd;
3036
3037     set_ddbox(*dd, false, box, true, x, &ddbox);
3038
3039     LocallyLimited = 0;
3040
3041     for (int d = 0; d < dd->ndim; d++)
3042     {
3043         const int dim = dd->dim[d];
3044
3045         real inv_cell_size = DD_CELL_MARGIN * dd->numCells[dim] / ddbox.box_size[dim];
3046         if (dd->unitCellInfo.ddBoxIsDynamic)
3047         {
3048             inv_cell_size *= DD_PRES_SCALE_MARGIN;
3049         }
3050
3051         const int np = 1 + static_cast<int>(cutoffRequested * inv_cell_size * ddbox.skew_fac[dim]);
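        /* Worked example (illustrative numbers, ignoring the margin factors and
         * assuming skew_fac = 1): with a cell size of 1.2 nm and a requested
         * cut-off of 1.4 nm, inv_cell_size is roughly 1/1.2, so
         * np = 1 + int(1.4 / 1.2) = 2 communication pulses are needed along
         * this dimension. */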
3052
3053         if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3054         {
3055             if (np > dd->comm->cd[d].np_dlb)
3056             {
3057                 return FALSE;
3058             }
3059
3060             /* If the current local cell size is smaller than the requested
3061              * cut-off, we could still fix it, but that gets very complicated.
3062              * Without fixing it here, we might need additional checks elsewhere.
3063              */
3064             real cellSizeAlongDim =
3065                     (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim]) * ddbox.skew_fac[dim];
3066             if (cellSizeAlongDim * dd->comm->cd[d].np_dlb < cutoffRequested)
3067             {
3068                 LocallyLimited = 1;
3069             }
3070         }
3071     }
3072
3073     if (!isDlbDisabled(dd->comm))
3074     {
3075         /* If DLB is not active yet, we don't need to check the grid jumps.
3076          * Actually we shouldn't, because then the grid jump data is not set.
3077          */
3078         if (isDlbOn(dd->comm) && gmx::check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3079         {
3080             LocallyLimited = 1;
3081         }
3082
3083         gmx_sumi(1, &LocallyLimited, cr);
3084
3085         if (LocallyLimited > 0)
3086         {
3087             return FALSE;
3088         }
3089     }
3090
3091     return TRUE;
3092 }
3093
3094 bool change_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3095 {
3096     bool bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3097
3098     if (bCutoffAllowed)
3099     {
3100         cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3101     }
3102
3103     return bCutoffAllowed;
3104 }
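
/* A hedged usage sketch (the caller-side names cr, box, x and newCutoff are
 * assumptions): callers that want to enlarge the short-range cut-off, such as
 * load-balancing code, can only adopt the new value if the current DD setup
 * still supports it:
 *
 *     if (change_dd_cutoff(cr, box, x, newCutoff))
 *     {
 *         // The DD communication cut-off now equals newCutoff.
 *     }
 *     else
 *     {
 *         // Keep the previous cut-off; the DD cell sizes or DLB limits
 *         // cannot accommodate newCutoff.
 *     }
 */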
3105
3106 void constructGpuHaloExchange(const gmx::MDLogger&            mdlog,
3107                               const t_commrec&                cr,
3108                               const gmx::DeviceStreamManager& deviceStreamManager,
3109                               gmx_wallcycle*                  wcycle)
3110 {
3111     GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedLocal),
3112                        "Local non-bonded stream should be valid when using"
3113                        "GPU halo exchange.");
3114     GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedNonLocal),
3115                        "Non-local non-bonded stream should be valid when using "
3116                        "GPU halo exchange.");
3117
3118     if (cr.dd->gpuHaloExchange[0].empty())
3119     {
3120         GMX_LOG(mdlog.warning)
3121                 .asParagraph()
3122                 .appendTextFormatted(
3123                         "NOTE: Activating the 'GPU halo exchange' feature, enabled "
3124                         "by the "
3125                         "GMX_GPU_DD_COMMS environment variable.");
3126     }
3127
3128     for (int d = 0; d < cr.dd->ndim; d++)
3129     {
3130         for (int pulse = cr.dd->gpuHaloExchange[d].size(); pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
3131         {
3132             cr.dd->gpuHaloExchange[d].push_back(std::make_unique<gmx::GpuHaloExchange>(
3133                     cr.dd,
3134                     d,
3135                     cr.mpi_comm_mygroup,
3136                     deviceStreamManager.context(),
3137                     deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedLocal),
3138                     deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedNonLocal),
3139                     pulse,
3140                     wcycle));
3141         }
3142     }
3143 }
3144
3145 void reinitGpuHaloExchange(const t_commrec&              cr,
3146                            const DeviceBuffer<gmx::RVec> d_coordinatesBuffer,
3147                            const DeviceBuffer<gmx::RVec> d_forcesBuffer)
3148 {
3149     for (int d = 0; d < cr.dd->ndim; d++)
3150     {
3151         for (int pulse = 0; pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
3152         {
3153             cr.dd->gpuHaloExchange[d][pulse]->reinitHalo(d_coordinatesBuffer, d_forcesBuffer);
3154         }
3155     }
3156 }
3157
3158 void communicateGpuHaloCoordinates(const t_commrec&      cr,
3159                                    const matrix          box,
3160                                    GpuEventSynchronizer* coordinatesReadyOnDeviceEvent)
3161 {
3162     for (int d = 0; d < cr.dd->ndim; d++)
3163     {
3164         for (int pulse = 0; pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
3165         {
3166             cr.dd->gpuHaloExchange[d][pulse]->communicateHaloCoordinates(box, coordinatesReadyOnDeviceEvent);
3167         }
3168     }
3169 }
3170
3171 void communicateGpuHaloForces(const t_commrec& cr, bool accumulateForces)
3172 {
3173     for (int d = cr.dd->ndim - 1; d >= 0; d--)
3174     {
3175         for (int pulse = cr.dd->comm->cd[d].numPulses() - 1; pulse >= 0; pulse--)
3176         {
3177             cr.dd->gpuHaloExchange[d][pulse]->communicateHaloForces(accumulateForces);
3178         }
3179     }
3180 }
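
/* A hedged sketch of the expected call order for the GPU halo exchange
 * helpers above; the surrounding step logic and the names d_x, d_f,
 * xReadyOnDeviceEvent and accumulateForces are assumptions for illustration:
 *
 *     // After each domain (re)partitioning:
 *     constructGpuHaloExchange(mdlog, *cr, deviceStreamManager, wcycle);
 *     reinitGpuHaloExchange(*cr, d_x, d_f);
 *
 *     // Every MD step:
 *     communicateGpuHaloCoordinates(*cr, box, xReadyOnDeviceEvent);
 *     // ... compute non-bonded forces on the GPU ...
 *     communicateGpuHaloForces(*cr, accumulateForces);
 */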