src/gromacs/domdec/domdec.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2005,2006,2007,2008,2009 by the GROMACS development team.
   5  * Copyright (c) 2010,2011,2012,2013,2014 by the GROMACS development team.
   6  * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team.
   7  * Copyright (c) 2020,2021, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38
  39 #include "gmxpre.h"
  40
  41 #include "domdec.h"
  42
  43 #include "config.h"
  44
  45 #include <cassert>
  46 #include <cinttypes>
  47 #include <climits>
  48 #include <cmath>
  49 #include <cstdio>
  50 #include <cstdlib>
  51 #include <cstring>
  52
  53 #include <algorithm>
  54 #include <memory>
  55
  56 #include "gromacs/domdec/builder.h"
  57 #include "gromacs/domdec/collect.h"
  58 #include "gromacs/domdec/dlb.h"
  59 #include "gromacs/domdec/dlbtiming.h"
  60 #include "gromacs/domdec/domdec_network.h"
  61 #include "gromacs/domdec/ga2la.h"
  62 #include "gromacs/domdec/gpuhaloexchange.h"
  63 #include "gromacs/domdec/options.h"
  64 #include "gromacs/domdec/partition.h"
  65 #include "gromacs/gmxlib/network.h"
  66 #include "gromacs/gmxlib/nrnb.h"
  67 #include "gromacs/gpu_utils/device_stream_manager.h"
  68 #include "gromacs/gpu_utils/gpu_utils.h"
  69 #include "gromacs/hardware/hw_info.h"
  70 #include "gromacs/math/vec.h"
  71 #include "gromacs/math/vectypes.h"
  72 #include "gromacs/mdlib/calc_verletbuf.h"
  73 #include "gromacs/mdlib/constr.h"
  74 #include "gromacs/mdlib/constraintrange.h"
  75 #include "gromacs/mdlib/updategroups.h"
  76 #include "gromacs/mdlib/vsite.h"
  77 #include "gromacs/mdtypes/commrec.h"
  78 #include "gromacs/mdtypes/forceoutput.h"
  79 #include "gromacs/mdtypes/inputrec.h"
  80 #include "gromacs/mdtypes/mdrunoptions.h"
  81 #include "gromacs/mdtypes/state.h"
  82 #include "gromacs/pbcutil/ishift.h"
  83 #include "gromacs/pbcutil/pbc.h"
  84 #include "gromacs/pulling/pull.h"
  85 #include "gromacs/timing/wallcycle.h"
  86 #include "gromacs/topology/block.h"
  87 #include "gromacs/topology/idef.h"
  88 #include "gromacs/topology/ifunc.h"
  89 #include "gromacs/topology/mtop_lookup.h"
  90 #include "gromacs/topology/mtop_util.h"
  91 #include "gromacs/topology/topology.h"
  92 #include "gromacs/utility/basedefinitions.h"
  93 #include "gromacs/utility/basenetwork.h"
  94 #include "gromacs/utility/cstringutil.h"
  95 #include "gromacs/utility/enumerationhelpers.h"
  96 #include "gromacs/utility/exceptions.h"
  97 #include "gromacs/utility/fatalerror.h"
  98 #include "gromacs/utility/gmxmpi.h"
  99 #include "gromacs/utility/logger.h"
 100 #include "gromacs/utility/real.h"
 101 #include "gromacs/utility/smalloc.h"
 102 #include "gromacs/utility/strconvert.h"
 103 #include "gromacs/utility/stringstream.h"
 104 #include "gromacs/utility/stringutil.h"
 105 #include "gromacs/utility/textwriter.h"
 106
 107 #include "atomdistribution.h"
 108 #include "box.h"
 109 #include "cellsizes.h"
 110 #include "distribute.h"
 111 #include "domdec_constraints.h"
 112 #include "domdec_internal.h"
 113 #include "domdec_setup.h"
 114 #include "domdec_vsite.h"
 115 #include "redistribute.h"
 116 #include "utility.h"
 117
 118 // TODO remove this when moving domdec into gmx namespace
 119 using gmx::ArrayRef;
 120 using gmx::DdRankOrder;
 121 using gmx::DlbOption;
 122 using gmx::DomdecOptions;
 123 using gmx::RangePartitioning;
 124
 125 static const char* enumValueToString(DlbState enumValue)
 126 {
 127     static constexpr gmx::EnumerationArray<DlbState, const char*> dlbStateNames = {
 128         "off", "off", "auto", "locked", "on", "on"
 129     };
 130     return dlbStateNames[enumValue];
 131 }
 132
 133 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
 134 #define DD_CGIBS 2
 135
 136 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
 137 #define DD_FLAG_NRCG 65535
 138 #define DD_FLAG_FW(d) (1 << (16 + (d)*2))
 139 #define DD_FLAG_BW(d) (1 << (16 + (d)*2 + 1))
 140
 141 /* The DD zone order */
 142 static const ivec dd_zo[DD_MAXZONE] = { { 0, 0, 0 }, { 1, 0, 0 }, { 1, 1, 0 }, { 0, 1, 0 },
 143                                         { 0, 1, 1 }, { 0, 0, 1 }, { 1, 0, 1 }, { 1, 1, 1 } };
 144
 145 /* The non-bonded zone-pair setup for domain decomposition
 146  * The first number is the i-zone, the second number the first j-zone seen by
 147  * this i-zone, the third number the last+1 j-zone seen by this i-zone.
 148  * As is, this is for 3D decomposition, where there are 4 i-zones.
 149  * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
 150  * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
 151  */
 152 static const int ddNonbondedZonePairRanges[DD_MAXIZONE][3] = { { 0, 0, 8 },
 153                                                                { 1, 3, 6 },
 154                                                                { 2, 5, 6 },
 155                                                                { 3, 5, 7 } };
 156
 157 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
 158 {
 159     xyz[XX] = ind / (nc[YY] * nc[ZZ]);
 160     xyz[YY] = (ind / nc[ZZ]) % nc[YY];
 161     xyz[ZZ] = ind % nc[ZZ];
 162 }
 163
 164 static int ddcoord2ddnodeid(gmx_domdec_t* dd, ivec c)
 165 {
 166     int ddnodeid = -1;
 167
 168     const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
 169     const int                 ddindex   = dd_index(dd->numCells, c);
 170     if (cartSetup.bCartesianPP_PME)
 171     {
 172         ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
 173     }
 174     else if (cartSetup.bCartesianPP)
 175     {
 176 #if GMX_MPI
 177         MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
 178 #endif
 179     }
 180     else
 181     {
 182         ddnodeid = ddindex;
 183     }
 184
 185     return ddnodeid;
 186 }
 187
 188 int ddglatnr(const gmx_domdec_t* dd, int i)
 189 {
 190     int atnr = 0;
 191
 192     if (dd == nullptr)
 193     {
 194         atnr = i + 1;
 195     }
 196     else
 197     {
 198         if (i >= dd->comm->atomRanges.numAtomsTotal())
 199         {
 200             gmx_fatal(FARGS,
 201                       "glatnr called with %d, which is larger than the local number of atoms (%d)",
 202                       i,
 203                       dd->comm->atomRanges.numAtomsTotal());
 204         }
 205         atnr = dd->globalAtomIndices[i] + 1;
 206     }
 207
 208     return atnr;
 209 }
 210
 211 void dd_store_state(const gmx_domdec_t& dd, t_state* state)
 212 {
 213     if (state->ddp_count != dd.ddp_count)
 214     {
 215         gmx_incons("The MD state does not match the domain decomposition state");
 216     }
 217
 218     state->cg_gl.resize(dd.ncg_home);
 219     for (int i = 0; i < dd.ncg_home; i++)
 220     {
 221         state->cg_gl[i] = dd.globalAtomGroupIndices[i];
 222     }
 223
 224     state->ddp_count_cg_gl = dd.ddp_count;
 225 }
 226
 227 gmx_domdec_zones_t* domdec_zones(gmx_domdec_t* dd)
 228 {
 229     return &dd->comm->zones;
 230 }
 231
 232 int dd_numAtomsZones(const gmx_domdec_t& dd)
 233 {
 234     return dd.comm->atomRanges.end(DDAtomRanges::Type::Zones);
 235 }
 236
 237 int dd_numHomeAtoms(const gmx_domdec_t& dd)
 238 {
 239     return dd.comm->atomRanges.numHomeAtoms();
 240 }
 241
 242 int dd_natoms_mdatoms(const gmx_domdec_t& dd)
 243 {
 244     /* We currently set mdatoms entries for all atoms:
 245      * local + non-local + communicated for vsite + constraints
 246      */
 247
 248     return dd.comm->atomRanges.numAtomsTotal();
 249 }
 250
 251 int dd_natoms_vsite(const gmx_domdec_t& dd)
 252 {
 253     return dd.comm->atomRanges.end(DDAtomRanges::Type::Vsites);
 254 }
 255
 256 void dd_get_constraint_range(const gmx_domdec_t& dd, int* at_start, int* at_end)
 257 {
 258     *at_start = dd.comm->atomRanges.start(DDAtomRanges::Type::Constraints);
 259     *at_end   = dd.comm->atomRanges.end(DDAtomRanges::Type::Constraints);
 260 }
 261
 262 void dd_move_x(gmx_domdec_t* dd, const matrix box, gmx::ArrayRef<gmx::RVec> x, gmx_wallcycle* wcycle)
 263 {
 264     wallcycle_start(wcycle, WallCycleCounter::MoveX);
 265
 266     rvec shift = { 0, 0, 0 };
 267
 268     gmx_domdec_comm_t* comm = dd->comm;
 269
 270     int nzone   = 1;
 271     int nat_tot = comm->atomRanges.numHomeAtoms();
 272     for (int d = 0; d < dd->ndim; d++)
 273     {
 274         const bool bPBC   = (dd->ci[dd->dim[d]] == 0);
 275         const bool bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
 276         if (bPBC)
 277         {
 278             copy_rvec(box[dd->dim[d]], shift);
 279         }
 280         gmx_domdec_comm_dim_t* cd = &comm->cd[d];
 281         for (const gmx_domdec_ind_t& ind : cd->ind)
 282         {
 283             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 284             gmx::ArrayRef<gmx::RVec>& sendBuffer = sendBufferAccess.buffer;
 285             int                       n          = 0;
 286             if (!bPBC)
 287             {
 288                 for (int j : ind.index)
 289                 {
 290                     sendBuffer[n] = x[j];
 291                     n++;
 292                 }
 293             }
 294             else if (!bScrew)
 295             {
 296                 for (int j : ind.index)
 297                 {
 298                     /* We need to shift the coordinates */
 299                     for (int d = 0; d < DIM; d++)
 300                     {
 301                         sendBuffer[n][d] = x[j][d] + shift[d];
 302                     }
 303                     n++;
 304                 }
 305             }
 306             else
 307             {
 308                 for (int j : ind.index)
 309                 {
 310                     /* Shift x */
 311                     sendBuffer[n][XX] = x[j][XX] + shift[XX];
 312                     /* Rotate y and z.
 313                      * This operation requires a special shift force
 314                      * treatment, which is performed in calc_vir.
 315                      */
 316                     sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
 317                     sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
 318                     n++;
 319                 }
 320             }
 321
 322             DDBufferAccess<gmx::RVec> receiveBufferAccess(
 323                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 324
 325             gmx::ArrayRef<gmx::RVec> receiveBuffer;
 326             if (cd->receiveInPlace)
 327             {
 328                 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
 329             }
 330             else
 331             {
 332                 receiveBuffer = receiveBufferAccess.buffer;
 333             }
 334             /* Send and receive the coordinates */
 335             ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
 336
 337             if (!cd->receiveInPlace)
 338             {
 339                 int j = 0;
 340                 for (int zone = 0; zone < nzone; zone++)
 341                 {
 342                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 343                     {
 344                         x[i] = receiveBuffer[j++];
 345                     }
 346                 }
 347             }
 348             nat_tot += ind.nrecv[nzone + 1];
 349         }
 350         nzone += nzone;
 351     }
 352
 353     wallcycle_stop(wcycle, WallCycleCounter::MoveX);
 354 }
 355
 356 void dd_move_f(gmx_domdec_t* dd, gmx::ForceWithShiftForces* forceWithShiftForces, gmx_wallcycle* wcycle)
 357 {
 358     wallcycle_start(wcycle, WallCycleCounter::MoveF);
 359
 360     gmx::ArrayRef<gmx::RVec> f      = forceWithShiftForces->force();
 361     gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();
 362
 363     gmx_domdec_comm_t& comm    = *dd->comm;
 364     int                nzone   = comm.zones.n / 2;
 365     int                nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
 366     for (int d = dd->ndim - 1; d >= 0; d--)
 367     {
 368         /* Only forces in domains near the PBC boundaries need to
 369            consider PBC in the treatment of fshift */
 370         const bool shiftForcesNeedPbc =
 371                 (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
 372         const bool applyScrewPbc =
 373                 (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
 374         /* Determine which shift vector we need */
 375         ivec vis        = { 0, 0, 0 };
 376         vis[dd->dim[d]] = 1;
 377         const int is    = gmx::ivecToShiftIndex(vis);
 378
 379         /* Loop over the pulses */
 380         const gmx_domdec_comm_dim_t& cd = comm.cd[d];
 381         for (int p = cd.numPulses() - 1; p >= 0; p--)
 382         {
 383             const gmx_domdec_ind_t&   ind = cd.ind[p];
 384             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
 385             gmx::ArrayRef<gmx::RVec>& receiveBuffer = receiveBufferAccess.buffer;
 386
 387             nat_tot -= ind.nrecv[nzone + 1];
 388
 389             DDBufferAccess<gmx::RVec> sendBufferAccess(
 390                     comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 391
 392             gmx::ArrayRef<gmx::RVec> sendBuffer;
 393             if (cd.receiveInPlace)
 394             {
 395                 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
 396             }
 397             else
 398             {
 399                 sendBuffer = sendBufferAccess.buffer;
 400                 int j      = 0;
 401                 for (int zone = 0; zone < nzone; zone++)
 402                 {
 403                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 404                     {
 405                         sendBuffer[j++] = f[i];
 406                     }
 407                 }
 408             }
 409             /* Communicate the forces */
 410             ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
 411             /* Add the received forces */
 412             int n = 0;
 413             if (!shiftForcesNeedPbc)
 414             {
 415                 for (int j : ind.index)
 416                 {
 417                     for (int d = 0; d < DIM; d++)
 418                     {
 419                         f[j][d] += receiveBuffer[n][d];
 420                     }
 421                     n++;
 422                 }
 423             }
 424             else if (!applyScrewPbc)
 425             {
 426                 for (int j : ind.index)
 427                 {
 428                     for (int d = 0; d < DIM; d++)
 429                     {
 430                         f[j][d] += receiveBuffer[n][d];
 431                     }
 432                     /* Add this force to the shift force */
 433                     for (int d = 0; d < DIM; d++)
 434                     {
 435                         fshift[is][d] += receiveBuffer[n][d];
 436                     }
 437                     n++;
 438                 }
 439             }
 440             else
 441             {
 442                 for (int j : ind.index)
 443                 {
 444                     /* Rotate the force */
 445                     f[j][XX] += receiveBuffer[n][XX];
 446                     f[j][YY] -= receiveBuffer[n][YY];
 447                     f[j][ZZ] -= receiveBuffer[n][ZZ];
 448                     if (shiftForcesNeedPbc)
 449                     {
 450                         /* Add this force to the shift force */
 451                         for (int d = 0; d < DIM; d++)
 452                         {
 453                             fshift[is][d] += receiveBuffer[n][d];
 454                         }
 455                     }
 456                     n++;
 457                 }
 458             }
 459         }
 460         nzone /= 2;
 461     }
 462     wallcycle_stop(wcycle, WallCycleCounter::MoveF);
 463 }
 464
 465 /* Convenience function for extracting a real buffer from an rvec buffer
 466  *
 467  * To reduce the number of temporary communication buffers and avoid
 468  * cache polution, we reuse gmx::RVec buffers for storing reals.
 469  * This functions return a real buffer reference with the same number
 470  * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
 471  */
 472 static gmx::ArrayRef<real> realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
 473 {
 474     return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0], arrayRef.size());
 475 }
 476
 477 void dd_atom_spread_real(gmx_domdec_t* dd, real v[])
 478 {
 479     gmx_domdec_comm_t* comm = dd->comm;
 480
 481     int nzone   = 1;
 482     int nat_tot = comm->atomRanges.numHomeAtoms();
 483     for (int d = 0; d < dd->ndim; d++)
 484     {
 485         gmx_domdec_comm_dim_t* cd = &comm->cd[d];
 486         for (const gmx_domdec_ind_t& ind : cd->ind)
 487         {
 488             /* Note: We provision for RVec instead of real, so a factor of 3
 489              * more than needed. The buffer actually already has this size
 490              * and we pass a plain pointer below, so this does not matter.
 491              */
 492             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 493             gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 494             int                 n          = 0;
 495             for (int j : ind.index)
 496             {
 497                 sendBuffer[n++] = v[j];
 498             }
 499
 500             DDBufferAccess<gmx::RVec> receiveBufferAccess(
 501                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 502
 503             gmx::ArrayRef<real> receiveBuffer;
 504             if (cd->receiveInPlace)
 505             {
 506                 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 507             }
 508             else
 509             {
 510                 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 511             }
 512             /* Send and receive the data */
 513             ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
 514             if (!cd->receiveInPlace)
 515             {
 516                 int j = 0;
 517                 for (int zone = 0; zone < nzone; zone++)
 518                 {
 519                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 520                     {
 521                         v[i] = receiveBuffer[j++];
 522                     }
 523                 }
 524             }
 525             nat_tot += ind.nrecv[nzone + 1];
 526         }
 527         nzone += nzone;
 528     }
 529 }
 530
 531 void dd_atom_sum_real(gmx_domdec_t* dd, real v[])
 532 {
 533     gmx_domdec_comm_t* comm = dd->comm;
 534
 535     int nzone   = comm->zones.n / 2;
 536     int nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
 537     for (int d = dd->ndim - 1; d >= 0; d--)
 538     {
 539         gmx_domdec_comm_dim_t* cd = &comm->cd[d];
 540         for (int p = cd->numPulses() - 1; p >= 0; p--)
 541         {
 542             const gmx_domdec_ind_t& ind = cd->ind[p];
 543
 544             /* Note: We provision for RVec instead of real, so a factor of 3
 545              * more than needed. The buffer actually already has this size
 546              * and we typecast, so this works as intended.
 547              */
 548             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 549             gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 550             nat_tot -= ind.nrecv[nzone + 1];
 551
 552             DDBufferAccess<gmx::RVec> sendBufferAccess(
 553                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 554
 555             gmx::ArrayRef<real> sendBuffer;
 556             if (cd->receiveInPlace)
 557             {
 558                 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 559             }
 560             else
 561             {
 562                 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 563                 int j      = 0;
 564                 for (int zone = 0; zone < nzone; zone++)
 565                 {
 566                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 567                     {
 568                         sendBuffer[j++] = v[i];
 569                     }
 570                 }
 571             }
 572             /* Communicate the forces */
 573             ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
 574             /* Add the received forces */
 575             int n = 0;
 576             for (int j : ind.index)
 577             {
 578                 v[j] += receiveBuffer[n];
 579                 n++;
 580             }
 581         }
 582         nzone /= 2;
 583     }
 584 }
 585
 586 real dd_cutoff_multibody(const gmx_domdec_t* dd)
 587 {
 588     const gmx_domdec_comm_t& comm       = *dd->comm;
 589     const DDSystemInfo&      systemInfo = comm.systemInfo;
 590
 591     real r = -1;
 592     if (systemInfo.haveInterDomainMultiBodyBondeds)
 593     {
 594         if (comm.cutoff_mbody > 0)
 595         {
 596             r = comm.cutoff_mbody;
 597         }
 598         else
 599         {
 600             /* cutoff_mbody=0 means we do not have DLB */
 601             r = comm.cellsize_min[dd->dim[0]];
 602             for (int di = 1; di < dd->ndim; di++)
 603             {
 604                 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
 605             }
 606             if (comm.systemInfo.filterBondedCommunication)
 607             {
 608                 r = std::max(r, comm.cutoff_mbody);
 609             }
 610             else
 611             {
 612                 r = std::min(r, systemInfo.cutoff);
 613             }
 614         }
 615     }
 616
 617     return r;
 618 }
 619
 620 real dd_cutoff_twobody(const gmx_domdec_t* dd)
 621 {
 622     const real r_mb = dd_cutoff_multibody(dd);
 623
 624     return std::max(dd->comm->systemInfo.cutoff, r_mb);
 625 }
 626
 627
 628 static void dd_cart_coord2pmecoord(const DDRankSetup&        ddRankSetup,
 629                                    const CartesianRankSetup& cartSetup,
 630                                    const ivec                coord,
 631                                    ivec                      coord_pme)
 632 {
 633     const int nc   = ddRankSetup.numPPCells[cartSetup.cartpmedim];
 634     const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
 635     copy_ivec(coord, coord_pme);
 636     coord_pme[cartSetup.cartpmedim] =
 637             nc + (coord[cartSetup.cartpmedim] * (ntot - nc) + (ntot - nc) / 2) / nc;
 638 }
 639
 640 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
 641 static int ddindex2pmeindex(const DDRankSetup& ddRankSetup, const int ddCellIndex)
 642 {
 643     const int npp  = ddRankSetup.numPPRanks;
 644     const int npme = ddRankSetup.numRanksDoingPme;
 645
 646     /* Here we assign a PME node to communicate with this DD node
 647      * by assuming that the major index of both is x.
 648      * We add npme/2 to obtain an even distribution.
 649      */
 650     return (ddCellIndex * npme + npme / 2) / npp;
 651 }
 652
 653 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup& ddRankSetup)
 654 {
 655     std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
 656
 657     int n = 0;
 658     for (int i = 0; i < ddRankSetup.numPPRanks; i++)
 659     {
 660         const int p0 = ddindex2pmeindex(ddRankSetup, i);
 661         const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
 662         if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
 663         {
 664             if (debug)
 665             {
 666                 fprintf(debug, "pme_rank[%d] = %d\n", n, i + 1 + n);
 667             }
 668             pmeRanks[n] = i + 1 + n;
 669             n++;
 670         }
 671     }
 672
 673     return pmeRanks;
 674 }
 675
 676 static int gmx_ddcoord2pmeindex(const gmx_domdec_t& dd, int x, int y, int z)
 677 {
 678     ivec coords;
 679
 680     coords[XX]     = x;
 681     coords[YY]     = y;
 682     coords[ZZ]     = z;
 683     const int slab = ddindex2pmeindex(dd.comm->ddRankSetup, dd_index(dd.numCells, coords));
 684
 685     return slab;
 686 }
 687
 688 static int ddcoord2simnodeid(const t_commrec* cr, int x, int y, int z)
 689 {
 690     const CartesianRankSetup& cartSetup = cr->dd->comm->cartesianRankSetup;
 691     ivec                      coords    = { x, y, z };
 692     int                       nodeid    = -1;
 693
 694     if (cartSetup.bCartesianPP_PME)
 695     {
 696 #if GMX_MPI
 697         MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
 698 #endif
 699     }
 700     else
 701     {
 702         int ddindex = dd_index(cr->dd->numCells, coords);
 703         if (cartSetup.bCartesianPP)
 704         {
 705             nodeid = cartSetup.ddindex2simnodeid[ddindex];
 706         }
 707         else
 708         {
 709             if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
 710             {
 711                 nodeid = ddindex + gmx_ddcoord2pmeindex(*cr->dd, x, y, z);
 712             }
 713             else
 714             {
 715                 nodeid = ddindex;
 716             }
 717         }
 718     }
 719
 720     return nodeid;
 721 }
 722
 723 static int dd_simnode2pmenode(const DDRankSetup&        ddRankSetup,
 724                               const CartesianRankSetup& cartSetup,
 725                               gmx::ArrayRef<const int>  pmeRanks,
 726                               const t_commrec gmx_unused* cr,
 727                               const int                   sim_nodeid)
 728 {
 729     int pmenode = -1;
 730
 731     /* This assumes a uniform x domain decomposition grid cell size */
 732     if (cartSetup.bCartesianPP_PME)
 733     {
 734 #if GMX_MPI
 735         ivec coord, coord_pme;
 736         MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
 737         if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
 738         {
 739             /* This is a PP rank */
 740             dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
 741             MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
 742         }
 743 #endif
 744     }
 745     else if (cartSetup.bCartesianPP)
 746     {
 747         if (sim_nodeid < ddRankSetup.numPPRanks)
 748         {
 749             pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
 750         }
 751     }
 752     else
 753     {
 754         /* This assumes DD cells with identical x coordinates
 755          * are numbered sequentially.
 756          */
 757         if (pmeRanks.empty())
 758         {
 759             if (sim_nodeid < ddRankSetup.numPPRanks)
 760             {
 761                 /* The DD index equals the nodeid */
 762                 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
 763             }
 764         }
 765         else
 766         {
 767             int i = 0;
 768             while (sim_nodeid > pmeRanks[i])
 769             {
 770                 i++;
 771             }
 772             if (sim_nodeid < pmeRanks[i])
 773             {
 774                 pmenode = pmeRanks[i];
 775             }
 776         }
 777     }
 778
 779     return pmenode;
 780 }
 781
 782 NumPmeDomains getNumPmeDomains(const gmx_domdec_t* dd)
 783 {
 784     if (dd != nullptr)
 785     {
 786         return { dd->comm->ddRankSetup.npmenodes_x, dd->comm->ddRankSetup.npmenodes_y };
 787     }
 788     else
 789     {
 790         return { 1, 1 };
 791     }
 792 }
 793
 794 std::vector<int> get_pme_ddranks(const t_commrec* cr, const int pmenodeid)
 795 {
 796     const DDRankSetup&        ddRankSetup = cr->dd->comm->ddRankSetup;
 797     const CartesianRankSetup& cartSetup   = cr->dd->comm->cartesianRankSetup;
 798     GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks,
 799                        "This function should only be called when PME-only ranks are in use");
 800     std::vector<int> ddranks;
 801     ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1) / ddRankSetup.numRanksDoingPme);
 802
 803     for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
 804     {
 805         for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
 806         {
 807             for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
 808             {
 809                 if (cartSetup.bCartesianPP_PME)
 810                 {
 811                     ivec coord = { x, y, z };
 812                     ivec coord_pme;
 813                     dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
 814                     if (cr->dd->ci[XX] == coord_pme[XX] && cr->dd->ci[YY] == coord_pme[YY]
 815                         && cr->dd->ci[ZZ] == coord_pme[ZZ])
 816                     {
 817                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 818                     }
 819                 }
 820                 else
 821                 {
 822                     /* The slab corresponds to the nodeid in the PME group */
 823                     if (gmx_ddcoord2pmeindex(*cr->dd, x, y, z) == pmenodeid)
 824                     {
 825                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 826                     }
 827                 }
 828             }
 829         }
 830     }
 831     return ddranks;
 832 }
 833
 834 static gmx_bool receive_vir_ener(const gmx_domdec_t* dd, gmx::ArrayRef<const int> pmeRanks, const t_commrec* cr)
 835 {
 836     bool bReceive = true;
 837
 838     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
 839     if (ddRankSetup.usePmeOnlyRanks)
 840     {
 841         const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
 842         if (cartSetup.bCartesianPP_PME)
 843         {
 844 #if GMX_MPI
 845             int  pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
 846             ivec coords;
 847             MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
 848             coords[cartSetup.cartpmedim]++;
 849             if (coords[cartSetup.cartpmedim] < dd->numCells[cartSetup.cartpmedim])
 850             {
 851                 int rank = 0;
 852                 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
 853                 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
 854                 {
 855                     /* This is not the last PP node for pmenode */
 856                     bReceive = FALSE;
 857                 }
 858             }
 859 #else
 860             GMX_RELEASE_ASSERT(
 861                     false,
 862                     "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
 863 #endif
 864         }
 865         else
 866         {
 867             int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
 868             if (cr->sim_nodeid + 1 < cr->nnodes
 869                 && dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
 870             {
 871                 /* This is not the last PP node for pmenode */
 872                 bReceive = FALSE;
 873             }
 874         }
 875     }
 876
 877     return bReceive;
 878 }
 879
 880 static void set_slb_pme_dim_f(gmx_domdec_t* dd, int dim, real** dim_f)
 881 {
 882     gmx_domdec_comm_t* comm = dd->comm;
 883
 884     snew(*dim_f, dd->numCells[dim] + 1);
 885     (*dim_f)[0] = 0;
 886     for (int i = 1; i < dd->numCells[dim]; i++)
 887     {
 888         if (comm->slb_frac[dim])
 889         {
 890             (*dim_f)[i] = (*dim_f)[i - 1] + comm->slb_frac[dim][i - 1];
 891         }
 892         else
 893         {
 894             (*dim_f)[i] = static_cast<real>(i) / static_cast<real>(dd->numCells[dim]);
 895         }
 896     }
 897     (*dim_f)[dd->numCells[dim]] = 1;
 898 }
 899
 900 static void init_ddpme(gmx_domdec_t* dd, gmx_ddpme_t* ddpme, int dimind)
 901 {
 902     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
 903
 904     if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
 905     {
 906         ddpme->dim = YY;
 907     }
 908     else
 909     {
 910         ddpme->dim = dimind;
 911     }
 912     ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
 913
 914     ddpme->nslab = (ddpme->dim == 0 ? ddRankSetup.npmenodes_x : ddRankSetup.npmenodes_y);
 915
 916     if (ddpme->nslab <= 1)
 917     {
 918         return;
 919     }
 920
 921     const int nso = ddRankSetup.numRanksDoingPme / ddpme->nslab;
 922     /* Determine for each PME slab the PP location range for dimension dim */
 923     snew(ddpme->pp_min, ddpme->nslab);
 924     snew(ddpme->pp_max, ddpme->nslab);
 925     for (int slab = 0; slab < ddpme->nslab; slab++)
 926     {
 927         ddpme->pp_min[slab] = dd->numCells[dd->dim[dimind]] - 1;
 928         ddpme->pp_max[slab] = 0;
 929     }
 930     for (int i = 0; i < dd->nnodes; i++)
 931     {
 932         ivec xyz;
 933         ddindex2xyz(dd->numCells, i, xyz);
 934         /* For y only use our y/z slab.
 935          * This assumes that the PME x grid size matches the DD grid size.
 936          */
 937         if (dimind == 0 || xyz[XX] == dd->ci[XX])
 938         {
 939             const int pmeindex  = ddindex2pmeindex(ddRankSetup, i);
 940             const int slab      = (dimind == 0) ? (pmeindex / nso) : (pmeindex % ddpme->nslab);
 941             ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
 942             ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
 943         }
 944     }
 945
 946     set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
 947 }
 948
 949 int dd_pme_maxshift_x(const gmx_domdec_t& dd)
 950 {
 951     const DDRankSetup& ddRankSetup = dd.comm->ddRankSetup;
 952
 953     if (ddRankSetup.ddpme[0].dim == XX)
 954     {
 955         return ddRankSetup.ddpme[0].maxshift;
 956     }
 957     else
 958     {
 959         return 0;
 960     }
 961 }
 962
 963 int dd_pme_maxshift_y(const gmx_domdec_t& dd)
 964 {
 965     const DDRankSetup& ddRankSetup = dd.comm->ddRankSetup;
 966
 967     if (ddRankSetup.ddpme[0].dim == YY)
 968     {
 969         return ddRankSetup.ddpme[0].maxshift;
 970     }
 971     else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
 972     {
 973         return ddRankSetup.ddpme[1].maxshift;
 974     }
 975     else
 976     {
 977         return 0;
 978     }
 979 }
 980
 981 bool ddUsesUpdateGroups(const gmx_domdec_t& dd)
 982 {
 983     return dd.comm->systemInfo.useUpdateGroups;
 984 }
 985
 986 void dd_cycles_add(const gmx_domdec_t* dd, float cycles, int ddCycl)
 987 {
 988     /* Note that the cycles value can be incorrect, either 0 or some
 989      * extremely large value, when our thread migrated to another core
 990      * with an unsynchronized cycle counter. If this happens less often
 991      * that once per nstlist steps, this will not cause issues, since
 992      * we later subtract the maximum value from the sum over nstlist steps.
 993      * A zero count will slightly lower the total, but that's a small effect.
 994      * Note that the main purpose of the subtraction of the maximum value
 995      * is to avoid throwing off the load balancing when stalls occur due
 996      * e.g. system activity or network congestion.
 997      */
 998     dd->comm->cycl[ddCycl] += cycles;
 999     dd->comm->cycl_n[ddCycl]++;
1000     if (cycles > dd->comm->cycl_max[ddCycl])
1001     {
1002         dd->comm->cycl_max[ddCycl] = cycles;
1003     }
1004 }
1005
1006 #if GMX_MPI
1007 static void make_load_communicator(gmx_domdec_t* dd, int dim_ind, ivec loc)
1008 {
1009     MPI_Comm c_row = MPI_COMM_NULL;
1010     ivec     loc_c;
1011     bool     bPartOfGroup = false;
1012
1013     const int dim = dd->dim[dim_ind];
1014     copy_ivec(loc, loc_c);
1015     for (int i = 0; i < dd->numCells[dim]; i++)
1016     {
1017         loc_c[dim]     = i;
1018         const int rank = dd_index(dd->numCells, loc_c);
1019         if (rank == dd->rank)
1020         {
1021             /* This process is part of the group */
1022             bPartOfGroup = TRUE;
1023         }
1024     }
1025     MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank, &c_row);
1026     if (bPartOfGroup)
1027     {
1028         dd->comm->mpi_comm_load[dim_ind] = c_row;
1029         if (!isDlbDisabled(dd->comm))
1030         {
1031             DDCellsizesWithDlb& cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1032
1033             if (dd->ci[dim] == dd->master_ci[dim])
1034             {
1035                 /* This is the root process of this row */
1036                 cellsizes.rowMaster = std::make_unique<RowMaster>();
1037
1038                 RowMaster& rowMaster = *cellsizes.rowMaster;
1039                 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1040                 rowMaster.oldCellFrac.resize(dd->numCells[dim] + 1);
1041                 rowMaster.isCellMin.resize(dd->numCells[dim]);
1042                 if (dim_ind > 0)
1043                 {
1044                     rowMaster.bounds.resize(dd->numCells[dim]);
1045                 }
1046                 rowMaster.buf_ncd.resize(dd->numCells[dim]);
1047             }
1048             else
1049             {
1050                 /* This is not a root process, we only need to receive cell_f */
1051                 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1052             }
1053         }
1054         if (dd->ci[dim] == dd->master_ci[dim])
1055         {
1056             snew(dd->comm->load[dim_ind].load, dd->numCells[dim] * DD_NLOAD_MAX);
1057         }
1058     }
1059 }
1060 #endif
1061
1062 void dd_setup_dlb_resource_sharing(const t_commrec* cr, int gpu_id)
1063 {
1064 #if GMX_MPI
1065     MPI_Comm mpi_comm_pp_physicalnode = MPI_COMM_NULL;
1066
1067     if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1068     {
1069         /* Only ranks with short-ranged tasks (currently) use GPUs.
1070          * If we don't have GPUs assigned, there are no resources to share.
1071          */
1072         return;
1073     }
1074
1075     const int physicalnode_id_hash = gmx_physicalnode_id_hash();
1076
1077     gmx_domdec_t* dd = cr->dd;
1078
1079     if (debug)
1080     {
1081         fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1082         fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n", dd->rank, physicalnode_id_hash, gpu_id);
1083     }
1084     /* Split the PP communicator over the physical nodes */
1085     /* TODO: See if we should store this (before), as it's also used for
1086      * for the nodecomm summation.
1087      */
1088     // TODO PhysicalNodeCommunicator could be extended/used to handle
1089     // the need for per-node per-group communicators.
1090     MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank, &mpi_comm_pp_physicalnode);
1091     MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank, &dd->comm->mpi_comm_gpu_shared);
1092     MPI_Comm_free(&mpi_comm_pp_physicalnode);
1093     MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1094
1095     if (debug)
1096     {
1097         fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1098     }
1099
1100     /* Note that some ranks could share a GPU, while others don't */
1101
1102     if (dd->comm->nrank_gpu_shared == 1)
1103     {
1104         MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1105     }
1106 #else
1107     GMX_UNUSED_VALUE(cr);
1108     GMX_UNUSED_VALUE(gpu_id);
1109 #endif
1110 }
1111
1112 static void make_load_communicators(gmx_domdec_t gmx_unused* dd)
1113 {
1114 #if GMX_MPI
1115     ivec loc;
1116
1117     if (debug)
1118     {
1119         fprintf(debug, "Making load communicators\n");
1120     }
1121
1122     dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1123     snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1124
1125     if (dd->ndim == 0)
1126     {
1127         return;
1128     }
1129
1130     clear_ivec(loc);
1131     make_load_communicator(dd, 0, loc);
1132     if (dd->ndim > 1)
1133     {
1134         const int dim0 = dd->dim[0];
1135         for (int i = 0; i < dd->numCells[dim0]; i++)
1136         {
1137             loc[dim0] = i;
1138             make_load_communicator(dd, 1, loc);
1139         }
1140     }
1141     if (dd->ndim > 2)
1142     {
1143         const int dim0 = dd->dim[0];
1144         for (int i = 0; i < dd->numCells[dim0]; i++)
1145         {
1146             loc[dim0]      = i;
1147             const int dim1 = dd->dim[1];
1148             for (int j = 0; j < dd->numCells[dim1]; j++)
1149             {
1150                 loc[dim1] = j;
1151                 make_load_communicator(dd, 2, loc);
1152             }
1153         }
1154     }
1155
1156     if (debug)
1157     {
1158         fprintf(debug, "Finished making load communicators\n");
1159     }
1160 #endif
1161 }
1162
1163 /*! \brief Sets up the relation between neighboring domains and zones */
1164 static void setup_neighbor_relations(gmx_domdec_t* dd)
1165 {
1166     ivec tmp, s;
1167     GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
1168
1169     for (int d = 0; d < dd->ndim; d++)
1170     {
1171         const int dim = dd->dim[d];
1172         copy_ivec(dd->ci, tmp);
1173         tmp[dim]           = (tmp[dim] + 1) % dd->numCells[dim];
1174         dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1175         copy_ivec(dd->ci, tmp);
1176         tmp[dim]           = (tmp[dim] - 1 + dd->numCells[dim]) % dd->numCells[dim];
1177         dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1178         if (debug)
1179         {
1180             fprintf(debug,
1181                     "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
1182                     dd->rank,
1183                     dim,
1184                     dd->neighbor[d][0],
1185                     dd->neighbor[d][1]);
1186         }
1187     }
1188
1189     int nzone  = (1 << dd->ndim);
1190     int nizone = (1 << std::max(dd->ndim - 1, 0));
1191     assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1192
1193     gmx_domdec_zones_t* zones = &dd->comm->zones;
1194
1195     for (int i = 0; i < nzone; i++)
1196     {
1197         int m = 0;
1198         clear_ivec(zones->shift[i]);
1199         for (int d = 0; d < dd->ndim; d++)
1200         {
1201             zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1202         }
1203     }
1204
1205     zones->n = nzone;
1206     for (int i = 0; i < nzone; i++)
1207     {
1208         for (int d = 0; d < DIM; d++)
1209         {
1210             s[d] = dd->ci[d] - zones->shift[i][d];
1211             if (s[d] < 0)
1212             {
1213                 s[d] += dd->numCells[d];
1214             }
1215             else if (s[d] >= dd->numCells[d])
1216             {
1217                 s[d] -= dd->numCells[d];
1218             }
1219         }
1220     }
1221     for (int iZoneIndex = 0; iZoneIndex < nizone; iZoneIndex++)
1222     {
1223         GMX_RELEASE_ASSERT(
1224                 ddNonbondedZonePairRanges[iZoneIndex][0] == iZoneIndex,
1225                 "The first element for each ddNonbondedZonePairRanges should match its index");
1226
1227         DDPairInteractionRanges iZone;
1228         iZone.iZoneIndex = iZoneIndex;
1229         /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
1230          * j-zones up to nzone.
1231          */
1232         iZone.jZoneRange = gmx::Range<int>(std::min(ddNonbondedZonePairRanges[iZoneIndex][1], nzone),
1233                                            std::min(ddNonbondedZonePairRanges[iZoneIndex][2], nzone));
1234         for (int dim = 0; dim < DIM; dim++)
1235         {
1236             if (dd->numCells[dim] == 1)
1237             {
1238                 /* All shifts should be allowed */
1239                 iZone.shift0[dim] = -1;
1240                 iZone.shift1[dim] = 1;
1241             }
1242             else
1243             {
1244                 /* Determine the min/max j-zone shift wrt the i-zone */
1245                 iZone.shift0[dim] = 1;
1246                 iZone.shift1[dim] = -1;
1247                 for (int jZone : iZone.jZoneRange)
1248                 {
1249                     int shift_diff = zones->shift[jZone][dim] - zones->shift[iZoneIndex][dim];
1250                     if (shift_diff < iZone.shift0[dim])
1251                     {
1252                         iZone.shift0[dim] = shift_diff;
1253                     }
1254                     if (shift_diff > iZone.shift1[dim])
1255                     {
1256                         iZone.shift1[dim] = shift_diff;
1257                     }
1258                 }
1259             }
1260         }
1261
1262         zones->iZones.push_back(iZone);
1263     }
1264
1265     if (!isDlbDisabled(dd->comm))
1266     {
1267         dd->comm->cellsizesWithDlb.resize(dd->ndim);
1268     }
1269
1270     if (dd->comm->ddSettings.recordLoad)
1271     {
1272         make_load_communicators(dd);
1273     }
1274 }
1275
1276 static void make_pp_communicator(const gmx::MDLogger& mdlog,
1277                                  gmx_domdec_t*        dd,
1278                                  t_commrec gmx_unused* cr,
1279                                  bool gmx_unused       reorder)
1280 {
1281 #if GMX_MPI
1282     gmx_domdec_comm_t*  comm      = dd->comm;
1283     CartesianRankSetup& cartSetup = comm->cartesianRankSetup;
1284
1285     if (cartSetup.bCartesianPP)
1286     {
1287         /* Set up cartesian communication for the particle-particle part */
1288         GMX_LOG(mdlog.info)
1289                 .appendTextFormatted("Will use a Cartesian communicator: %d x %d x %d",
1290                                      dd->numCells[XX],
1291                                      dd->numCells[YY],
1292                                      dd->numCells[ZZ]);
1293
1294         ivec periods;
1295         for (int i = 0; i < DIM; i++)
1296         {
1297             periods[i] = TRUE;
1298         }
1299         MPI_Comm comm_cart = MPI_COMM_NULL;
1300         MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->numCells, periods, static_cast<int>(reorder), &comm_cart);
1301         /* We overwrite the old communicator with the new cartesian one */
1302         cr->mpi_comm_mygroup = comm_cart;
1303     }
1304
1305     dd->mpi_comm_all = cr->mpi_comm_mygroup;
1306     MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1307
1308     if (cartSetup.bCartesianPP_PME)
1309     {
1310         /* Since we want to use the original cartesian setup for sim,
1311          * and not the one after split, we need to make an index.
1312          */
1313         cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1314         cartSetup.ddindex2ddnodeid[dd_index(dd->numCells, dd->ci)] = dd->rank;
1315         gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1316         /* Get the rank of the DD master,
1317          * above we made sure that the master node is a PP node.
1318          */
1319         int rank = MASTER(cr) ? dd->rank : 0;
1320         MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1321     }
1322     else if (cartSetup.bCartesianPP)
1323     {
1324         if (!comm->ddRankSetup.usePmeOnlyRanks)
1325         {
1326             /* The PP communicator is also
1327              * the communicator for this simulation
1328              */
1329             cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1330         }
1331         cr->nodeid = dd->rank;
1332
1333         MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1334
1335         /* We need to make an index to go from the coordinates
1336          * to the nodeid of this simulation.
1337          */
1338         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1339         std::vector<int> buf(dd->nnodes);
1340         if (thisRankHasDuty(cr, DUTY_PP))
1341         {
1342             buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
1343         }
1344         /* Communicate the ddindex to simulation nodeid index */
1345         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM, cr->mpi_comm_mysim);
1346
1347         /* Determine the master coordinates and rank.
1348          * The DD master should be the same node as the master of this sim.
1349          */
1350         for (int i = 0; i < dd->nnodes; i++)
1351         {
1352             if (cartSetup.ddindex2simnodeid[i] == 0)
1353             {
1354                 ddindex2xyz(dd->numCells, i, dd->master_ci);
1355                 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1356             }
1357         }
1358         if (debug)
1359         {
1360             fprintf(debug, "The master rank is %d\n", dd->masterrank);
1361         }
1362     }
1363     else
1364     {
1365         /* No Cartesian communicators */
1366         /* We use the rank in dd->comm->all as DD index */
1367         ddindex2xyz(dd->numCells, dd->rank, dd->ci);
1368         /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1369         dd->masterrank = 0;
1370         clear_ivec(dd->master_ci);
1371     }
1372 #endif
1373
1374     GMX_LOG(mdlog.info)
1375             .appendTextFormatted("Domain decomposition rank %d, coordinates %d %d %d\n",
1376                                  dd->rank,
1377                                  dd->ci[XX],
1378                                  dd->ci[YY],
1379                                  dd->ci[ZZ]);
1380     if (debug)
1381     {
1382         fprintf(debug,
1383                 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
1384                 dd->rank,
1385                 dd->ci[XX],
1386                 dd->ci[YY],
1387                 dd->ci[ZZ]);
1388     }
1389 }
1390
1391 static void receive_ddindex2simnodeid(gmx_domdec_t* dd, t_commrec* cr)
1392 {
1393 #if GMX_MPI
1394     CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
1395
1396     if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1397     {
1398         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1399         std::vector<int> buf(dd->nnodes);
1400         if (thisRankHasDuty(cr, DUTY_PP))
1401         {
1402             buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
1403         }
1404         /* Communicate the ddindex to simulation nodeid index */
1405         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM, cr->mpi_comm_mysim);
1406     }
1407 #else
1408     GMX_UNUSED_VALUE(dd);
1409     GMX_UNUSED_VALUE(cr);
1410 #endif
1411 }
1412
1413 static CartesianRankSetup split_communicator(const gmx::MDLogger& mdlog,
1414                                              t_commrec*           cr,
1415                                              const DdRankOrder    ddRankOrder,
1416                                              bool gmx_unused      reorder,
1417                                              const DDRankSetup&   ddRankSetup,
1418                                              ivec                 ddCellIndex,
1419                                              std::vector<int>*    pmeRanks)
1420 {
1421     CartesianRankSetup cartSetup;
1422
1423     cartSetup.bCartesianPP     = (ddRankOrder == DdRankOrder::cartesian);
1424     cartSetup.bCartesianPP_PME = false;
1425
1426     const ivec& numDDCells = ddRankSetup.numPPCells;
1427     /* Initially we set ntot to the number of PP cells */
1428     copy_ivec(numDDCells, cartSetup.ntot);
1429
1430     if (cartSetup.bCartesianPP)
1431     {
1432         const int numDDCellsTot = ddRankSetup.numPPRanks;
1433         bool      bDiv[DIM];
1434         for (int i = 1; i < DIM; i++)
1435         {
1436             bDiv[i] = ((ddRankSetup.numRanksDoingPme * numDDCells[i]) % numDDCellsTot == 0);
1437         }
1438         if (bDiv[YY] || bDiv[ZZ])
1439         {
1440             cartSetup.bCartesianPP_PME = TRUE;
1441             /* If we have 2D PME decomposition, which is always in x+y,
1442              * we stack the PME only nodes in z.
1443              * Otherwise we choose the direction that provides the thinnest slab
1444              * of PME only nodes as this will have the least effect
1445              * on the PP communication.
1446              * But for the PME communication the opposite might be better.
1447              */
1448             if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 || !bDiv[YY] || numDDCells[YY] > numDDCells[ZZ]))
1449             {
1450                 cartSetup.cartpmedim = ZZ;
1451             }
1452             else
1453             {
1454                 cartSetup.cartpmedim = YY;
1455             }
1456             cartSetup.ntot[cartSetup.cartpmedim] +=
1457                     (ddRankSetup.numRanksDoingPme * numDDCells[cartSetup.cartpmedim]) / numDDCellsTot;
1458         }
1459         else
1460         {
1461             GMX_LOG(mdlog.info)
1462                     .appendTextFormatted(
1463                             "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or "
1464                             "nx*nz (%d*%d)",
1465                             ddRankSetup.numRanksDoingPme,
1466                             numDDCells[XX],
1467                             numDDCells[YY],
1468                             numDDCells[XX],
1469                             numDDCells[ZZ]);
1470             GMX_LOG(mdlog.info)
1471                     .appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1472         }
1473     }
1474
1475     if (cartSetup.bCartesianPP_PME)
1476     {
1477 #if GMX_MPI
1478         int  rank = 0;
1479         ivec periods;
1480
1481         GMX_LOG(mdlog.info)
1482                 .appendTextFormatted(
1483                         "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1484                         cartSetup.ntot[XX],
1485                         cartSetup.ntot[YY],
1486                         cartSetup.ntot[ZZ]);
1487
1488         for (int i = 0; i < DIM; i++)
1489         {
1490             periods[i] = TRUE;
1491         }
1492         MPI_Comm comm_cart = MPI_COMM_NULL;
1493         MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder), &comm_cart);
1494         MPI_Comm_rank(comm_cart, &rank);
1495         if (MASTER(cr) && rank != 0)
1496         {
1497             gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1498         }
1499
1500         /* With this assigment we loose the link to the original communicator
1501          * which will usually be MPI_COMM_WORLD, unless have multisim.
1502          */
1503         cr->mpi_comm_mysim = comm_cart;
1504         cr->sim_nodeid     = rank;
1505
1506         MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1507
1508         GMX_LOG(mdlog.info)
1509                 .appendTextFormatted("Cartesian rank %d, coordinates %d %d %d\n",
1510                                      cr->sim_nodeid,
1511                                      ddCellIndex[XX],
1512                                      ddCellIndex[YY],
1513                                      ddCellIndex[ZZ]);
1514
1515         if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1516         {
1517             cr->duty = DUTY_PP;
1518         }
1519         if (!ddRankSetup.usePmeOnlyRanks
1520             || ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1521         {
1522             cr->duty = DUTY_PME;
1523         }
1524
1525         /* Split the sim communicator into PP and PME only nodes */
1526         MPI_Comm_split(cr->mpi_comm_mysim,
1527                        getThisRankDuties(cr),
1528                        dd_index(cartSetup.ntot, ddCellIndex),
1529                        &cr->mpi_comm_mygroup);
1530 #else
1531         GMX_UNUSED_VALUE(ddCellIndex);
1532 #endif
1533     }
1534     else
1535     {
1536         switch (ddRankOrder)
1537         {
1538             case DdRankOrder::pp_pme:
1539                 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1540                 break;
1541             case DdRankOrder::interleave:
1542                 /* Interleave the PP-only and PME-only ranks */
1543                 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1544                 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1545                 break;
1546             case DdRankOrder::cartesian: break;
1547             default: gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1548         }
1549
1550         if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1551         {
1552             cr->duty = DUTY_PME;
1553         }
1554         else
1555         {
1556             cr->duty = DUTY_PP;
1557         }
1558 #if GMX_MPI
1559         /* Split the sim communicator into PP and PME only nodes */
1560         MPI_Comm_split(cr->mpi_comm_mysim, getThisRankDuties(cr), cr->nodeid, &cr->mpi_comm_mygroup);
1561         MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1562 #endif
1563     }
1564
1565     GMX_LOG(mdlog.info)
1566             .appendTextFormatted("This rank does only %s work.\n",
1567                                  thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1568
1569     return cartSetup;
1570 }
1571
1572 /*! \brief Makes the PP communicator and the PME communicator, when needed
1573  *
1574  * Returns the Cartesian rank setup.
1575  * Sets \p cr->mpi_comm_mygroup
1576  * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1577  * With separate PME ranks in interleaved order, set the PME ranks in \p pmeRanks.
1578  */
1579 static CartesianRankSetup makeGroupCommunicators(const gmx::MDLogger& mdlog,
1580                                                  const DDSettings&    ddSettings,
1581                                                  const DdRankOrder    ddRankOrder,
1582                                                  const DDRankSetup&   ddRankSetup,
1583                                                  t_commrec*           cr,
1584                                                  ivec                 ddCellIndex,
1585                                                  std::vector<int>*    pmeRanks)
1586 {
1587     CartesianRankSetup cartSetup;
1588
1589     // As a default, both group and sim communicators are equal to the default communicator
1590     cr->mpi_comm_mygroup = cr->mpiDefaultCommunicator;
1591     cr->mpi_comm_mysim   = cr->mpiDefaultCommunicator;
1592     cr->nnodes           = cr->sizeOfDefaultCommunicator;
1593     cr->nodeid           = cr->rankInDefaultCommunicator;
1594     cr->sim_nodeid       = cr->rankInDefaultCommunicator;
1595
1596     if (ddRankSetup.usePmeOnlyRanks)
1597     {
1598         /* Split the communicator into a PP and PME part */
1599         cartSetup = split_communicator(
1600                 mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder, ddRankSetup, ddCellIndex, pmeRanks);
1601     }
1602     else
1603     {
1604         /* All nodes do PP and PME */
1605         /* We do not require separate communicators */
1606         cartSetup.bCartesianPP     = false;
1607         cartSetup.bCartesianPP_PME = false;
1608     }
1609
1610     return cartSetup;
1611 }
1612
1613 /*! \brief For PP ranks, sets or makes the communicator
1614  *
1615  * For PME ranks get the rank id.
1616  * For PP only ranks, sets the PME-only rank.
1617  */
1618 static void setupGroupCommunication(const gmx::MDLogger&     mdlog,
1619                                     const DDSettings&        ddSettings,
1620                                     gmx::ArrayRef<const int> pmeRanks,
1621                                     t_commrec*               cr,
1622                                     const int                numAtomsInSystem,
1623                                     gmx_domdec_t*            dd)
1624 {
1625     const DDRankSetup&        ddRankSetup = dd->comm->ddRankSetup;
1626     const CartesianRankSetup& cartSetup   = dd->comm->cartesianRankSetup;
1627
1628     if (thisRankHasDuty(cr, DUTY_PP))
1629     {
1630         /* Copy or make a new PP communicator */
1631
1632         /* We (possibly) reordered the nodes in split_communicator,
1633          * so it is no longer required in make_pp_communicator.
1634          */
1635         const bool useCartesianReorder = (ddSettings.useCartesianReorder && !cartSetup.bCartesianPP_PME);
1636
1637         make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1638     }
1639     else
1640     {
1641         receive_ddindex2simnodeid(dd, cr);
1642     }
1643
1644     if (!thisRankHasDuty(cr, DUTY_PME))
1645     {
1646         /* Set up the commnuication to our PME node */
1647         dd->pme_nodeid = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1648         dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1649         if (debug)
1650         {
1651             fprintf(debug,
1652                     "My pme_nodeid %d receive ener %s\n",
1653                     dd->pme_nodeid,
1654                     gmx::boolToString(dd->pme_receive_vir_ener));
1655         }
1656     }
1657     else
1658     {
1659         dd->pme_nodeid = -1;
1660     }
1661
1662     /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1663     if (MASTER(cr))
1664     {
1665         dd->ma = std::make_unique<AtomDistribution>(dd->numCells, numAtomsInSystem, numAtomsInSystem);
1666     }
1667 }
1668
1669 static real* get_slb_frac(const gmx::MDLogger& mdlog, const char* dir, int nc, const char* size_string)
1670 {
1671     real* slb_frac = nullptr;
1672     if (nc > 1 && size_string != nullptr)
1673     {
1674         GMX_LOG(mdlog.info).appendTextFormatted("Using static load balancing for the %s direction", dir);
1675         snew(slb_frac, nc);
1676         real tot = 0;
1677         for (int i = 0; i < nc; i++)
1678         {
1679             double dbl = 0;
1680             int    n   = 0;
1681             sscanf(size_string, "%20lf%n", &dbl, &n);
1682             if (dbl == 0)
1683             {
1684                 gmx_fatal(FARGS,
1685                           "Incorrect or not enough DD cell size entries for direction %s: '%s'",
1686                           dir,
1687                           size_string);
1688             }
1689             slb_frac[i] = dbl;
1690             size_string += n;
1691             tot += slb_frac[i];
1692         }
1693         /* Normalize */
1694         std::string relativeCellSizes = "Relative cell sizes:";
1695         for (int i = 0; i < nc; i++)
1696         {
1697             slb_frac[i] /= tot;
1698             relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1699         }
1700         GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1701     }
1702
1703     return slb_frac;
1704 }
1705
1706 static int multi_body_bondeds_count(const gmx_mtop_t& mtop)
1707 {
1708     int n = 0;
1709     for (const auto ilists : IListRange(mtop))
1710     {
1711         for (auto& ilist : extractILists(ilists.list(), IF_BOND))
1712         {
1713             if (NRAL(ilist.functionType) > 2)
1714             {
1715                 n += ilists.nmol() * (ilist.iatoms.size() / ilistStride(ilist));
1716             }
1717         }
1718     }
1719
1720     return n;
1721 }
1722
1723 static int dd_getenv(const gmx::MDLogger& mdlog, const char* env_var, int def)
1724 {
1725     int   nst = def;
1726     char* val = getenv(env_var);
1727     if (val)
1728     {
1729         if (sscanf(val, "%20d", &nst) <= 0)
1730         {
1731             nst = 1;
1732         }
1733         GMX_LOG(mdlog.info).appendTextFormatted("Found env.var. %s = %s, using value %d", env_var, val, nst);
1734     }
1735
1736     return nst;
1737 }
1738
1739 static void check_dd_restrictions(const gmx_domdec_t* dd, const t_inputrec& inputrec, const gmx::MDLogger& mdlog)
1740 {
1741     if (inputrec.pbcType == PbcType::Screw
1742         && (dd->numCells[XX] == 1 || dd->numCells[YY] > 1 || dd->numCells[ZZ] > 1))
1743     {
1744         gmx_fatal(FARGS,
1745                   "With pbc=%s can only do domain decomposition in the x-direction",
1746                   c_pbcTypeNames[inputrec.pbcType].c_str());
1747     }
1748
1749     if (inputrec.nstlist == 0)
1750     {
1751         gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1752     }
1753
1754     if (inputrec.comm_mode == ComRemovalAlgorithm::Angular && inputrec.pbcType != PbcType::No)
1755     {
1756         GMX_LOG(mdlog.warning)
1757                 .appendText(
1758                         "comm-mode angular will give incorrect results when the comm group "
1759                         "partially crosses a periodic boundary");
1760     }
1761 }
1762
1763 static real average_cellsize_min(const gmx_ddbox_t& ddbox, const ivec numDomains)
1764 {
1765     real r = ddbox.box_size[XX];
1766     for (int d = 0; d < DIM; d++)
1767     {
1768         if (numDomains[d] > 1)
1769         {
1770             /* Check using the initial average cell size */
1771             r = std::min(r, ddbox.box_size[d] * ddbox.skew_fac[d] / numDomains[d]);
1772         }
1773     }
1774
1775     return r;
1776 }
1777
1778 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
1779  */
1780 static DlbState forceDlbOffOrBail(DlbState             cmdlineDlbState,
1781                                   const std::string&   reasonStr,
1782                                   const gmx::MDLogger& mdlog)
1783 {
1784     std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
1785     std::string dlbDisableNote     = "NOTE: disabling dynamic load balancing as ";
1786
1787     if (cmdlineDlbState == DlbState::onUser)
1788     {
1789         gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1790     }
1791     else if (cmdlineDlbState == DlbState::offCanTurnOn)
1792     {
1793         GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1794     }
1795     return DlbState::offForever;
1796 }
1797
1798 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1799  *
1800  * This function parses the parameters of "-dlb" command line option setting
1801  * corresponding state values. Then it checks the consistency of the determined
1802  * state with other run parameters and settings. As a result, the initial state
1803  * may be altered or an error may be thrown if incompatibility of options is detected.
1804  *
1805  * \param [in] mdlog       Logger.
1806  * \param [in] dlbOption   Enum value for the DLB option.
1807  * \param [in] bRecordLoad True if the load balancer is recording load information.
1808  * \param [in] mdrunOptions  Options for mdrun.
1809  * \param [in] inputrec    Pointer mdrun to input parameters.
1810  * \returns                DLB initial/startup state.
1811  */
1812 static DlbState determineInitialDlbState(const gmx::MDLogger&     mdlog,
1813                                          DlbOption                dlbOption,
1814                                          gmx_bool                 bRecordLoad,
1815                                          const gmx::MdrunOptions& mdrunOptions,
1816                                          const t_inputrec&        inputrec)
1817 {
1818     DlbState dlbState = DlbState::offCanTurnOn;
1819
1820     switch (dlbOption)
1821     {
1822         case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1823         case DlbOption::no: dlbState = DlbState::offUser; break;
1824         case DlbOption::yes: dlbState = DlbState::onUser; break;
1825         default: gmx_incons("Invalid dlbOption enum value");
1826     }
1827
1828     /* Reruns don't support DLB: bail or override auto mode */
1829     if (mdrunOptions.rerun)
1830     {
1831         std::string reasonStr = "it is not supported in reruns.";
1832         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1833     }
1834
1835     /* Unsupported integrators */
1836     if (!EI_DYNAMICS(inputrec.eI))
1837     {
1838         auto reasonStr =
1839                 gmx::formatString("it is only supported with dynamics, not with integrator '%s'.",
1840                                   enumValueToString(inputrec.eI));
1841         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1842     }
1843
1844     /* Without cycle counters we can't time work to balance on */
1845     if (!bRecordLoad)
1846     {
1847         std::string reasonStr =
1848                 "cycle counters unsupported or not enabled in the operating system kernel.";
1849         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1850     }
1851
1852     if (mdrunOptions.reproducible)
1853     {
1854         std::string reasonStr = "you started a reproducible run.";
1855         switch (dlbState)
1856         {
1857             case DlbState::offUser: break;
1858             case DlbState::offForever:
1859                 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1860                 break;
1861             case DlbState::offCanTurnOn: return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1862             case DlbState::onCanTurnOff:
1863                 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1864                 break;
1865             case DlbState::onUser:
1866                 return forceDlbOffOrBail(
1867                         dlbState,
1868                         reasonStr
1869                                 + " In load balanced runs binary reproducibility cannot be "
1870                                   "ensured.",
1871                         mdlog);
1872             default:
1873                 gmx_fatal(FARGS,
1874                           "Death horror: undefined case (%d) for load balancing choice",
1875                           static_cast<int>(dlbState));
1876         }
1877     }
1878
1879     return dlbState;
1880 }
1881
1882 static gmx_domdec_comm_t* init_dd_comm()
1883 {
1884     gmx_domdec_comm_t* comm = new gmx_domdec_comm_t;
1885
1886     comm->n_load_have    = 0;
1887     comm->n_load_collect = 0;
1888
1889     comm->haveTurnedOffDlb = false;
1890
1891     for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
1892     {
1893         comm->sum_nat[i] = 0;
1894     }
1895     comm->ndecomp   = 0;
1896     comm->nload     = 0;
1897     comm->load_step = 0;
1898     comm->load_sum  = 0;
1899     comm->load_max  = 0;
1900     clear_ivec(comm->load_lim);
1901     comm->load_mdf = 0;
1902     comm->load_pme = 0;
1903
1904     /* This should be replaced by a unique pointer */
1905     comm->balanceRegion = ddBalanceRegionAllocate();
1906
1907     return comm;
1908 }
1909
1910 static void setupUpdateGroups(const gmx::MDLogger&              mdlog,
1911                               const gmx_mtop_t&                 mtop,
1912                               ArrayRef<const RangePartitioning> updateGroupingsPerMoleculeType,
1913                               const bool                        useUpdateGroups,
1914                               const real                        maxUpdateGroupRadius,
1915                               DDSystemInfo*                     systemInfo)
1916 {
1917     systemInfo->updateGroupingsPerMoleculeType = updateGroupingsPerMoleculeType;
1918     systemInfo->useUpdateGroups                = useUpdateGroups;
1919     systemInfo->maxUpdateGroupRadius           = maxUpdateGroupRadius;
1920
1921     if (systemInfo->useUpdateGroups)
1922     {
1923         int numUpdateGroups = 0;
1924         for (const auto& molblock : mtop.molblock)
1925         {
1926             numUpdateGroups += molblock.nmol
1927                                * systemInfo->updateGroupingsPerMoleculeType[molblock.type].numBlocks();
1928         }
1929
1930         GMX_LOG(mdlog.info)
1931                 .appendTextFormatted(
1932                         "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f "
1933                         "nm\n",
1934                         numUpdateGroups,
1935                         mtop.natoms / static_cast<double>(numUpdateGroups),
1936                         systemInfo->maxUpdateGroupRadius);
1937     }
1938 }
1939
1940 UnitCellInfo::UnitCellInfo(const t_inputrec& ir) :
1941     npbcdim(numPbcDimensions(ir.pbcType)),
1942     numBoundedDimensions(inputrec2nboundeddim(&ir)),
1943     ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
1944     haveScrewPBC(ir.pbcType == PbcType::Screw)
1945 {
1946 }
1947
1948 /* Returns whether molecules are always whole, i.e. not broken by PBC */
1949 static bool moleculesAreAlwaysWhole(const gmx_mtop_t&                           mtop,
1950                                     const bool                                  useUpdateGroups,
1951                                     gmx::ArrayRef<const gmx::RangePartitioning> updateGroupingsPerMoleculeType)
1952 {
1953     if (useUpdateGroups)
1954     {
1955         GMX_RELEASE_ASSERT(updateGroupingsPerMoleculeType.size() == mtop.moltype.size(),
1956                            "Need one grouping per moltype");
1957         for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
1958         {
1959             if (updateGroupingsPerMoleculeType[mol].numBlocks() > 1)
1960             {
1961                 return false;
1962             }
1963         }
1964     }
1965     else
1966     {
1967         for (const auto& moltype : mtop.moltype)
1968         {
1969             if (moltype.atoms.nr > 1)
1970             {
1971                 return false;
1972             }
1973         }
1974     }
1975
1976     return true;
1977 }
1978
1979 /*! \brief Generate the simulation system information */
1980 static DDSystemInfo getSystemInfo(const gmx::MDLogger&              mdlog,
1981                                   DDRole                            ddRole,
1982                                   MPI_Comm                          communicator,
1983                                   const DomdecOptions&              options,
1984                                   const gmx_mtop_t&                 mtop,
1985                                   const t_inputrec&                 ir,
1986                                   const matrix                      box,
1987                                   ArrayRef<const RangePartitioning> updateGroupingPerMoleculeType,
1988                                   const bool                        useUpdateGroups,
1989                                   const real                        maxUpdateGroupRadius,
1990                                   gmx::ArrayRef<const gmx::RVec>    xGlobal)
1991 {
1992     const real tenPercentMargin = 1.1;
1993
1994     DDSystemInfo systemInfo;
1995
1996     setupUpdateGroups(
1997             mdlog, mtop, updateGroupingPerMoleculeType, useUpdateGroups, maxUpdateGroupRadius, &systemInfo);
1998
1999     systemInfo.moleculesAreAlwaysWhole = moleculesAreAlwaysWhole(
2000             mtop, systemInfo.useUpdateGroups, systemInfo.updateGroupingsPerMoleculeType);
2001     systemInfo.haveInterDomainBondeds =
2002             (!systemInfo.moleculesAreAlwaysWhole || mtop.bIntermolecularInteractions);
2003     systemInfo.haveInterDomainMultiBodyBondeds =
2004             (systemInfo.haveInterDomainBondeds && multi_body_bondeds_count(mtop) > 0);
2005
2006     if (systemInfo.useUpdateGroups)
2007     {
2008         systemInfo.mayHaveSplitConstraints = false;
2009         systemInfo.mayHaveSplitSettles     = false;
2010     }
2011     else
2012     {
2013         systemInfo.mayHaveSplitConstraints = (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0
2014                                               || gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0);
2015         systemInfo.mayHaveSplitSettles     = (gmx_mtop_ftype_count(mtop, F_SETTLE) > 0);
2016     }
2017
2018     if (ir.rlist == 0)
2019     {
2020         /* Set the cut-off to some very large value,
2021          * so we don't need if statements everywhere in the code.
2022          * We use sqrt, since the cut-off is squared in some places.
2023          */
2024         systemInfo.cutoff = GMX_CUTOFF_INF;
2025     }
2026     else
2027     {
2028         systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir.rlist);
2029     }
2030     systemInfo.minCutoffForMultiBody = 0;
2031
2032     /* Determine the minimum cell size limit, affected by many factors */
2033     systemInfo.cellsizeLimit             = 0;
2034     systemInfo.filterBondedCommunication = false;
2035
2036     /* We do not allow home atoms to move beyond the neighboring domain
2037      * between domain decomposition steps, which limits the cell size.
2038      * Get an estimate of cell size limit due to atom displacement.
2039      * In most cases this is a large overestimate, because it assumes
2040      * non-interaction atoms.
2041      * We set the chance to 1 in a trillion steps.
2042      */
2043     constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2044     const real     limitForAtomDisplacement          = minCellSizeForAtomDisplacement(
2045             mtop, ir, systemInfo.updateGroupingsPerMoleculeType, c_chanceThatAtomMovesBeyondDomain);
2046     GMX_LOG(mdlog.info).appendTextFormatted("Minimum cell size due to atom displacement: %.3f nm", limitForAtomDisplacement);
2047
2048     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, limitForAtomDisplacement);
2049
2050     /* TODO: PME decomposition currently requires atoms not to be more than
2051      *       2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2052      *       In nearly all cases, limitForAtomDisplacement will be smaller
2053      *       than 2/3*rlist, so the PME requirement is satisfied.
2054      *       But it would be better for both correctness and performance
2055      *       to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2056      *       Note that we would need to improve the pairlist buffer case.
2057      */
2058
2059     if (systemInfo.haveInterDomainBondeds)
2060     {
2061         if (options.minimumCommunicationRange > 0)
2062         {
2063             systemInfo.minCutoffForMultiBody =
2064                     atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2065             if (options.useBondedCommunication)
2066             {
2067                 systemInfo.filterBondedCommunication =
2068                         (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2069             }
2070             else
2071             {
2072                 systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
2073             }
2074         }
2075         else if (ir.bPeriodicMols)
2076         {
2077             /* Can not easily determine the required cut-off */
2078             GMX_LOG(mdlog.warning)
2079                     .appendText(
2080                             "NOTE: Periodic molecules are present in this system. Because of this, "
2081                             "the domain decomposition algorithm cannot easily determine the "
2082                             "minimum cell size that it requires for treating bonded interactions. "
2083                             "Instead, domain decomposition will assume that half the non-bonded "
2084                             "cut-off will be a suitable lower bound.");
2085             systemInfo.minCutoffForMultiBody = systemInfo.cutoff / 2;
2086         }
2087         else
2088         {
2089             real r_2b = 0;
2090             real r_mb = 0;
2091
2092             if (ddRole == DDRole::Master)
2093             {
2094                 dd_bonded_cg_distance(mdlog, mtop, ir, xGlobal, box, options.ddBondedChecking, &r_2b, &r_mb);
2095             }
2096             gmx_bcast(sizeof(r_2b), &r_2b, communicator);
2097             gmx_bcast(sizeof(r_mb), &r_mb, communicator);
2098
2099             /* We use an initial margin of 10% for the minimum cell size,
2100              * except when we are just below the non-bonded cut-off.
2101              */
2102             if (options.useBondedCommunication)
2103             {
2104                 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2105                 {
2106                     const real r_bonded              = std::max(r_2b, r_mb);
2107                     systemInfo.minCutoffForMultiBody = tenPercentMargin * r_bonded;
2108                     /* This is the (only) place where we turn on the filtering */
2109                     systemInfo.filterBondedCommunication = true;
2110                 }
2111                 else
2112                 {
2113                     const real r_bonded = r_mb;
2114                     systemInfo.minCutoffForMultiBody =
2115                             std::min(tenPercentMargin * r_bonded, systemInfo.cutoff);
2116                 }
2117                 /* We determine cutoff_mbody later */
2118                 systemInfo.increaseMultiBodyCutoff = true;
2119             }
2120             else
2121             {
2122                 /* No special bonded communication,
2123                  * simply increase the DD cut-off.
2124                  */
2125                 systemInfo.minCutoffForMultiBody = tenPercentMargin * std::max(r_2b, r_mb);
2126                 systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
2127             }
2128         }
2129         GMX_LOG(mdlog.info)
2130                 .appendTextFormatted("Minimum cell size due to bonded interactions: %.3f nm",
2131                                      systemInfo.minCutoffForMultiBody);
2132
2133         systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.minCutoffForMultiBody);
2134     }
2135
2136     systemInfo.constraintCommunicationRange = 0;
2137     if (systemInfo.mayHaveSplitConstraints && options.constraintCommunicationRange <= 0)
2138     {
2139         /* There is a cell size limit due to the constraints (P-LINCS) */
2140         systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, &mtop, &ir);
2141         GMX_LOG(mdlog.info)
2142                 .appendTextFormatted("Estimated maximum distance required for P-LINCS: %.3f nm",
2143                                      systemInfo.constraintCommunicationRange);
2144         if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2145         {
2146             GMX_LOG(mdlog.info)
2147                     .appendText(
2148                             "This distance will limit the DD cell size, you can override this with "
2149                             "-rcon");
2150         }
2151     }
2152     else if (options.constraintCommunicationRange > 0)
2153     {
2154         /* Here we do not check for dd->splitConstraints.
2155          * because one can also set a cell size limit for virtual sites only
2156          * and at this point we don't know yet if there are intercg v-sites.
2157          */
2158         GMX_LOG(mdlog.info)
2159                 .appendTextFormatted("User supplied maximum distance required for P-LINCS: %.3f nm",
2160                                      options.constraintCommunicationRange);
2161         systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2162     }
2163     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.constraintCommunicationRange);
2164
2165     return systemInfo;
2166 }
2167
2168 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
2169  * implemented. */
2170 static void checkDDGridSetup(const DDGridSetup&   ddGridSetup,
2171                              DDRole               ddRole,
2172                              MPI_Comm             communicator,
2173                              int                  numNodes,
2174                              const DomdecOptions& options,
2175                              const DDSettings&    ddSettings,
2176                              const DDSystemInfo&  systemInfo,
2177                              const real           cellsizeLimit,
2178                              const gmx_ddbox_t&   ddbox)
2179 {
2180     if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
2181     {
2182         const bool  bC = (systemInfo.mayHaveSplitConstraints
2183                          && systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2184         std::string message =
2185                 gmx::formatString("Change the number of ranks or mdrun option %s%s%s",
2186                                   !bC ? "-rdd" : "-rcon",
2187                                   ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2188                                   bC ? " or your LINCS settings" : "");
2189
2190         gmx_fatal_collective(FARGS,
2191                              communicator,
2192                              ddRole == DDRole::Master,
2193                              "There is no domain decomposition for %d ranks that is compatible "
2194                              "with the given box and a minimum cell size of %g nm\n"
2195                              "%s\n"
2196                              "Look in the log file for details on the domain decomposition",
2197                              numNodes - ddGridSetup.numPmeOnlyRanks,
2198                              cellsizeLimit,
2199                              message.c_str());
2200     }
2201
2202     const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
2203     if (acs < cellsizeLimit)
2204     {
2205         if (options.numCells[XX] <= 0)
2206         {
2207             GMX_RELEASE_ASSERT(
2208                     false,
2209                     "dd_choose_grid() should return a grid that satisfies the cell size limits");
2210         }
2211         else
2212         {
2213             gmx_fatal_collective(
2214                     FARGS,
2215                     communicator,
2216                     ddRole == DDRole::Master,
2217                     "The initial cell size (%f) is smaller than the cell size limit (%f), change "
2218                     "options -dd, -rdd or -rcon, see the log file for details",
2219                     acs,
2220                     cellsizeLimit);
2221         }
2222     }
2223
2224     const int numPPRanks =
2225             ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2226     if (numNodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2227     {
2228         gmx_fatal_collective(FARGS,
2229                              communicator,
2230                              ddRole == DDRole::Master,
2231                              "The size of the domain decomposition grid (%d) does not match the "
2232                              "number of PP ranks (%d). The total number of ranks is %d",
2233                              numPPRanks,
2234                              numNodes - ddGridSetup.numPmeOnlyRanks,
2235                              numNodes);
2236     }
2237     if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2238     {
2239         gmx_fatal_collective(FARGS,
2240                              communicator,
2241                              ddRole == DDRole::Master,
2242                              "The number of separate PME ranks (%d) is larger than the number of "
2243                              "PP ranks (%d), this is not supported.",
2244                              ddGridSetup.numPmeOnlyRanks,
2245                              numPPRanks);
2246     }
2247 }
2248
2249 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2250 static DDRankSetup getDDRankSetup(const gmx::MDLogger& mdlog,
2251                                   int                  numNodes,
2252                                   const DDGridSetup&   ddGridSetup,
2253                                   const t_inputrec&    ir)
2254 {
2255     GMX_LOG(mdlog.info)
2256             .appendTextFormatted("Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2257                                  ddGridSetup.numDomains[XX],
2258                                  ddGridSetup.numDomains[YY],
2259                                  ddGridSetup.numDomains[ZZ],
2260                                  ddGridSetup.numPmeOnlyRanks);
2261
2262     DDRankSetup ddRankSetup;
2263
2264     ddRankSetup.numPPRanks = numNodes - ddGridSetup.numPmeOnlyRanks;
2265     copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2266
2267     ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2268     if (ddRankSetup.usePmeOnlyRanks)
2269     {
2270         ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2271     }
2272     else
2273     {
2274         ddRankSetup.numRanksDoingPme =
2275                 ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2276     }
2277
2278     if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2279     {
2280         /* The following choices should match those
2281          * in comm_cost_est in domdec_setup.c.
2282          * Note that here the checks have to take into account
2283          * that the decomposition might occur in a different order than xyz
2284          * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2285          * in which case they will not match those in comm_cost_est,
2286          * but since that is mainly for testing purposes that's fine.
2287          */
2288         if (ddGridSetup.numDDDimensions >= 2 && ddGridSetup.ddDimensions[0] == XX
2289             && ddGridSetup.ddDimensions[1] == YY
2290             && ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX]
2291             && ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0
2292             && getenv("GMX_PMEONEDD") == nullptr)
2293         {
2294             ddRankSetup.npmedecompdim = 2;
2295             ddRankSetup.npmenodes_x   = ddGridSetup.numDomains[XX];
2296             ddRankSetup.npmenodes_y   = ddRankSetup.numRanksDoingPme / ddRankSetup.npmenodes_x;
2297         }
2298         else
2299         {
2300             /* In case nc is 1 in both x and y we could still choose to
2301              * decompose pme in y instead of x, but we use x for simplicity.
2302              */
2303             ddRankSetup.npmedecompdim = 1;
2304             if (ddGridSetup.ddDimensions[0] == YY)
2305             {
2306                 ddRankSetup.npmenodes_x = 1;
2307                 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2308             }
2309             else
2310             {
2311                 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2312                 ddRankSetup.npmenodes_y = 1;
2313             }
2314         }
2315         GMX_LOG(mdlog.info)
2316                 .appendTextFormatted("PME domain decomposition: %d x %d x %d",
2317                                      ddRankSetup.npmenodes_x,
2318                                      ddRankSetup.npmenodes_y,
2319                                      1);
2320     }
2321     else
2322     {
2323         ddRankSetup.npmedecompdim = 0;
2324         ddRankSetup.npmenodes_x   = 0;
2325         ddRankSetup.npmenodes_y   = 0;
2326     }
2327
2328     return ddRankSetup;
2329 }
2330
2331 /*! \brief Set the cell size and interaction limits */
2332 static void set_dd_limits(const gmx::MDLogger& mdlog,
2333                           DDRole               ddRole,
2334                           gmx_domdec_t*        dd,
2335                           const DomdecOptions& options,
2336                           const DDSettings&    ddSettings,
2337                           const DDSystemInfo&  systemInfo,
2338                           const DDGridSetup&   ddGridSetup,
2339                           const int            numPPRanks,
2340                           const gmx_mtop_t&    mtop,
2341                           const t_inputrec&    ir,
2342                           const gmx_ddbox_t&   ddbox)
2343 {
2344     gmx_domdec_comm_t* comm = dd->comm;
2345     comm->ddSettings        = ddSettings;
2346
2347     /* Initialize to GPU share count to 0, might change later */
2348     comm->nrank_gpu_shared = 0;
2349
2350     comm->dlbState = comm->ddSettings.initialDlbState;
2351     dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2352     /* To consider turning DLB on after 2*nstlist steps we need to check
2353      * at partitioning count 3. Thus we need to increase the first count by 2.
2354      */
2355     comm->ddPartioningCountFirstDlbOff += 2;
2356
2357     comm->bPMELoadBalDLBLimits = FALSE;
2358
2359     /* Allocate the charge group/atom sorting struct */
2360     comm->sort = std::make_unique<gmx_domdec_sort_t>();
2361
2362     comm->systemInfo = systemInfo;
2363
2364     if (systemInfo.useUpdateGroups)
2365     {
2366         /* Note: We would like to use dd->nnodes for the atom count estimate,
2367          *       but that is not yet available here. But this anyhow only
2368          *       affect performance up to the second dd_partition_system call.
2369          */
2370         const int homeAtomCountEstimate = mtop.natoms / numPPRanks;
2371         comm->updateGroupsCog           = std::make_unique<gmx::UpdateGroupsCog>(
2372                 mtop, systemInfo.updateGroupingsPerMoleculeType, maxReferenceTemperature(ir), homeAtomCountEstimate);
2373     }
2374
2375     /* Set the DD setup given by ddGridSetup */
2376     copy_ivec(ddGridSetup.numDomains, dd->numCells);
2377     dd->ndim = ddGridSetup.numDDDimensions;
2378     copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2379
2380     dd->nnodes = dd->numCells[XX] * dd->numCells[YY] * dd->numCells[ZZ];
2381
2382     snew(comm->slb_frac, DIM);
2383     if (isDlbDisabled(comm))
2384     {
2385         comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->numCells[XX], options.cellSizeX);
2386         comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->numCells[YY], options.cellSizeY);
2387         comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->numCells[ZZ], options.cellSizeZ);
2388     }
2389
2390     /* Set the multi-body cut-off and cellsize limit for DLB */
2391     comm->cutoff_mbody   = systemInfo.minCutoffForMultiBody;
2392     comm->cellsize_limit = systemInfo.cellsizeLimit;
2393     if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2394     {
2395         if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2396         {
2397             /* Set the bonded communication distance to halfway
2398              * the minimum and the maximum,
2399              * since the extra communication cost is nearly zero.
2400              */
2401             real acs           = average_cellsize_min(ddbox, dd->numCells);
2402             comm->cutoff_mbody = 0.5 * (systemInfo.minCutoffForMultiBody + acs);
2403             if (!isDlbDisabled(comm))
2404             {
2405                 /* Check if this does not limit the scaling */
2406                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, options.dlbScaling * acs);
2407             }
2408             if (!systemInfo.filterBondedCommunication)
2409             {
2410                 /* Without bBondComm do not go beyond the n.b. cut-off */
2411                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2412                 if (comm->cellsize_limit >= systemInfo.cutoff)
2413                 {
2414                     /* We don't loose a lot of efficieny
2415                      * when increasing it to the n.b. cut-off.
2416                      * It can even be slightly faster, because we need
2417                      * less checks for the communication setup.
2418                      */
2419                     comm->cutoff_mbody = systemInfo.cutoff;
2420                 }
2421             }
2422             /* Check if we did not end up below our original limit */
2423             comm->cutoff_mbody = std::max(comm->cutoff_mbody, systemInfo.minCutoffForMultiBody);
2424
2425             if (comm->cutoff_mbody > comm->cellsize_limit)
2426             {
2427                 comm->cellsize_limit = comm->cutoff_mbody;
2428             }
2429         }
2430         /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2431     }
2432
2433     if (debug)
2434     {
2435         fprintf(debug,
2436                 "Bonded atom communication beyond the cut-off: %s\n"
2437                 "cellsize limit %f\n",
2438                 gmx::boolToString(systemInfo.filterBondedCommunication),
2439                 comm->cellsize_limit);
2440     }
2441
2442     if (ddRole == DDRole::Master)
2443     {
2444         check_dd_restrictions(dd, ir, mdlog);
2445     }
2446 }
2447
2448 static void writeSettings(gmx::TextWriter*   log,
2449                           gmx_domdec_t*      dd,
2450                           const gmx_mtop_t&  mtop,
2451                           const t_inputrec&  ir,
2452                           gmx_bool           bDynLoadBal,
2453                           real               dlb_scale,
2454                           const gmx_ddbox_t* ddbox)
2455 {
2456     gmx_domdec_comm_t* comm = dd->comm;
2457
2458     if (bDynLoadBal)
2459     {
2460         log->writeString("The maximum number of communication pulses is:");
2461         for (int d = 0; d < dd->ndim; d++)
2462         {
2463             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2464         }
2465         log->ensureLineBreak();
2466         log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm",
2467                                 comm->cellsize_limit);
2468         log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2469         log->writeString("The allowed shrink of domain decomposition cells is:");
2470         for (int d = 0; d < DIM; d++)
2471         {
2472             if (dd->numCells[d] > 1)
2473             {
2474                 const real shrink =
2475                         (d >= ddbox->npbcdim && dd->numCells[d] == 2)
2476                                 ? 0
2477                                 : comm->cellsize_min_dlb[d]
2478                                           / (ddbox->box_size[d] * ddbox->skew_fac[d] / dd->numCells[d]);
2479                 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2480             }
2481         }
2482         log->ensureLineBreak();
2483     }
2484     else
2485     {
2486         ivec np;
2487         set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2488         log->writeString("The initial number of communication pulses is:");
2489         for (int d = 0; d < dd->ndim; d++)
2490         {
2491             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2492         }
2493         log->ensureLineBreak();
2494         log->writeString("The initial domain decomposition cell size is:");
2495         for (int d = 0; d < DIM; d++)
2496         {
2497             if (dd->numCells[d] > 1)
2498             {
2499                 log->writeStringFormatted(" %c %.2f nm", dim2char(d), dd->comm->cellsize_min[d]);
2500             }
2501         }
2502         log->ensureLineBreak();
2503         log->writeLine();
2504     }
2505
2506     const bool haveInterDomainVsites =
2507             (countInterUpdategroupVsites(mtop, comm->systemInfo.updateGroupingsPerMoleculeType) != 0);
2508
2509     if (comm->systemInfo.haveInterDomainBondeds || haveInterDomainVsites
2510         || comm->systemInfo.mayHaveSplitConstraints || comm->systemInfo.mayHaveSplitSettles)
2511     {
2512         std::string decompUnits;
2513         if (comm->systemInfo.useUpdateGroups)
2514         {
2515             decompUnits = "atom groups";
2516         }
2517         else
2518         {
2519             decompUnits = "atoms";
2520         }
2521
2522         log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:",
2523                                 decompUnits.c_str());
2524         log->writeLineFormatted(
2525                 "%40s  %-7s %6.3f nm", "non-bonded interactions", "", comm->systemInfo.cutoff);
2526
2527         real limit = 0;
2528         if (bDynLoadBal)
2529         {
2530             limit = dd->comm->cellsize_limit;
2531         }
2532         else
2533         {
2534             if (dd->unitCellInfo.ddBoxIsDynamic)
2535             {
2536                 log->writeLine(
2537                         "(the following are initial values, they could change due to box "
2538                         "deformation)");
2539             }
2540             limit = dd->comm->cellsize_min[XX];
2541             for (int d = 1; d < DIM; d++)
2542             {
2543                 limit = std::min(limit, dd->comm->cellsize_min[d]);
2544             }
2545         }
2546
2547         if (comm->systemInfo.haveInterDomainBondeds)
2548         {
2549             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2550                                     "two-body bonded interactions",
2551                                     "(-rdd)",
2552                                     std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2553             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2554                                     "multi-body bonded interactions",
2555                                     "(-rdd)",
2556                                     (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm))
2557                                             ? comm->cutoff_mbody
2558                                             : std::min(comm->systemInfo.cutoff, limit));
2559         }
2560         if (haveInterDomainVsites)
2561         {
2562             log->writeLineFormatted("%40s  %-7s %6.3f nm", "virtual site constructions", "(-rcon)", limit);
2563         }
2564         if (comm->systemInfo.mayHaveSplitConstraints || comm->systemInfo.mayHaveSplitSettles)
2565         {
2566             std::string separation =
2567                     gmx::formatString("atoms separated by up to %d constraints", 1 + ir.nProjOrder);
2568             log->writeLineFormatted("%40s  %-7s %6.3f nm\n", separation.c_str(), "(-rcon)", limit);
2569         }
2570         log->ensureLineBreak();
2571     }
2572 }
2573
2574 static void logSettings(const gmx::MDLogger& mdlog,
2575                         gmx_domdec_t*        dd,
2576                         const gmx_mtop_t&    mtop,
2577                         const t_inputrec&    ir,
2578                         real                 dlb_scale,
2579                         const gmx_ddbox_t*   ddbox)
2580 {
2581     gmx::StringOutputStream stream;
2582     gmx::TextWriter         log(&stream);
2583     writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2584     if (dd->comm->dlbState == DlbState::offCanTurnOn)
2585     {
2586         {
2587             log.ensureEmptyLine();
2588             log.writeLine(
2589                     "When dynamic load balancing gets turned on, these settings will change to:");
2590         }
2591         writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2592     }
2593     GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2594 }
2595
2596 static void set_cell_limits_dlb(const gmx::MDLogger& mdlog,
2597                                 gmx_domdec_t*        dd,
2598                                 real                 dlb_scale,
2599                                 const t_inputrec&    inputrec,
2600                                 const gmx_ddbox_t*   ddbox)
2601 {
2602     int npulse       = 0;
2603     int npulse_d_max = 0;
2604     int npulse_d     = 0;
2605
2606     gmx_domdec_comm_t* comm = dd->comm;
2607
2608     bool bNoCutOff = (inputrec.rvdw == 0 || inputrec.rcoulomb == 0);
2609
2610     /* Determine the maximum number of comm. pulses in one dimension */
2611
2612     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2613
2614     /* Determine the maximum required number of grid pulses */
2615     if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2616     {
2617         /* Only a single pulse is required */
2618         npulse = 1;
2619     }
2620     else if (!bNoCutOff && comm->cellsize_limit > 0)
2621     {
2622         /* We round down slightly here to avoid overhead due to the latency
2623          * of extra communication calls when the cut-off
2624          * would be only slightly longer than the cell size.
2625          * Later cellsize_limit is redetermined,
2626          * so we can not miss interactions due to this rounding.
2627          */
2628         npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff / comm->cellsize_limit);
2629     }
2630     else
2631     {
2632         /* There is no cell size limit */
2633         npulse = std::max(dd->numCells[XX] - 1, std::max(dd->numCells[YY] - 1, dd->numCells[ZZ] - 1));
2634     }
2635
2636     if (!bNoCutOff && npulse > 1)
2637     {
2638         /* See if we can do with less pulses, based on dlb_scale */
2639         npulse_d_max = 0;
2640         for (int d = 0; d < dd->ndim; d++)
2641         {
2642             int dim  = dd->dim[d];
2643             npulse_d = static_cast<int>(
2644                     1
2645                     + dd->numCells[dim] * comm->systemInfo.cutoff
2646                               / (ddbox->box_size[dim] * ddbox->skew_fac[dim] * dlb_scale));
2647             npulse_d_max = std::max(npulse_d_max, npulse_d);
2648         }
2649         npulse = std::min(npulse, npulse_d_max);
2650     }
2651
2652     /* This env var can override npulse */
2653     const int ddPulseEnv = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2654     if (ddPulseEnv > 0)
2655     {
2656         npulse = ddPulseEnv;
2657     }
2658
2659     comm->maxpulse       = 1;
2660     comm->bVacDLBNoLimit = (inputrec.pbcType == PbcType::No);
2661     for (int d = 0; d < dd->ndim; d++)
2662     {
2663         comm->cd[d].np_dlb = std::min(npulse, dd->numCells[dd->dim[d]] - 1);
2664         comm->maxpulse     = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2665         if (comm->cd[d].np_dlb < dd->numCells[dd->dim[d]] - 1)
2666         {
2667             comm->bVacDLBNoLimit = FALSE;
2668         }
2669     }
2670
2671     /* cellsize_limit is set for LINCS in init_domain_decomposition */
2672     if (!comm->bVacDLBNoLimit)
2673     {
2674         comm->cellsize_limit = std::max(comm->cellsize_limit, comm->systemInfo.cutoff / comm->maxpulse);
2675     }
2676     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2677     /* Set the minimum cell size for each DD dimension */
2678     for (int d = 0; d < dd->ndim; d++)
2679     {
2680         if (comm->bVacDLBNoLimit || comm->cd[d].np_dlb * comm->cellsize_limit >= comm->systemInfo.cutoff)
2681         {
2682             comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2683         }
2684         else
2685         {
2686             comm->cellsize_min_dlb[dd->dim[d]] = comm->systemInfo.cutoff / comm->cd[d].np_dlb;
2687         }
2688     }
2689     if (comm->cutoff_mbody <= 0)
2690     {
2691         comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2692     }
2693     if (isDlbOn(comm))
2694     {
2695         set_dlb_limits(dd);
2696     }
2697 }
2698
2699 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t& dd)
2700 {
2701     return dd.comm->systemInfo.moleculesAreAlwaysWhole;
2702 }
2703
2704 bool dd_bonded_molpbc(const gmx_domdec_t& dd, PbcType pbcType)
2705 {
2706     /* If each molecule is a single charge group
2707      * or we use domain decomposition for each periodic dimension,
2708      * we do not need to take pbc into account for the bonded interactions.
2709      */
2710     return (pbcType != PbcType::No && dd.comm->systemInfo.haveInterDomainBondeds
2711             && !(dd.numCells[XX] > 1 && dd.numCells[YY] > 1
2712                  && (dd.numCells[ZZ] > 1 || pbcType == PbcType::XY)));
2713 }
2714
2715 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2716 static void set_ddgrid_parameters(const gmx::MDLogger& mdlog,
2717                                   gmx_domdec_t*        dd,
2718                                   real                 dlb_scale,
2719                                   const gmx_mtop_t&    mtop,
2720                                   const t_inputrec&    inputrec,
2721                                   const gmx_ddbox_t*   ddbox)
2722 {
2723     gmx_domdec_comm_t* comm        = dd->comm;
2724     DDRankSetup&       ddRankSetup = comm->ddRankSetup;
2725
2726     if (EEL_PME(inputrec.coulombtype) || EVDW_PME(inputrec.vdwtype))
2727     {
2728         init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2729         if (ddRankSetup.npmedecompdim >= 2)
2730         {
2731             init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2732         }
2733     }
2734     else
2735     {
2736         ddRankSetup.numRanksDoingPme = 0;
2737         if (dd->pme_nodeid >= 0)
2738         {
2739             gmx_fatal_collective(FARGS,
2740                                  dd->mpi_comm_all,
2741                                  DDMASTER(dd),
2742                                  "Can not have separate PME ranks without PME electrostatics");
2743         }
2744     }
2745
2746     if (debug)
2747     {
2748         fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2749     }
2750     if (!isDlbDisabled(comm))
2751     {
2752         set_cell_limits_dlb(mdlog, dd, dlb_scale, inputrec, ddbox);
2753     }
2754
2755     logSettings(mdlog, dd, mtop, inputrec, dlb_scale, ddbox);
2756
2757     const real vol_frac = (inputrec.pbcType == PbcType::No)
2758                                   ? (1 - 1 / static_cast<double>(dd->nnodes))
2759                                   : ((1 + comm_box_frac(dd->numCells, comm->systemInfo.cutoff, *ddbox))
2760                                      / static_cast<double>(dd->nnodes));
2761     if (debug)
2762     {
2763         fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2764     }
2765     int natoms_tot = mtop.natoms;
2766
2767     dd->ga2la = new gmx_ga2la_t(natoms_tot, static_cast<int>(vol_frac * natoms_tot));
2768 }
2769
2770 /*! \brief Get some important DD parameters which can be modified by env.vars */
2771 static DDSettings getDDSettings(const gmx::MDLogger&     mdlog,
2772                                 const DomdecOptions&     options,
2773                                 const gmx::MdrunOptions& mdrunOptions,
2774                                 const t_inputrec&        ir)
2775 {
2776     DDSettings ddSettings;
2777
2778     ddSettings.useSendRecv2        = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2779     ddSettings.dlb_scale_lim       = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2780     ddSettings.useDDOrderZYX       = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
2781     ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
2782     ddSettings.eFlop               = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2783     const int recload              = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2784     ddSettings.nstDDDump           = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2785     ddSettings.nstDDDumpGrid       = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2786     ddSettings.DD_debug            = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
2787
2788     if (ddSettings.useSendRecv2)
2789     {
2790         GMX_LOG(mdlog.info)
2791                 .appendText(
2792                         "Will use two sequential MPI_Sendrecv calls instead of two simultaneous "
2793                         "non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite "
2794                         "communication");
2795     }
2796
2797     if (ddSettings.eFlop)
2798     {
2799         GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2800         ddSettings.recordLoad = true;
2801     }
2802     else
2803     {
2804         ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
2805     }
2806
2807     ddSettings.initialDlbState =
2808             determineInitialDlbState(mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, ir);
2809     GMX_LOG(mdlog.info)
2810             .appendTextFormatted("Dynamic load balancing: %s",
2811                                  enumValueToString(ddSettings.initialDlbState));
2812
2813     return ddSettings;
2814 }
2815
2816 gmx_domdec_t::gmx_domdec_t(const t_inputrec& ir) : unitCellInfo(ir) {}
2817
2818 gmx_domdec_t::~gmx_domdec_t() = default;
2819
2820 namespace gmx
2821 {
2822
2823 // TODO once the functionality stablizes, move this class and
2824 // supporting functionality into builder.cpp
2825 /*! \brief Impl class for DD builder */
2826 class DomainDecompositionBuilder::Impl
2827 {
2828 public:
2829     //! Constructor
2830     Impl(const MDLogger&                   mdlog,
2831          t_commrec*                        cr,
2832          const DomdecOptions&              options,
2833          const MdrunOptions&               mdrunOptions,
2834          const gmx_mtop_t&                 mtop,
2835          const t_inputrec&                 ir,
2836          const matrix                      box,
2837          ArrayRef<const RangePartitioning> updateGroupingPerMoleculeType,
2838          bool                              useUpdateGroups,
2839          real                              maxUpdateGroupRadius,
2840          ArrayRef<const RVec>              xGlobal);
2841
2842     //! Build the resulting DD manager
2843     gmx_domdec_t* build(LocalAtomSetManager* atomSets);
2844
2845     //! Objects used in constructing and configuring DD
2846     //! {
2847     //! Logging object
2848     const MDLogger& mdlog_;
2849     //! Communication object
2850     t_commrec* cr_;
2851     //! User-supplied options configuring DD behavior
2852     const DomdecOptions options_;
2853     //! Global system topology
2854     const gmx_mtop_t& mtop_;
2855     //! User input values from the tpr file
2856     const t_inputrec& ir_;
2857     //! }
2858
2859     //! Internal objects used in constructing DD
2860     //! {
2861     //! Settings combined from the user input
2862     DDSettings ddSettings_;
2863     //! Information derived from the simulation system
2864     DDSystemInfo systemInfo_;
2865     //! Box structure
2866     gmx_ddbox_t ddbox_ = { 0 };
2867     //! Organization of the DD grids
2868     DDGridSetup ddGridSetup_;
2869     //! Organzation of the DD ranks
2870     DDRankSetup ddRankSetup_;
2871     //! Number of DD cells in each dimension
2872     ivec ddCellIndex_ = { 0, 0, 0 };
2873     //! IDs of PME-only ranks
2874     std::vector<int> pmeRanks_;
2875     //! Contains a valid Cartesian-communicator-based setup, or defaults.
2876     CartesianRankSetup cartSetup_;
2877     //! }
2878 };
2879
2880 DomainDecompositionBuilder::Impl::Impl(const MDLogger&                   mdlog,
2881                                        t_commrec*                        cr,
2882                                        const DomdecOptions&              options,
2883                                        const MdrunOptions&               mdrunOptions,
2884                                        const gmx_mtop_t&                 mtop,
2885                                        const t_inputrec&                 ir,
2886                                        const matrix                      box,
2887                                        ArrayRef<const RangePartitioning> updateGroupingPerMoleculeType,
2888                                        const bool                        useUpdateGroups,
2889                                        const real                        maxUpdateGroupRadius,
2890                                        ArrayRef<const RVec>              xGlobal) :
2891     mdlog_(mdlog), cr_(cr), options_(options), mtop_(mtop), ir_(ir)
2892 {
2893     GMX_LOG(mdlog_.info).appendTextFormatted("\nInitializing Domain Decomposition on %d ranks", cr_->sizeOfDefaultCommunicator);
2894
2895     ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_);
2896
2897     if (ddSettings_.eFlop > 1)
2898     {
2899         /* Ensure that we have different random flop counts on different ranks */
2900         srand(1 + cr_->rankInDefaultCommunicator);
2901     }
2902
2903     systemInfo_ = getSystemInfo(mdlog_,
2904                                 MASTER(cr_) ? DDRole::Master : DDRole::Agent,
2905                                 cr->mpiDefaultCommunicator,
2906                                 options_,
2907                                 mtop_,
2908                                 ir_,
2909                                 box,
2910                                 updateGroupingPerMoleculeType,
2911                                 useUpdateGroups,
2912                                 maxUpdateGroupRadius,
2913                                 xGlobal);
2914
2915     const int  numRanksRequested         = cr_->sizeOfDefaultCommunicator;
2916     const bool checkForLargePrimeFactors = (options_.numCells[0] <= 0);
2917     checkForValidRankCountRequests(
2918             numRanksRequested, EEL_PME(ir_.coulombtype), options_.numPmeRanks, checkForLargePrimeFactors);
2919
2920     // DD grid setup uses a more different cell size limit for
2921     // automated setup than the one in systemInfo_. The latter is used
2922     // in set_dd_limits() to configure DLB, for example.
2923     const real gridSetupCellsizeLimit =
2924             getDDGridSetupCellSizeLimit(mdlog_,
2925                                         !isDlbDisabled(ddSettings_.initialDlbState),
2926                                         options_.dlbScaling,
2927                                         ir_,
2928                                         systemInfo_.cellsizeLimit);
2929     ddGridSetup_ = getDDGridSetup(mdlog_,
2930                                   MASTER(cr_) ? DDRole::Master : DDRole::Agent,
2931                                   cr->mpiDefaultCommunicator,
2932                                   numRanksRequested,
2933                                   options_,
2934                                   ddSettings_,
2935                                   systemInfo_,
2936                                   gridSetupCellsizeLimit,
2937                                   mtop_,
2938                                   ir_,
2939                                   box,
2940                                   xGlobal,
2941                                   &ddbox_);
2942     checkDDGridSetup(ddGridSetup_,
2943                      MASTER(cr_) ? DDRole::Master : DDRole::Agent,
2944                      cr->mpiDefaultCommunicator,
2945                      cr->sizeOfDefaultCommunicator,
2946                      options_,
2947                      ddSettings_,
2948                      systemInfo_,
2949                      gridSetupCellsizeLimit,
2950                      ddbox_);
2951
2952     cr_->npmenodes = ddGridSetup_.numPmeOnlyRanks;
2953
2954     ddRankSetup_ = getDDRankSetup(mdlog_, cr_->sizeOfDefaultCommunicator, ddGridSetup_, ir_);
2955
2956     /* Generate the group communicator, also decides the duty of each rank */
2957     cartSetup_ = makeGroupCommunicators(
2958             mdlog_, ddSettings_, options_.rankOrder, ddRankSetup_, cr_, ddCellIndex_, &pmeRanks_);
2959 }
2960
2961 gmx_domdec_t* DomainDecompositionBuilder::Impl::build(LocalAtomSetManager* atomSets)
2962 {
2963     gmx_domdec_t* dd = new gmx_domdec_t(ir_);
2964
2965     copy_ivec(ddCellIndex_, dd->ci);
2966
2967     dd->comm = init_dd_comm();
2968
2969     dd->comm->ddRankSetup        = ddRankSetup_;
2970     dd->comm->cartesianRankSetup = cartSetup_;
2971
2972     set_dd_limits(mdlog_,
2973                   MASTER(cr_) ? DDRole::Master : DDRole::Agent,
2974                   dd,
2975                   options_,
2976                   ddSettings_,
2977                   systemInfo_,
2978                   ddGridSetup_,
2979                   ddRankSetup_.numPPRanks,
2980                   mtop_,
2981                   ir_,
2982                   ddbox_);
2983
2984     setupGroupCommunication(mdlog_, ddSettings_, pmeRanks_, cr_, mtop_.natoms, dd);
2985
2986     if (thisRankHasDuty(cr_, DUTY_PP))
2987     {
2988         set_ddgrid_parameters(mdlog_, dd, options_.dlbScaling, mtop_, ir_, &ddbox_);
2989
2990         setup_neighbor_relations(dd);
2991     }
2992
2993     /* Set overallocation to avoid frequent reallocation of arrays */
2994     set_over_alloc_dd(true);
2995
2996     dd->atomSets = atomSets;
2997
2998     return dd;
2999 }
3000
3001 DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger&      mdlog,
3002                                                        t_commrec*           cr,
3003                                                        const DomdecOptions& options,
3004                                                        const MdrunOptions&  mdrunOptions,
3005                                                        const gmx_mtop_t&    mtop,
3006                                                        const t_inputrec&    ir,
3007                                                        const matrix         box,
3008                                                        ArrayRef<const RangePartitioning> updateGroupingPerMoleculeType,
3009                                                        const bool           useUpdateGroups,
3010                                                        const real           maxUpdateGroupRadius,
3011                                                        ArrayRef<const RVec> xGlobal) :
3012     impl_(new Impl(mdlog, cr, options, mdrunOptions, mtop, ir, box, updateGroupingPerMoleculeType, useUpdateGroups, maxUpdateGroupRadius, xGlobal))
3013 {
3014 }
3015
3016 gmx_domdec_t* DomainDecompositionBuilder::build(LocalAtomSetManager* atomSets)
3017 {
3018     return impl_->build(atomSets);
3019 }
3020
3021 DomainDecompositionBuilder::~DomainDecompositionBuilder() = default;
3022
3023 } // namespace gmx
3024
3025 static gmx_bool test_dd_cutoff(const t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3026 {
3027     gmx_ddbox_t ddbox;
3028     int         LocallyLimited = 0;
3029
3030     const auto* dd = cr->dd;
3031
3032     set_ddbox(*dd, false, box, true, x, &ddbox);
3033
3034     LocallyLimited = 0;
3035
3036     for (int d = 0; d < dd->ndim; d++)
3037     {
3038         const int dim = dd->dim[d];
3039
3040         real inv_cell_size = DD_CELL_MARGIN * dd->numCells[dim] / ddbox.box_size[dim];
3041         if (dd->unitCellInfo.ddBoxIsDynamic)
3042         {
3043             inv_cell_size *= DD_PRES_SCALE_MARGIN;
3044         }
3045
3046         const int np = 1 + static_cast<int>(cutoffRequested * inv_cell_size * ddbox.skew_fac[dim]);
3047
3048         if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3049         {
3050             if (np > dd->comm->cd[d].np_dlb)
3051             {
3052                 return FALSE;
3053             }
3054
3055             /* If a current local cell size is smaller than the requested
3056              * cut-off, we could still fix it, but this gets very complicated.
3057              * Without fixing here, we might actually need more checks.
3058              */
3059             real cellSizeAlongDim =
3060                     (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim]) * ddbox.skew_fac[dim];
3061             if (cellSizeAlongDim * dd->comm->cd[d].np_dlb < cutoffRequested)
3062             {
3063                 LocallyLimited = 1;
3064             }
3065         }
3066     }
3067
3068     if (!isDlbDisabled(dd->comm))
3069     {
3070         /* If DLB is not active yet, we don't need to check the grid jumps.
3071          * Actually we shouldn't, because then the grid jump data is not set.
3072          */
3073         if (isDlbOn(dd->comm) && gmx::check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3074         {
3075             LocallyLimited = 1;
3076         }
3077
3078         gmx_sumi(1, &LocallyLimited, cr);
3079
3080         if (LocallyLimited > 0)
3081         {
3082             return FALSE;
3083         }
3084     }
3085
3086     return TRUE;
3087 }
3088
3089 bool change_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3090 {
3091     bool bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3092
3093     if (bCutoffAllowed)
3094     {
3095         cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3096     }
3097
3098     return bCutoffAllowed;
3099 }
3100
3101 void constructGpuHaloExchange(const gmx::MDLogger&            mdlog,
3102                               const t_commrec&                cr,
3103                               const gmx::DeviceStreamManager& deviceStreamManager,
3104                               gmx_wallcycle*                  wcycle)
3105 {
3106     GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedLocal),
3107                        "Local non-bonded stream should be valid when using"
3108                        "GPU halo exchange.");
3109     GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedNonLocal),
3110                        "Non-local non-bonded stream should be valid when using "
3111                        "GPU halo exchange.");
3112
3113     if (cr.dd->gpuHaloExchange[0].empty())
3114     {
3115         GMX_LOG(mdlog.warning)
3116                 .asParagraph()
3117                 .appendTextFormatted(
3118                         "NOTE: Activating the 'GPU halo exchange' feature, enabled "
3119                         "by the "
3120                         "GMX_GPU_DD_COMMS environment variable.");
3121     }
3122
3123     for (int d = 0; d < cr.dd->ndim; d++)
3124     {
3125         for (int pulse = cr.dd->gpuHaloExchange[d].size(); pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
3126         {
3127             cr.dd->gpuHaloExchange[d].push_back(std::make_unique<gmx::GpuHaloExchange>(
3128                     cr.dd,
3129                     d,
3130                     cr.mpi_comm_mygroup,
3131                     deviceStreamManager.context(),
3132                     deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedLocal),
3133                     deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedNonLocal),
3134                     pulse,
3135                     wcycle));
3136         }
3137     }
3138 }
3139
3140 void reinitGpuHaloExchange(const t_commrec&              cr,
3141                            const DeviceBuffer<gmx::RVec> d_coordinatesBuffer,
3142                            const DeviceBuffer<gmx::RVec> d_forcesBuffer)
3143 {
3144     for (int d = 0; d < cr.dd->ndim; d++)
3145     {
3146         for (int pulse = 0; pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
3147         {
3148             cr.dd->gpuHaloExchange[d][pulse]->reinitHalo(d_coordinatesBuffer, d_forcesBuffer);
3149         }
3150     }
3151 }
3152
3153 void communicateGpuHaloCoordinates(const t_commrec&      cr,
3154                                    const matrix          box,
3155                                    GpuEventSynchronizer* coordinatesReadyOnDeviceEvent)
3156 {
3157     for (int d = 0; d < cr.dd->ndim; d++)
3158     {
3159         for (int pulse = 0; pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
3160         {
3161             cr.dd->gpuHaloExchange[d][pulse]->communicateHaloCoordinates(box, coordinatesReadyOnDeviceEvent);
3162         }
3163     }
3164 }
3165
3166 void communicateGpuHaloForces(const t_commrec& cr, bool accumulateForces)
3167 {
3168     for (int d = cr.dd->ndim - 1; d >= 0; d--)
3169     {
3170         for (int pulse = cr.dd->comm->cd[d].numPulses() - 1; pulse >= 0; pulse--)
3171         {
3172             cr.dd->gpuHaloExchange[d][pulse]->communicateHaloForces(accumulateForces);
3173         }
3174     }
3175 }