src/gromacs/domdec/domdec.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2005,2006,2007,2008,2009 by the GROMACS development team.
   5  * Copyright (c) 2010,2011,2012,2013,2014 by the GROMACS development team.
   6  * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team.
   7  * Copyright (c) 2020, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38
  39 #include "gmxpre.h"
  40
  41 #include "domdec.h"
  42
  43 #include "config.h"
  44
  45 #include <cassert>
  46 #include <cinttypes>
  47 #include <climits>
  48 #include <cmath>
  49 #include <cstdio>
  50 #include <cstdlib>
  51 #include <cstring>
  52
  53 #include <algorithm>
  54 #include <memory>
  55
  56 #include "gromacs/domdec/builder.h"
  57 #include "gromacs/domdec/collect.h"
  58 #include "gromacs/domdec/dlb.h"
  59 #include "gromacs/domdec/dlbtiming.h"
  60 #include "gromacs/domdec/domdec_network.h"
  61 #include "gromacs/domdec/ga2la.h"
  62 #include "gromacs/domdec/gpuhaloexchange.h"
  63 #include "gromacs/domdec/options.h"
  64 #include "gromacs/domdec/partition.h"
  65 #include "gromacs/gmxlib/network.h"
  66 #include "gromacs/gmxlib/nrnb.h"
  67 #include "gromacs/gpu_utils/gpu_utils.h"
  68 #include "gromacs/hardware/hw_info.h"
  69 #include "gromacs/listed_forces/manage_threading.h"
  70 #include "gromacs/math/vec.h"
  71 #include "gromacs/math/vectypes.h"
  72 #include "gromacs/mdlib/calc_verletbuf.h"
  73 #include "gromacs/mdlib/constr.h"
  74 #include "gromacs/mdlib/constraintrange.h"
  75 #include "gromacs/mdlib/updategroups.h"
  76 #include "gromacs/mdlib/vsite.h"
  77 #include "gromacs/mdtypes/commrec.h"
  78 #include "gromacs/mdtypes/forceoutput.h"
  79 #include "gromacs/mdtypes/inputrec.h"
  80 #include "gromacs/mdtypes/mdrunoptions.h"
  81 #include "gromacs/mdtypes/state.h"
  82 #include "gromacs/pbcutil/ishift.h"
  83 #include "gromacs/pbcutil/pbc.h"
  84 #include "gromacs/pulling/pull.h"
  85 #include "gromacs/timing/wallcycle.h"
  86 #include "gromacs/topology/block.h"
  87 #include "gromacs/topology/idef.h"
  88 #include "gromacs/topology/ifunc.h"
  89 #include "gromacs/topology/mtop_lookup.h"
  90 #include "gromacs/topology/mtop_util.h"
  91 #include "gromacs/topology/topology.h"
  92 #include "gromacs/utility/basedefinitions.h"
  93 #include "gromacs/utility/basenetwork.h"
  94 #include "gromacs/utility/cstringutil.h"
  95 #include "gromacs/utility/exceptions.h"
  96 #include "gromacs/utility/fatalerror.h"
  97 #include "gromacs/utility/gmxmpi.h"
  98 #include "gromacs/utility/logger.h"
  99 #include "gromacs/utility/real.h"
 100 #include "gromacs/utility/smalloc.h"
 101 #include "gromacs/utility/strconvert.h"
 102 #include "gromacs/utility/stringstream.h"
 103 #include "gromacs/utility/stringutil.h"
 104 #include "gromacs/utility/textwriter.h"
 105
 106 #include "atomdistribution.h"
 107 #include "box.h"
 108 #include "cellsizes.h"
 109 #include "distribute.h"
 110 #include "domdec_constraints.h"
 111 #include "domdec_internal.h"
 112 #include "domdec_setup.h"
 113 #include "domdec_vsite.h"
 114 #include "redistribute.h"
 115 #include "utility.h"
 116
 117 // TODO remove this when moving domdec into gmx namespace
 118 using gmx::DdRankOrder;
 119 using gmx::DlbOption;
 120 using gmx::DomdecOptions;
 121
 122 static const char* edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
 123
 124 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
 125 #define DD_CGIBS 2
 126
 127 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
 128 #define DD_FLAG_NRCG 65535
 129 #define DD_FLAG_FW(d) (1 << (16 + (d)*2))
 130 #define DD_FLAG_BW(d) (1 << (16 + (d)*2 + 1))
 131
 132 /* The DD zone order */
 133 static const ivec dd_zo[DD_MAXZONE] = { { 0, 0, 0 }, { 1, 0, 0 }, { 1, 1, 0 }, { 0, 1, 0 },
 134                                         { 0, 1, 1 }, { 0, 0, 1 }, { 1, 0, 1 }, { 1, 1, 1 } };
 135
 136 /* The non-bonded zone-pair setup for domain decomposition
 137  * The first number is the i-zone, the second number the first j-zone seen by
 138  * this i-zone, the third number the last+1 j-zone seen by this i-zone.
 139  * As is, this is for 3D decomposition, where there are 4 i-zones.
 140  * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
 141  * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
 142  */
 143 static const int ddNonbondedZonePairRanges[DD_MAXIZONE][3] = { { 0, 0, 8 },
 144                                                                { 1, 3, 6 },
 145                                                                { 2, 5, 6 },
 146                                                                { 3, 5, 7 } };
 147
 148 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
 149 {
 150     xyz[XX] = ind / (nc[YY] * nc[ZZ]);
 151     xyz[YY] = (ind / nc[ZZ]) % nc[YY];
 152     xyz[ZZ] = ind % nc[ZZ];
 153 }
 154
 155 static int ddcoord2ddnodeid(gmx_domdec_t* dd, ivec c)
 156 {
 157     int ddnodeid = -1;
 158
 159     const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
 160     const int                 ddindex   = dd_index(dd->numCells, c);
 161     if (cartSetup.bCartesianPP_PME)
 162     {
 163         ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
 164     }
 165     else if (cartSetup.bCartesianPP)
 166     {
 167 #if GMX_MPI
 168         MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
 169 #endif
 170     }
 171     else
 172     {
 173         ddnodeid = ddindex;
 174     }
 175
 176     return ddnodeid;
 177 }
 178
 179 int ddglatnr(const gmx_domdec_t* dd, int i)
 180 {
 181     int atnr;
 182
 183     if (dd == nullptr)
 184     {
 185         atnr = i + 1;
 186     }
 187     else
 188     {
 189         if (i >= dd->comm->atomRanges.numAtomsTotal())
 190         {
 191             gmx_fatal(FARGS,
 192                       "glatnr called with %d, which is larger than the local number of atoms (%d)",
 193                       i, dd->comm->atomRanges.numAtomsTotal());
 194         }
 195         atnr = dd->globalAtomIndices[i] + 1;
 196     }
 197
 198     return atnr;
 199 }
 200
 201 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t& dd)
 202 {
 203     GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
 204     return dd.comm->systemInfo.updateGroupingPerMoleculetype;
 205 }
 206
 207 void dd_store_state(gmx_domdec_t* dd, t_state* state)
 208 {
 209     int i;
 210
 211     if (state->ddp_count != dd->ddp_count)
 212     {
 213         gmx_incons("The MD state does not match the domain decomposition state");
 214     }
 215
 216     state->cg_gl.resize(dd->ncg_home);
 217     for (i = 0; i < dd->ncg_home; i++)
 218     {
 219         state->cg_gl[i] = dd->globalAtomGroupIndices[i];
 220     }
 221
 222     state->ddp_count_cg_gl = dd->ddp_count;
 223 }
 224
 225 gmx_domdec_zones_t* domdec_zones(gmx_domdec_t* dd)
 226 {
 227     return &dd->comm->zones;
 228 }
 229
 230 int dd_numAtomsZones(const gmx_domdec_t& dd)
 231 {
 232     return dd.comm->atomRanges.end(DDAtomRanges::Type::Zones);
 233 }
 234
 235 int dd_numHomeAtoms(const gmx_domdec_t& dd)
 236 {
 237     return dd.comm->atomRanges.numHomeAtoms();
 238 }
 239
 240 int dd_natoms_mdatoms(const gmx_domdec_t* dd)
 241 {
 242     /* We currently set mdatoms entries for all atoms:
 243      * local + non-local + communicated for vsite + constraints
 244      */
 245
 246     return dd->comm->atomRanges.numAtomsTotal();
 247 }
 248
 249 int dd_natoms_vsite(const gmx_domdec_t* dd)
 250 {
 251     return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
 252 }
 253
 254 void dd_get_constraint_range(const gmx_domdec_t* dd, int* at_start, int* at_end)
 255 {
 256     *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
 257     *at_end   = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
 258 }
 259
 260 void dd_move_x(gmx_domdec_t* dd, const matrix box, gmx::ArrayRef<gmx::RVec> x, gmx_wallcycle* wcycle)
 261 {
 262     wallcycle_start(wcycle, ewcMOVEX);
 263
 264     int                    nzone, nat_tot;
 265     gmx_domdec_comm_t*     comm;
 266     gmx_domdec_comm_dim_t* cd;
 267     rvec                   shift = { 0, 0, 0 };
 268     gmx_bool               bPBC, bScrew;
 269
 270     comm = dd->comm;
 271
 272     nzone   = 1;
 273     nat_tot = comm->atomRanges.numHomeAtoms();
 274     for (int d = 0; d < dd->ndim; d++)
 275     {
 276         bPBC   = (dd->ci[dd->dim[d]] == 0);
 277         bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
 278         if (bPBC)
 279         {
 280             copy_rvec(box[dd->dim[d]], shift);
 281         }
 282         cd = &comm->cd[d];
 283         for (const gmx_domdec_ind_t& ind : cd->ind)
 284         {
 285             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 286             gmx::ArrayRef<gmx::RVec>& sendBuffer = sendBufferAccess.buffer;
 287             int                       n          = 0;
 288             if (!bPBC)
 289             {
 290                 for (int j : ind.index)
 291                 {
 292                     sendBuffer[n] = x[j];
 293                     n++;
 294                 }
 295             }
 296             else if (!bScrew)
 297             {
 298                 for (int j : ind.index)
 299                 {
 300                     /* We need to shift the coordinates */
 301                     for (int d = 0; d < DIM; d++)
 302                     {
 303                         sendBuffer[n][d] = x[j][d] + shift[d];
 304                     }
 305                     n++;
 306                 }
 307             }
 308             else
 309             {
 310                 for (int j : ind.index)
 311                 {
 312                     /* Shift x */
 313                     sendBuffer[n][XX] = x[j][XX] + shift[XX];
 314                     /* Rotate y and z.
 315                      * This operation requires a special shift force
 316                      * treatment, which is performed in calc_vir.
 317                      */
 318                     sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
 319                     sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
 320                     n++;
 321                 }
 322             }
 323
 324             DDBufferAccess<gmx::RVec> receiveBufferAccess(
 325                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 326
 327             gmx::ArrayRef<gmx::RVec> receiveBuffer;
 328             if (cd->receiveInPlace)
 329             {
 330                 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
 331             }
 332             else
 333             {
 334                 receiveBuffer = receiveBufferAccess.buffer;
 335             }
 336             /* Send and receive the coordinates */
 337             ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
 338
 339             if (!cd->receiveInPlace)
 340             {
 341                 int j = 0;
 342                 for (int zone = 0; zone < nzone; zone++)
 343                 {
 344                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 345                     {
 346                         x[i] = receiveBuffer[j++];
 347                     }
 348                 }
 349             }
 350             nat_tot += ind.nrecv[nzone + 1];
 351         }
 352         nzone += nzone;
 353     }
 354
 355     wallcycle_stop(wcycle, ewcMOVEX);
 356 }
 357
 358 void dd_move_f(gmx_domdec_t* dd, gmx::ForceWithShiftForces* forceWithShiftForces, gmx_wallcycle* wcycle)
 359 {
 360     wallcycle_start(wcycle, ewcMOVEF);
 361
 362     gmx::ArrayRef<gmx::RVec> f      = forceWithShiftForces->force();
 363     gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();
 364
 365     gmx_domdec_comm_t& comm    = *dd->comm;
 366     int                nzone   = comm.zones.n / 2;
 367     int                nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
 368     for (int d = dd->ndim - 1; d >= 0; d--)
 369     {
 370         /* Only forces in domains near the PBC boundaries need to
 371            consider PBC in the treatment of fshift */
 372         const bool shiftForcesNeedPbc =
 373                 (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
 374         const bool applyScrewPbc =
 375                 (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
 376         /* Determine which shift vector we need */
 377         ivec vis        = { 0, 0, 0 };
 378         vis[dd->dim[d]] = 1;
 379         const int is    = IVEC2IS(vis);
 380
 381         /* Loop over the pulses */
 382         const gmx_domdec_comm_dim_t& cd = comm.cd[d];
 383         for (int p = cd.numPulses() - 1; p >= 0; p--)
 384         {
 385             const gmx_domdec_ind_t&   ind = cd.ind[p];
 386             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
 387             gmx::ArrayRef<gmx::RVec>& receiveBuffer = receiveBufferAccess.buffer;
 388
 389             nat_tot -= ind.nrecv[nzone + 1];
 390
 391             DDBufferAccess<gmx::RVec> sendBufferAccess(
 392                     comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 393
 394             gmx::ArrayRef<gmx::RVec> sendBuffer;
 395             if (cd.receiveInPlace)
 396             {
 397                 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
 398             }
 399             else
 400             {
 401                 sendBuffer = sendBufferAccess.buffer;
 402                 int j      = 0;
 403                 for (int zone = 0; zone < nzone; zone++)
 404                 {
 405                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 406                     {
 407                         sendBuffer[j++] = f[i];
 408                     }
 409                 }
 410             }
 411             /* Communicate the forces */
 412             ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
 413             /* Add the received forces */
 414             int n = 0;
 415             if (!shiftForcesNeedPbc)
 416             {
 417                 for (int j : ind.index)
 418                 {
 419                     for (int d = 0; d < DIM; d++)
 420                     {
 421                         f[j][d] += receiveBuffer[n][d];
 422                     }
 423                     n++;
 424                 }
 425             }
 426             else if (!applyScrewPbc)
 427             {
 428                 for (int j : ind.index)
 429                 {
 430                     for (int d = 0; d < DIM; d++)
 431                     {
 432                         f[j][d] += receiveBuffer[n][d];
 433                     }
 434                     /* Add this force to the shift force */
 435                     for (int d = 0; d < DIM; d++)
 436                     {
 437                         fshift[is][d] += receiveBuffer[n][d];
 438                     }
 439                     n++;
 440                 }
 441             }
 442             else
 443             {
 444                 for (int j : ind.index)
 445                 {
 446                     /* Rotate the force */
 447                     f[j][XX] += receiveBuffer[n][XX];
 448                     f[j][YY] -= receiveBuffer[n][YY];
 449                     f[j][ZZ] -= receiveBuffer[n][ZZ];
 450                     if (shiftForcesNeedPbc)
 451                     {
 452                         /* Add this force to the shift force */
 453                         for (int d = 0; d < DIM; d++)
 454                         {
 455                             fshift[is][d] += receiveBuffer[n][d];
 456                         }
 457                     }
 458                     n++;
 459                 }
 460             }
 461         }
 462         nzone /= 2;
 463     }
 464     wallcycle_stop(wcycle, ewcMOVEF);
 465 }
 466
 467 /* Convenience function for extracting a real buffer from an rvec buffer
 468  *
 469  * To reduce the number of temporary communication buffers and avoid
 470  * cache polution, we reuse gmx::RVec buffers for storing reals.
 471  * This functions return a real buffer reference with the same number
 472  * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
 473  */
 474 static gmx::ArrayRef<real> realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
 475 {
 476     return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0], arrayRef.size());
 477 }
 478
 479 void dd_atom_spread_real(gmx_domdec_t* dd, real v[])
 480 {
 481     int                    nzone, nat_tot;
 482     gmx_domdec_comm_t*     comm;
 483     gmx_domdec_comm_dim_t* cd;
 484
 485     comm = dd->comm;
 486
 487     nzone   = 1;
 488     nat_tot = comm->atomRanges.numHomeAtoms();
 489     for (int d = 0; d < dd->ndim; d++)
 490     {
 491         cd = &comm->cd[d];
 492         for (const gmx_domdec_ind_t& ind : cd->ind)
 493         {
 494             /* Note: We provision for RVec instead of real, so a factor of 3
 495              * more than needed. The buffer actually already has this size
 496              * and we pass a plain pointer below, so this does not matter.
 497              */
 498             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 499             gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 500             int                 n          = 0;
 501             for (int j : ind.index)
 502             {
 503                 sendBuffer[n++] = v[j];
 504             }
 505
 506             DDBufferAccess<gmx::RVec> receiveBufferAccess(
 507                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 508
 509             gmx::ArrayRef<real> receiveBuffer;
 510             if (cd->receiveInPlace)
 511             {
 512                 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 513             }
 514             else
 515             {
 516                 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 517             }
 518             /* Send and receive the data */
 519             ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
 520             if (!cd->receiveInPlace)
 521             {
 522                 int j = 0;
 523                 for (int zone = 0; zone < nzone; zone++)
 524                 {
 525                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 526                     {
 527                         v[i] = receiveBuffer[j++];
 528                     }
 529                 }
 530             }
 531             nat_tot += ind.nrecv[nzone + 1];
 532         }
 533         nzone += nzone;
 534     }
 535 }
 536
 537 void dd_atom_sum_real(gmx_domdec_t* dd, real v[])
 538 {
 539     int                    nzone, nat_tot;
 540     gmx_domdec_comm_t*     comm;
 541     gmx_domdec_comm_dim_t* cd;
 542
 543     comm = dd->comm;
 544
 545     nzone   = comm->zones.n / 2;
 546     nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
 547     for (int d = dd->ndim - 1; d >= 0; d--)
 548     {
 549         cd = &comm->cd[d];
 550         for (int p = cd->numPulses() - 1; p >= 0; p--)
 551         {
 552             const gmx_domdec_ind_t& ind = cd->ind[p];
 553
 554             /* Note: We provision for RVec instead of real, so a factor of 3
 555              * more than needed. The buffer actually already has this size
 556              * and we typecast, so this works as intended.
 557              */
 558             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 559             gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 560             nat_tot -= ind.nrecv[nzone + 1];
 561
 562             DDBufferAccess<gmx::RVec> sendBufferAccess(
 563                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 564
 565             gmx::ArrayRef<real> sendBuffer;
 566             if (cd->receiveInPlace)
 567             {
 568                 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 569             }
 570             else
 571             {
 572                 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 573                 int j      = 0;
 574                 for (int zone = 0; zone < nzone; zone++)
 575                 {
 576                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 577                     {
 578                         sendBuffer[j++] = v[i];
 579                     }
 580                 }
 581             }
 582             /* Communicate the forces */
 583             ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
 584             /* Add the received forces */
 585             int n = 0;
 586             for (int j : ind.index)
 587             {
 588                 v[j] += receiveBuffer[n];
 589                 n++;
 590             }
 591         }
 592         nzone /= 2;
 593     }
 594 }
 595
 596 real dd_cutoff_multibody(const gmx_domdec_t* dd)
 597 {
 598     const gmx_domdec_comm_t& comm       = *dd->comm;
 599     const DDSystemInfo&      systemInfo = comm.systemInfo;
 600
 601     real r = -1;
 602     if (systemInfo.haveInterDomainMultiBodyBondeds)
 603     {
 604         if (comm.cutoff_mbody > 0)
 605         {
 606             r = comm.cutoff_mbody;
 607         }
 608         else
 609         {
 610             /* cutoff_mbody=0 means we do not have DLB */
 611             r = comm.cellsize_min[dd->dim[0]];
 612             for (int di = 1; di < dd->ndim; di++)
 613             {
 614                 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
 615             }
 616             if (comm.systemInfo.filterBondedCommunication)
 617             {
 618                 r = std::max(r, comm.cutoff_mbody);
 619             }
 620             else
 621             {
 622                 r = std::min(r, systemInfo.cutoff);
 623             }
 624         }
 625     }
 626
 627     return r;
 628 }
 629
 630 real dd_cutoff_twobody(const gmx_domdec_t* dd)
 631 {
 632     real r_mb;
 633
 634     r_mb = dd_cutoff_multibody(dd);
 635
 636     return std::max(dd->comm->systemInfo.cutoff, r_mb);
 637 }
 638
 639
 640 static void dd_cart_coord2pmecoord(const DDRankSetup&        ddRankSetup,
 641                                    const CartesianRankSetup& cartSetup,
 642                                    const ivec                coord,
 643                                    ivec                      coord_pme)
 644 {
 645     const int nc   = ddRankSetup.numPPCells[cartSetup.cartpmedim];
 646     const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
 647     copy_ivec(coord, coord_pme);
 648     coord_pme[cartSetup.cartpmedim] =
 649             nc + (coord[cartSetup.cartpmedim] * (ntot - nc) + (ntot - nc) / 2) / nc;
 650 }
 651
 652 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
 653 static int ddindex2pmeindex(const DDRankSetup& ddRankSetup, const int ddCellIndex)
 654 {
 655     const int npp  = ddRankSetup.numPPRanks;
 656     const int npme = ddRankSetup.numRanksDoingPme;
 657
 658     /* Here we assign a PME node to communicate with this DD node
 659      * by assuming that the major index of both is x.
 660      * We add npme/2 to obtain an even distribution.
 661      */
 662     return (ddCellIndex * npme + npme / 2) / npp;
 663 }
 664
 665 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup& ddRankSetup)
 666 {
 667     std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
 668
 669     int n = 0;
 670     for (int i = 0; i < ddRankSetup.numPPRanks; i++)
 671     {
 672         const int p0 = ddindex2pmeindex(ddRankSetup, i);
 673         const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
 674         if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
 675         {
 676             if (debug)
 677             {
 678                 fprintf(debug, "pme_rank[%d] = %d\n", n, i + 1 + n);
 679             }
 680             pmeRanks[n] = i + 1 + n;
 681             n++;
 682         }
 683     }
 684
 685     return pmeRanks;
 686 }
 687
 688 static int gmx_ddcoord2pmeindex(const t_commrec* cr, int x, int y, int z)
 689 {
 690     gmx_domdec_t* dd;
 691     ivec          coords;
 692     int           slab;
 693
 694     dd         = cr->dd;
 695     coords[XX] = x;
 696     coords[YY] = y;
 697     coords[ZZ] = z;
 698     slab       = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->numCells, coords));
 699
 700     return slab;
 701 }
 702
 703 static int ddcoord2simnodeid(const t_commrec* cr, int x, int y, int z)
 704 {
 705     const CartesianRankSetup& cartSetup = cr->dd->comm->cartesianRankSetup;
 706     ivec                      coords    = { x, y, z };
 707     int                       nodeid    = -1;
 708
 709     if (cartSetup.bCartesianPP_PME)
 710     {
 711 #if GMX_MPI
 712         MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
 713 #endif
 714     }
 715     else
 716     {
 717         int ddindex = dd_index(cr->dd->numCells, coords);
 718         if (cartSetup.bCartesianPP)
 719         {
 720             nodeid = cartSetup.ddindex2simnodeid[ddindex];
 721         }
 722         else
 723         {
 724             if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
 725             {
 726                 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
 727             }
 728             else
 729             {
 730                 nodeid = ddindex;
 731             }
 732         }
 733     }
 734
 735     return nodeid;
 736 }
 737
 738 static int dd_simnode2pmenode(const DDRankSetup&        ddRankSetup,
 739                               const CartesianRankSetup& cartSetup,
 740                               gmx::ArrayRef<const int>  pmeRanks,
 741                               const t_commrec gmx_unused* cr,
 742                               const int                   sim_nodeid)
 743 {
 744     int pmenode = -1;
 745
 746     /* This assumes a uniform x domain decomposition grid cell size */
 747     if (cartSetup.bCartesianPP_PME)
 748     {
 749 #if GMX_MPI
 750         ivec coord, coord_pme;
 751         MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
 752         if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
 753         {
 754             /* This is a PP rank */
 755             dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
 756             MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
 757         }
 758 #endif
 759     }
 760     else if (cartSetup.bCartesianPP)
 761     {
 762         if (sim_nodeid < ddRankSetup.numPPRanks)
 763         {
 764             pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
 765         }
 766     }
 767     else
 768     {
 769         /* This assumes DD cells with identical x coordinates
 770          * are numbered sequentially.
 771          */
 772         if (pmeRanks.empty())
 773         {
 774             if (sim_nodeid < ddRankSetup.numPPRanks)
 775             {
 776                 /* The DD index equals the nodeid */
 777                 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
 778             }
 779         }
 780         else
 781         {
 782             int i = 0;
 783             while (sim_nodeid > pmeRanks[i])
 784             {
 785                 i++;
 786             }
 787             if (sim_nodeid < pmeRanks[i])
 788             {
 789                 pmenode = pmeRanks[i];
 790             }
 791         }
 792     }
 793
 794     return pmenode;
 795 }
 796
 797 NumPmeDomains getNumPmeDomains(const gmx_domdec_t* dd)
 798 {
 799     if (dd != nullptr)
 800     {
 801         return { dd->comm->ddRankSetup.npmenodes_x, dd->comm->ddRankSetup.npmenodes_y };
 802     }
 803     else
 804     {
 805         return { 1, 1 };
 806     }
 807 }
 808
 809 std::vector<int> get_pme_ddranks(const t_commrec* cr, const int pmenodeid)
 810 {
 811     const DDRankSetup&        ddRankSetup = cr->dd->comm->ddRankSetup;
 812     const CartesianRankSetup& cartSetup   = cr->dd->comm->cartesianRankSetup;
 813     GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks,
 814                        "This function should only be called when PME-only ranks are in use");
 815     std::vector<int> ddranks;
 816     ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1) / ddRankSetup.numRanksDoingPme);
 817
 818     for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
 819     {
 820         for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
 821         {
 822             for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
 823             {
 824                 if (cartSetup.bCartesianPP_PME)
 825                 {
 826                     ivec coord = { x, y, z };
 827                     ivec coord_pme;
 828                     dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
 829                     if (cr->dd->ci[XX] == coord_pme[XX] && cr->dd->ci[YY] == coord_pme[YY]
 830                         && cr->dd->ci[ZZ] == coord_pme[ZZ])
 831                     {
 832                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 833                     }
 834                 }
 835                 else
 836                 {
 837                     /* The slab corresponds to the nodeid in the PME group */
 838                     if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
 839                     {
 840                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 841                     }
 842                 }
 843             }
 844         }
 845     }
 846     return ddranks;
 847 }
 848
 849 static gmx_bool receive_vir_ener(const gmx_domdec_t* dd, gmx::ArrayRef<const int> pmeRanks, const t_commrec* cr)
 850 {
 851     gmx_bool bReceive = TRUE;
 852
 853     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
 854     if (ddRankSetup.usePmeOnlyRanks)
 855     {
 856         const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
 857         if (cartSetup.bCartesianPP_PME)
 858         {
 859 #if GMX_MPI
 860             int  pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
 861             ivec coords;
 862             MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
 863             coords[cartSetup.cartpmedim]++;
 864             if (coords[cartSetup.cartpmedim] < dd->numCells[cartSetup.cartpmedim])
 865             {
 866                 int rank;
 867                 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
 868                 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
 869                 {
 870                     /* This is not the last PP node for pmenode */
 871                     bReceive = FALSE;
 872                 }
 873             }
 874 #else
 875             GMX_RELEASE_ASSERT(
 876                     false,
 877                     "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
 878 #endif
 879         }
 880         else
 881         {
 882             int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
 883             if (cr->sim_nodeid + 1 < cr->nnodes
 884                 && dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
 885             {
 886                 /* This is not the last PP node for pmenode */
 887                 bReceive = FALSE;
 888             }
 889         }
 890     }
 891
 892     return bReceive;
 893 }
 894
 895 static void set_slb_pme_dim_f(gmx_domdec_t* dd, int dim, real** dim_f)
 896 {
 897     gmx_domdec_comm_t* comm;
 898     int                i;
 899
 900     comm = dd->comm;
 901
 902     snew(*dim_f, dd->numCells[dim] + 1);
 903     (*dim_f)[0] = 0;
 904     for (i = 1; i < dd->numCells[dim]; i++)
 905     {
 906         if (comm->slb_frac[dim])
 907         {
 908             (*dim_f)[i] = (*dim_f)[i - 1] + comm->slb_frac[dim][i - 1];
 909         }
 910         else
 911         {
 912             (*dim_f)[i] = static_cast<real>(i) / static_cast<real>(dd->numCells[dim]);
 913         }
 914     }
 915     (*dim_f)[dd->numCells[dim]] = 1;
 916 }
 917
 918 static void init_ddpme(gmx_domdec_t* dd, gmx_ddpme_t* ddpme, int dimind)
 919 {
 920     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
 921
 922     if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
 923     {
 924         ddpme->dim = YY;
 925     }
 926     else
 927     {
 928         ddpme->dim = dimind;
 929     }
 930     ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
 931
 932     ddpme->nslab = (ddpme->dim == 0 ? ddRankSetup.npmenodes_x : ddRankSetup.npmenodes_y);
 933
 934     if (ddpme->nslab <= 1)
 935     {
 936         return;
 937     }
 938
 939     const int nso = ddRankSetup.numRanksDoingPme / ddpme->nslab;
 940     /* Determine for each PME slab the PP location range for dimension dim */
 941     snew(ddpme->pp_min, ddpme->nslab);
 942     snew(ddpme->pp_max, ddpme->nslab);
 943     for (int slab = 0; slab < ddpme->nslab; slab++)
 944     {
 945         ddpme->pp_min[slab] = dd->numCells[dd->dim[dimind]] - 1;
 946         ddpme->pp_max[slab] = 0;
 947     }
 948     for (int i = 0; i < dd->nnodes; i++)
 949     {
 950         ivec xyz;
 951         ddindex2xyz(dd->numCells, i, xyz);
 952         /* For y only use our y/z slab.
 953          * This assumes that the PME x grid size matches the DD grid size.
 954          */
 955         if (dimind == 0 || xyz[XX] == dd->ci[XX])
 956         {
 957             const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
 958             int       slab;
 959             if (dimind == 0)
 960             {
 961                 slab = pmeindex / nso;
 962             }
 963             else
 964             {
 965                 slab = pmeindex % ddpme->nslab;
 966             }
 967             ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
 968             ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
 969         }
 970     }
 971
 972     set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
 973 }
 974
 975 int dd_pme_maxshift_x(const gmx_domdec_t* dd)
 976 {
 977     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
 978
 979     if (ddRankSetup.ddpme[0].dim == XX)
 980     {
 981         return ddRankSetup.ddpme[0].maxshift;
 982     }
 983     else
 984     {
 985         return 0;
 986     }
 987 }
 988
 989 int dd_pme_maxshift_y(const gmx_domdec_t* dd)
 990 {
 991     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
 992
 993     if (ddRankSetup.ddpme[0].dim == YY)
 994     {
 995         return ddRankSetup.ddpme[0].maxshift;
 996     }
 997     else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
 998     {
 999         return ddRankSetup.ddpme[1].maxshift;
1000     }
1001     else
1002     {
1003         return 0;
1004     }
1005 }
1006
1007 bool ddHaveSplitConstraints(const gmx_domdec_t& dd)
1008 {
1009     return dd.comm->systemInfo.haveSplitConstraints;
1010 }
1011
1012 bool ddUsesUpdateGroups(const gmx_domdec_t& dd)
1013 {
1014     return dd.comm->systemInfo.useUpdateGroups;
1015 }
1016
1017 void dd_cycles_add(const gmx_domdec_t* dd, float cycles, int ddCycl)
1018 {
1019     /* Note that the cycles value can be incorrect, either 0 or some
1020      * extremely large value, when our thread migrated to another core
1021      * with an unsynchronized cycle counter. If this happens less often
1022      * that once per nstlist steps, this will not cause issues, since
1023      * we later subtract the maximum value from the sum over nstlist steps.
1024      * A zero count will slightly lower the total, but that's a small effect.
1025      * Note that the main purpose of the subtraction of the maximum value
1026      * is to avoid throwing off the load balancing when stalls occur due
1027      * e.g. system activity or network congestion.
1028      */
1029     dd->comm->cycl[ddCycl] += cycles;
1030     dd->comm->cycl_n[ddCycl]++;
1031     if (cycles > dd->comm->cycl_max[ddCycl])
1032     {
1033         dd->comm->cycl_max[ddCycl] = cycles;
1034     }
1035 }
1036
1037 #if GMX_MPI
1038 static void make_load_communicator(gmx_domdec_t* dd, int dim_ind, ivec loc)
1039 {
1040     MPI_Comm c_row;
1041     int      dim, i, rank;
1042     ivec     loc_c;
1043     gmx_bool bPartOfGroup = FALSE;
1044
1045     dim = dd->dim[dim_ind];
1046     copy_ivec(loc, loc_c);
1047     for (i = 0; i < dd->numCells[dim]; i++)
1048     {
1049         loc_c[dim] = i;
1050         rank       = dd_index(dd->numCells, loc_c);
1051         if (rank == dd->rank)
1052         {
1053             /* This process is part of the group */
1054             bPartOfGroup = TRUE;
1055         }
1056     }
1057     MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank, &c_row);
1058     if (bPartOfGroup)
1059     {
1060         dd->comm->mpi_comm_load[dim_ind] = c_row;
1061         if (!isDlbDisabled(dd->comm))
1062         {
1063             DDCellsizesWithDlb& cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1064
1065             if (dd->ci[dim] == dd->master_ci[dim])
1066             {
1067                 /* This is the root process of this row */
1068                 cellsizes.rowMaster = std::make_unique<RowMaster>();
1069
1070                 RowMaster& rowMaster = *cellsizes.rowMaster;
1071                 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1072                 rowMaster.oldCellFrac.resize(dd->numCells[dim] + 1);
1073                 rowMaster.isCellMin.resize(dd->numCells[dim]);
1074                 if (dim_ind > 0)
1075                 {
1076                     rowMaster.bounds.resize(dd->numCells[dim]);
1077                 }
1078                 rowMaster.buf_ncd.resize(dd->numCells[dim]);
1079             }
1080             else
1081             {
1082                 /* This is not a root process, we only need to receive cell_f */
1083                 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1084             }
1085         }
1086         if (dd->ci[dim] == dd->master_ci[dim])
1087         {
1088             snew(dd->comm->load[dim_ind].load, dd->numCells[dim] * DD_NLOAD_MAX);
1089         }
1090     }
1091 }
1092 #endif
1093
1094 void dd_setup_dlb_resource_sharing(const t_commrec* cr, int gpu_id)
1095 {
1096 #if GMX_MPI
1097     int           physicalnode_id_hash;
1098     gmx_domdec_t* dd;
1099     MPI_Comm      mpi_comm_pp_physicalnode;
1100
1101     if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1102     {
1103         /* Only ranks with short-ranged tasks (currently) use GPUs.
1104          * If we don't have GPUs assigned, there are no resources to share.
1105          */
1106         return;
1107     }
1108
1109     physicalnode_id_hash = gmx_physicalnode_id_hash();
1110
1111     dd = cr->dd;
1112
1113     if (debug)
1114     {
1115         fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1116         fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n", dd->rank,
1117                 physicalnode_id_hash, gpu_id);
1118     }
1119     /* Split the PP communicator over the physical nodes */
1120     /* TODO: See if we should store this (before), as it's also used for
1121      * for the nodecomm summation.
1122      */
1123     // TODO PhysicalNodeCommunicator could be extended/used to handle
1124     // the need for per-node per-group communicators.
1125     MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank, &mpi_comm_pp_physicalnode);
1126     MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank, &dd->comm->mpi_comm_gpu_shared);
1127     MPI_Comm_free(&mpi_comm_pp_physicalnode);
1128     MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1129
1130     if (debug)
1131     {
1132         fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1133     }
1134
1135     /* Note that some ranks could share a GPU, while others don't */
1136
1137     if (dd->comm->nrank_gpu_shared == 1)
1138     {
1139         MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1140     }
1141 #else
1142     GMX_UNUSED_VALUE(cr);
1143     GMX_UNUSED_VALUE(gpu_id);
1144 #endif
1145 }
1146
1147 static void make_load_communicators(gmx_domdec_t gmx_unused* dd)
1148 {
1149 #if GMX_MPI
1150     int  dim0, dim1, i, j;
1151     ivec loc;
1152
1153     if (debug)
1154     {
1155         fprintf(debug, "Making load communicators\n");
1156     }
1157
1158     dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1159     snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1160
1161     if (dd->ndim == 0)
1162     {
1163         return;
1164     }
1165
1166     clear_ivec(loc);
1167     make_load_communicator(dd, 0, loc);
1168     if (dd->ndim > 1)
1169     {
1170         dim0 = dd->dim[0];
1171         for (i = 0; i < dd->numCells[dim0]; i++)
1172         {
1173             loc[dim0] = i;
1174             make_load_communicator(dd, 1, loc);
1175         }
1176     }
1177     if (dd->ndim > 2)
1178     {
1179         dim0 = dd->dim[0];
1180         for (i = 0; i < dd->numCells[dim0]; i++)
1181         {
1182             loc[dim0] = i;
1183             dim1      = dd->dim[1];
1184             for (j = 0; j < dd->numCells[dim1]; j++)
1185             {
1186                 loc[dim1] = j;
1187                 make_load_communicator(dd, 2, loc);
1188             }
1189         }
1190     }
1191
1192     if (debug)
1193     {
1194         fprintf(debug, "Finished making load communicators\n");
1195     }
1196 #endif
1197 }
1198
1199 /*! \brief Sets up the relation between neighboring domains and zones */
1200 static void setup_neighbor_relations(gmx_domdec_t* dd)
1201 {
1202     int                 d, dim, m;
1203     ivec                tmp, s;
1204     gmx_domdec_zones_t* zones;
1205     GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
1206
1207     for (d = 0; d < dd->ndim; d++)
1208     {
1209         dim = dd->dim[d];
1210         copy_ivec(dd->ci, tmp);
1211         tmp[dim]           = (tmp[dim] + 1) % dd->numCells[dim];
1212         dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1213         copy_ivec(dd->ci, tmp);
1214         tmp[dim]           = (tmp[dim] - 1 + dd->numCells[dim]) % dd->numCells[dim];
1215         dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1216         if (debug)
1217         {
1218             fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n", dd->rank, dim,
1219                     dd->neighbor[d][0], dd->neighbor[d][1]);
1220         }
1221     }
1222
1223     int nzone  = (1 << dd->ndim);
1224     int nizone = (1 << std::max(dd->ndim - 1, 0));
1225     assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1226
1227     zones = &dd->comm->zones;
1228
1229     for (int i = 0; i < nzone; i++)
1230     {
1231         m = 0;
1232         clear_ivec(zones->shift[i]);
1233         for (d = 0; d < dd->ndim; d++)
1234         {
1235             zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1236         }
1237     }
1238
1239     zones->n = nzone;
1240     for (int i = 0; i < nzone; i++)
1241     {
1242         for (d = 0; d < DIM; d++)
1243         {
1244             s[d] = dd->ci[d] - zones->shift[i][d];
1245             if (s[d] < 0)
1246             {
1247                 s[d] += dd->numCells[d];
1248             }
1249             else if (s[d] >= dd->numCells[d])
1250             {
1251                 s[d] -= dd->numCells[d];
1252             }
1253         }
1254     }
1255     for (int iZoneIndex = 0; iZoneIndex < nizone; iZoneIndex++)
1256     {
1257         GMX_RELEASE_ASSERT(
1258                 ddNonbondedZonePairRanges[iZoneIndex][0] == iZoneIndex,
1259                 "The first element for each ddNonbondedZonePairRanges should match its index");
1260
1261         DDPairInteractionRanges iZone;
1262         iZone.iZoneIndex = iZoneIndex;
1263         /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
1264          * j-zones up to nzone.
1265          */
1266         iZone.jZoneRange = gmx::Range<int>(std::min(ddNonbondedZonePairRanges[iZoneIndex][1], nzone),
1267                                            std::min(ddNonbondedZonePairRanges[iZoneIndex][2], nzone));
1268         for (dim = 0; dim < DIM; dim++)
1269         {
1270             if (dd->numCells[dim] == 1)
1271             {
1272                 /* All shifts should be allowed */
1273                 iZone.shift0[dim] = -1;
1274                 iZone.shift1[dim] = 1;
1275             }
1276             else
1277             {
1278                 /* Determine the min/max j-zone shift wrt the i-zone */
1279                 iZone.shift0[dim] = 1;
1280                 iZone.shift1[dim] = -1;
1281                 for (int jZone : iZone.jZoneRange)
1282                 {
1283                     int shift_diff = zones->shift[jZone][dim] - zones->shift[iZoneIndex][dim];
1284                     if (shift_diff < iZone.shift0[dim])
1285                     {
1286                         iZone.shift0[dim] = shift_diff;
1287                     }
1288                     if (shift_diff > iZone.shift1[dim])
1289                     {
1290                         iZone.shift1[dim] = shift_diff;
1291                     }
1292                 }
1293             }
1294         }
1295
1296         zones->iZones.push_back(iZone);
1297     }
1298
1299     if (!isDlbDisabled(dd->comm))
1300     {
1301         dd->comm->cellsizesWithDlb.resize(dd->ndim);
1302     }
1303
1304     if (dd->comm->ddSettings.recordLoad)
1305     {
1306         make_load_communicators(dd);
1307     }
1308 }
1309
1310 static void make_pp_communicator(const gmx::MDLogger& mdlog,
1311                                  gmx_domdec_t*        dd,
1312                                  t_commrec gmx_unused* cr,
1313                                  bool gmx_unused reorder)
1314 {
1315 #if GMX_MPI
1316     gmx_domdec_comm_t*  comm      = dd->comm;
1317     CartesianRankSetup& cartSetup = comm->cartesianRankSetup;
1318
1319     if (cartSetup.bCartesianPP)
1320     {
1321         /* Set up cartesian communication for the particle-particle part */
1322         GMX_LOG(mdlog.info)
1323                 .appendTextFormatted("Will use a Cartesian communicator: %d x %d x %d",
1324                                      dd->numCells[XX], dd->numCells[YY], dd->numCells[ZZ]);
1325
1326         ivec periods;
1327         for (int i = 0; i < DIM; i++)
1328         {
1329             periods[i] = TRUE;
1330         }
1331         MPI_Comm comm_cart;
1332         MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->numCells, periods, static_cast<int>(reorder),
1333                         &comm_cart);
1334         /* We overwrite the old communicator with the new cartesian one */
1335         cr->mpi_comm_mygroup = comm_cart;
1336     }
1337
1338     dd->mpi_comm_all = cr->mpi_comm_mygroup;
1339     MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1340
1341     if (cartSetup.bCartesianPP_PME)
1342     {
1343         /* Since we want to use the original cartesian setup for sim,
1344          * and not the one after split, we need to make an index.
1345          */
1346         cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1347         cartSetup.ddindex2ddnodeid[dd_index(dd->numCells, dd->ci)] = dd->rank;
1348         gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1349         /* Get the rank of the DD master,
1350          * above we made sure that the master node is a PP node.
1351          */
1352         int rank;
1353         if (MASTER(cr))
1354         {
1355             rank = dd->rank;
1356         }
1357         else
1358         {
1359             rank = 0;
1360         }
1361         MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1362     }
1363     else if (cartSetup.bCartesianPP)
1364     {
1365         if (!comm->ddRankSetup.usePmeOnlyRanks)
1366         {
1367             /* The PP communicator is also
1368              * the communicator for this simulation
1369              */
1370             cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1371         }
1372         cr->nodeid = dd->rank;
1373
1374         MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1375
1376         /* We need to make an index to go from the coordinates
1377          * to the nodeid of this simulation.
1378          */
1379         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1380         std::vector<int> buf(dd->nnodes);
1381         if (thisRankHasDuty(cr, DUTY_PP))
1382         {
1383             buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
1384         }
1385         /* Communicate the ddindex to simulation nodeid index */
1386         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1387                       cr->mpi_comm_mysim);
1388
1389         /* Determine the master coordinates and rank.
1390          * The DD master should be the same node as the master of this sim.
1391          */
1392         for (int i = 0; i < dd->nnodes; i++)
1393         {
1394             if (cartSetup.ddindex2simnodeid[i] == 0)
1395             {
1396                 ddindex2xyz(dd->numCells, i, dd->master_ci);
1397                 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1398             }
1399         }
1400         if (debug)
1401         {
1402             fprintf(debug, "The master rank is %d\n", dd->masterrank);
1403         }
1404     }
1405     else
1406     {
1407         /* No Cartesian communicators */
1408         /* We use the rank in dd->comm->all as DD index */
1409         ddindex2xyz(dd->numCells, dd->rank, dd->ci);
1410         /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1411         dd->masterrank = 0;
1412         clear_ivec(dd->master_ci);
1413     }
1414 #endif
1415
1416     GMX_LOG(mdlog.info)
1417             .appendTextFormatted("Domain decomposition rank %d, coordinates %d %d %d\n", dd->rank,
1418                                  dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1419     if (debug)
1420     {
1421         fprintf(debug, "Domain decomposition rank %d, coordinates %d %d %d\n\n", dd->rank,
1422                 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1423     }
1424 }
1425
1426 static void receive_ddindex2simnodeid(gmx_domdec_t* dd, t_commrec* cr)
1427 {
1428 #if GMX_MPI
1429     CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
1430
1431     if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1432     {
1433         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1434         std::vector<int> buf(dd->nnodes);
1435         if (thisRankHasDuty(cr, DUTY_PP))
1436         {
1437             buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
1438         }
1439         /* Communicate the ddindex to simulation nodeid index */
1440         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1441                       cr->mpi_comm_mysim);
1442     }
1443 #else
1444     GMX_UNUSED_VALUE(dd);
1445     GMX_UNUSED_VALUE(cr);
1446 #endif
1447 }
1448
1449 static CartesianRankSetup split_communicator(const gmx::MDLogger& mdlog,
1450                                              t_commrec*           cr,
1451                                              const DdRankOrder    ddRankOrder,
1452                                              bool gmx_unused    reorder,
1453                                              const DDRankSetup& ddRankSetup,
1454                                              ivec               ddCellIndex,
1455                                              std::vector<int>*  pmeRanks)
1456 {
1457     CartesianRankSetup cartSetup;
1458
1459     cartSetup.bCartesianPP     = (ddRankOrder == DdRankOrder::cartesian);
1460     cartSetup.bCartesianPP_PME = false;
1461
1462     const ivec& numDDCells = ddRankSetup.numPPCells;
1463     /* Initially we set ntot to the number of PP cells */
1464     copy_ivec(numDDCells, cartSetup.ntot);
1465
1466     if (cartSetup.bCartesianPP)
1467     {
1468         const int numDDCellsTot = ddRankSetup.numPPRanks;
1469         bool      bDiv[DIM];
1470         for (int i = 1; i < DIM; i++)
1471         {
1472             bDiv[i] = ((ddRankSetup.numRanksDoingPme * numDDCells[i]) % numDDCellsTot == 0);
1473         }
1474         if (bDiv[YY] || bDiv[ZZ])
1475         {
1476             cartSetup.bCartesianPP_PME = TRUE;
1477             /* If we have 2D PME decomposition, which is always in x+y,
1478              * we stack the PME only nodes in z.
1479              * Otherwise we choose the direction that provides the thinnest slab
1480              * of PME only nodes as this will have the least effect
1481              * on the PP communication.
1482              * But for the PME communication the opposite might be better.
1483              */
1484             if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 || !bDiv[YY] || numDDCells[YY] > numDDCells[ZZ]))
1485             {
1486                 cartSetup.cartpmedim = ZZ;
1487             }
1488             else
1489             {
1490                 cartSetup.cartpmedim = YY;
1491             }
1492             cartSetup.ntot[cartSetup.cartpmedim] +=
1493                     (ddRankSetup.numRanksDoingPme * numDDCells[cartSetup.cartpmedim]) / numDDCellsTot;
1494         }
1495         else
1496         {
1497             GMX_LOG(mdlog.info)
1498                     .appendTextFormatted(
1499                             "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or "
1500                             "nx*nz (%d*%d)",
1501                             ddRankSetup.numRanksDoingPme, numDDCells[XX], numDDCells[YY],
1502                             numDDCells[XX], numDDCells[ZZ]);
1503             GMX_LOG(mdlog.info)
1504                     .appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1505         }
1506     }
1507
1508     if (cartSetup.bCartesianPP_PME)
1509     {
1510 #if GMX_MPI
1511         int  rank;
1512         ivec periods;
1513
1514         GMX_LOG(mdlog.info)
1515                 .appendTextFormatted(
1516                         "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1517                         cartSetup.ntot[XX], cartSetup.ntot[YY], cartSetup.ntot[ZZ]);
1518
1519         for (int i = 0; i < DIM; i++)
1520         {
1521             periods[i] = TRUE;
1522         }
1523         MPI_Comm comm_cart;
1524         MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder),
1525                         &comm_cart);
1526         MPI_Comm_rank(comm_cart, &rank);
1527         if (MASTER(cr) && rank != 0)
1528         {
1529             gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1530         }
1531
1532         /* With this assigment we loose the link to the original communicator
1533          * which will usually be MPI_COMM_WORLD, unless have multisim.
1534          */
1535         cr->mpi_comm_mysim = comm_cart;
1536         cr->sim_nodeid     = rank;
1537
1538         MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1539
1540         GMX_LOG(mdlog.info)
1541                 .appendTextFormatted("Cartesian rank %d, coordinates %d %d %d\n", cr->sim_nodeid,
1542                                      ddCellIndex[XX], ddCellIndex[YY], ddCellIndex[ZZ]);
1543
1544         if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1545         {
1546             cr->duty = DUTY_PP;
1547         }
1548         if (!ddRankSetup.usePmeOnlyRanks
1549             || ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1550         {
1551             cr->duty = DUTY_PME;
1552         }
1553
1554         /* Split the sim communicator into PP and PME only nodes */
1555         MPI_Comm_split(cr->mpi_comm_mysim, getThisRankDuties(cr),
1556                        dd_index(cartSetup.ntot, ddCellIndex), &cr->mpi_comm_mygroup);
1557 #else
1558         GMX_UNUSED_VALUE(ddCellIndex);
1559 #endif
1560     }
1561     else
1562     {
1563         switch (ddRankOrder)
1564         {
1565             case DdRankOrder::pp_pme:
1566                 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1567                 break;
1568             case DdRankOrder::interleave:
1569                 /* Interleave the PP-only and PME-only ranks */
1570                 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1571                 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1572                 break;
1573             case DdRankOrder::cartesian: break;
1574             default: gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1575         }
1576
1577         if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1578         {
1579             cr->duty = DUTY_PME;
1580         }
1581         else
1582         {
1583             cr->duty = DUTY_PP;
1584         }
1585 #if GMX_MPI
1586         /* Split the sim communicator into PP and PME only nodes */
1587         MPI_Comm_split(cr->mpi_comm_mysim, getThisRankDuties(cr), cr->nodeid, &cr->mpi_comm_mygroup);
1588         MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1589 #endif
1590     }
1591
1592     GMX_LOG(mdlog.info)
1593             .appendTextFormatted("This rank does only %s work.\n",
1594                                  thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1595
1596     return cartSetup;
1597 }
1598
1599 /*! \brief Makes the PP communicator and the PME communicator, when needed
1600  *
1601  * Returns the Cartesian rank setup.
1602  * Sets \p cr->mpi_comm_mygroup
1603  * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1604  * With separate PME ranks in interleaved order, set the PME ranks in \p pmeRanks.
1605  */
1606 static CartesianRankSetup makeGroupCommunicators(const gmx::MDLogger& mdlog,
1607                                                  const DDSettings&    ddSettings,
1608                                                  const DdRankOrder    ddRankOrder,
1609                                                  const DDRankSetup&   ddRankSetup,
1610                                                  t_commrec*           cr,
1611                                                  ivec                 ddCellIndex,
1612                                                  std::vector<int>*    pmeRanks)
1613 {
1614     CartesianRankSetup cartSetup;
1615
1616     if (ddRankSetup.usePmeOnlyRanks)
1617     {
1618         /* Split the communicator into a PP and PME part */
1619         cartSetup = split_communicator(mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder,
1620                                        ddRankSetup, ddCellIndex, pmeRanks);
1621     }
1622     else
1623     {
1624         /* All nodes do PP and PME */
1625         /* We do not require separate communicators */
1626         cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
1627
1628         cartSetup.bCartesianPP     = false;
1629         cartSetup.bCartesianPP_PME = false;
1630     }
1631
1632     return cartSetup;
1633 }
1634
1635 /*! \brief For PP ranks, sets or makes the communicator
1636  *
1637  * For PME ranks get the rank id.
1638  * For PP only ranks, sets the PME-only rank.
1639  */
1640 static void setupGroupCommunication(const gmx::MDLogger&     mdlog,
1641                                     const DDSettings&        ddSettings,
1642                                     gmx::ArrayRef<const int> pmeRanks,
1643                                     t_commrec*               cr,
1644                                     const int                numAtomsInSystem,
1645                                     gmx_domdec_t*            dd)
1646 {
1647     const DDRankSetup&        ddRankSetup = dd->comm->ddRankSetup;
1648     const CartesianRankSetup& cartSetup   = dd->comm->cartesianRankSetup;
1649
1650     if (thisRankHasDuty(cr, DUTY_PP))
1651     {
1652         /* Copy or make a new PP communicator */
1653
1654         /* We (possibly) reordered the nodes in split_communicator,
1655          * so it is no longer required in make_pp_communicator.
1656          */
1657         const bool useCartesianReorder = (ddSettings.useCartesianReorder && !cartSetup.bCartesianPP_PME);
1658
1659         make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1660     }
1661     else
1662     {
1663         receive_ddindex2simnodeid(dd, cr);
1664     }
1665
1666     if (!thisRankHasDuty(cr, DUTY_PME))
1667     {
1668         /* Set up the commnuication to our PME node */
1669         dd->pme_nodeid = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1670         dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1671         if (debug)
1672         {
1673             fprintf(debug, "My pme_nodeid %d receive ener %s\n", dd->pme_nodeid,
1674                     gmx::boolToString(dd->pme_receive_vir_ener));
1675         }
1676     }
1677     else
1678     {
1679         dd->pme_nodeid = -1;
1680     }
1681
1682     /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1683     if (MASTER(cr))
1684     {
1685         dd->ma = std::make_unique<AtomDistribution>(dd->numCells, numAtomsInSystem, numAtomsInSystem);
1686     }
1687 }
1688
1689 static real* get_slb_frac(const gmx::MDLogger& mdlog, const char* dir, int nc, const char* size_string)
1690 {
1691     real * slb_frac, tot;
1692     int    i, n;
1693     double dbl;
1694
1695     slb_frac = nullptr;
1696     if (nc > 1 && size_string != nullptr)
1697     {
1698         GMX_LOG(mdlog.info).appendTextFormatted("Using static load balancing for the %s direction", dir);
1699         snew(slb_frac, nc);
1700         tot = 0;
1701         for (i = 0; i < nc; i++)
1702         {
1703             dbl = 0;
1704             sscanf(size_string, "%20lf%n", &dbl, &n);
1705             if (dbl == 0)
1706             {
1707                 gmx_fatal(FARGS,
1708                           "Incorrect or not enough DD cell size entries for direction %s: '%s'",
1709                           dir, size_string);
1710             }
1711             slb_frac[i] = dbl;
1712             size_string += n;
1713             tot += slb_frac[i];
1714         }
1715         /* Normalize */
1716         std::string relativeCellSizes = "Relative cell sizes:";
1717         for (i = 0; i < nc; i++)
1718         {
1719             slb_frac[i] /= tot;
1720             relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1721         }
1722         GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1723     }
1724
1725     return slb_frac;
1726 }
1727
1728 static int multi_body_bondeds_count(const gmx_mtop_t* mtop)
1729 {
1730     int                  n     = 0;
1731     gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1732     int                  nmol;
1733     while (const InteractionLists* ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1734     {
1735         for (auto& ilist : extractILists(*ilists, IF_BOND))
1736         {
1737             if (NRAL(ilist.functionType) > 2)
1738             {
1739                 n += nmol * (ilist.iatoms.size() / ilistStride(ilist));
1740             }
1741         }
1742     }
1743
1744     return n;
1745 }
1746
1747 static int dd_getenv(const gmx::MDLogger& mdlog, const char* env_var, int def)
1748 {
1749     char* val;
1750     int   nst;
1751
1752     nst = def;
1753     val = getenv(env_var);
1754     if (val)
1755     {
1756         if (sscanf(val, "%20d", &nst) <= 0)
1757         {
1758             nst = 1;
1759         }
1760         GMX_LOG(mdlog.info).appendTextFormatted("Found env.var. %s = %s, using value %d", env_var, val, nst);
1761     }
1762
1763     return nst;
1764 }
1765
1766 static void check_dd_restrictions(const gmx_domdec_t* dd, const t_inputrec* ir, const gmx::MDLogger& mdlog)
1767 {
1768     if (ir->pbcType == PbcType::Screw
1769         && (dd->numCells[XX] == 1 || dd->numCells[YY] > 1 || dd->numCells[ZZ] > 1))
1770     {
1771         gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction",
1772                   c_pbcTypeNames[ir->pbcType].c_str());
1773     }
1774
1775     if (ir->nstlist == 0)
1776     {
1777         gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1778     }
1779
1780     if (ir->comm_mode == ecmANGULAR && ir->pbcType != PbcType::No)
1781     {
1782         GMX_LOG(mdlog.warning)
1783                 .appendText(
1784                         "comm-mode angular will give incorrect results when the comm group "
1785                         "partially crosses a periodic boundary");
1786     }
1787 }
1788
1789 static real average_cellsize_min(const gmx_ddbox_t& ddbox, const ivec numDomains)
1790 {
1791     real r = ddbox.box_size[XX];
1792     for (int d = 0; d < DIM; d++)
1793     {
1794         if (numDomains[d] > 1)
1795         {
1796             /* Check using the initial average cell size */
1797             r = std::min(r, ddbox.box_size[d] * ddbox.skew_fac[d] / numDomains[d]);
1798         }
1799     }
1800
1801     return r;
1802 }
1803
1804 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
1805  */
1806 static DlbState forceDlbOffOrBail(DlbState             cmdlineDlbState,
1807                                   const std::string&   reasonStr,
1808                                   const gmx::MDLogger& mdlog)
1809 {
1810     std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
1811     std::string dlbDisableNote     = "NOTE: disabling dynamic load balancing as ";
1812
1813     if (cmdlineDlbState == DlbState::onUser)
1814     {
1815         gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1816     }
1817     else if (cmdlineDlbState == DlbState::offCanTurnOn)
1818     {
1819         GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1820     }
1821     return DlbState::offForever;
1822 }
1823
1824 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1825  *
1826  * This function parses the parameters of "-dlb" command line option setting
1827  * corresponding state values. Then it checks the consistency of the determined
1828  * state with other run parameters and settings. As a result, the initial state
1829  * may be altered or an error may be thrown if incompatibility of options is detected.
1830  *
1831  * \param [in] mdlog       Logger.
1832  * \param [in] dlbOption   Enum value for the DLB option.
1833  * \param [in] bRecordLoad True if the load balancer is recording load information.
1834  * \param [in] mdrunOptions  Options for mdrun.
1835  * \param [in] ir          Pointer mdrun to input parameters.
1836  * \returns                DLB initial/startup state.
1837  */
1838 static DlbState determineInitialDlbState(const gmx::MDLogger&     mdlog,
1839                                          DlbOption                dlbOption,
1840                                          gmx_bool                 bRecordLoad,
1841                                          const gmx::MdrunOptions& mdrunOptions,
1842                                          const t_inputrec*        ir)
1843 {
1844     DlbState dlbState = DlbState::offCanTurnOn;
1845
1846     switch (dlbOption)
1847     {
1848         case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1849         case DlbOption::no: dlbState = DlbState::offUser; break;
1850         case DlbOption::yes: dlbState = DlbState::onUser; break;
1851         default: gmx_incons("Invalid dlbOption enum value");
1852     }
1853
1854     /* Reruns don't support DLB: bail or override auto mode */
1855     if (mdrunOptions.rerun)
1856     {
1857         std::string reasonStr = "it is not supported in reruns.";
1858         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1859     }
1860
1861     /* Unsupported integrators */
1862     if (!EI_DYNAMICS(ir->eI))
1863     {
1864         auto reasonStr = gmx::formatString(
1865                 "it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1866         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1867     }
1868
1869     /* Without cycle counters we can't time work to balance on */
1870     if (!bRecordLoad)
1871     {
1872         std::string reasonStr =
1873                 "cycle counters unsupported or not enabled in the operating system kernel.";
1874         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1875     }
1876
1877     if (mdrunOptions.reproducible)
1878     {
1879         std::string reasonStr = "you started a reproducible run.";
1880         switch (dlbState)
1881         {
1882             case DlbState::offUser: break;
1883             case DlbState::offForever:
1884                 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1885                 break;
1886             case DlbState::offCanTurnOn: return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1887             case DlbState::onCanTurnOff:
1888                 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1889                 break;
1890             case DlbState::onUser:
1891                 return forceDlbOffOrBail(
1892                         dlbState,
1893                         reasonStr
1894                                 + " In load balanced runs binary reproducibility cannot be "
1895                                   "ensured.",
1896                         mdlog);
1897             default:
1898                 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice",
1899                           static_cast<int>(dlbState));
1900         }
1901     }
1902
1903     return dlbState;
1904 }
1905
1906 static gmx_domdec_comm_t* init_dd_comm()
1907 {
1908     gmx_domdec_comm_t* comm = new gmx_domdec_comm_t;
1909
1910     comm->n_load_have    = 0;
1911     comm->n_load_collect = 0;
1912
1913     comm->haveTurnedOffDlb = false;
1914
1915     for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
1916     {
1917         comm->sum_nat[i] = 0;
1918     }
1919     comm->ndecomp   = 0;
1920     comm->nload     = 0;
1921     comm->load_step = 0;
1922     comm->load_sum  = 0;
1923     comm->load_max  = 0;
1924     clear_ivec(comm->load_lim);
1925     comm->load_mdf = 0;
1926     comm->load_pme = 0;
1927
1928     /* This should be replaced by a unique pointer */
1929     comm->balanceRegion = ddBalanceRegionAllocate();
1930
1931     return comm;
1932 }
1933
1934 /* Returns whether mtop contains constraints and/or vsites */
1935 static bool systemHasConstraintsOrVsites(const gmx_mtop_t& mtop)
1936 {
1937     auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
1938     int  nmol;
1939     while (const InteractionLists* ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
1940     {
1941         if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
1942         {
1943             return true;
1944         }
1945     }
1946
1947     return false;
1948 }
1949
1950 static void setupUpdateGroups(const gmx::MDLogger& mdlog,
1951                               const gmx_mtop_t&    mtop,
1952                               const t_inputrec&    inputrec,
1953                               const real           cutoffMargin,
1954                               DDSystemInfo*        systemInfo)
1955 {
1956     /* When we have constraints and/or vsites, it is beneficial to use
1957      * update groups (when possible) to allow independent update of groups.
1958      */
1959     if (!systemHasConstraintsOrVsites(mtop))
1960     {
1961         /* No constraints or vsites, atoms can be updated independently */
1962         return;
1963     }
1964
1965     systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
1966     systemInfo->useUpdateGroups               = (!systemInfo->updateGroupingPerMoleculetype.empty()
1967                                    && getenv("GMX_NO_UPDATEGROUPS") == nullptr);
1968
1969     if (systemInfo->useUpdateGroups)
1970     {
1971         int numUpdateGroups = 0;
1972         for (const auto& molblock : mtop.molblock)
1973         {
1974             numUpdateGroups += molblock.nmol
1975                                * systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
1976         }
1977
1978         systemInfo->maxUpdateGroupRadius = computeMaxUpdateGroupRadius(
1979                 mtop, systemInfo->updateGroupingPerMoleculetype, maxReferenceTemperature(inputrec));
1980
1981         /* To use update groups, the large domain-to-domain cutoff distance
1982          * should be compatible with the box size.
1983          */
1984         systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
1985
1986         if (systemInfo->useUpdateGroups)
1987         {
1988             GMX_LOG(mdlog.info)
1989                     .appendTextFormatted(
1990                             "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f "
1991                             "nm\n",
1992                             numUpdateGroups, mtop.natoms / static_cast<double>(numUpdateGroups),
1993                             systemInfo->maxUpdateGroupRadius);
1994         }
1995         else
1996         {
1997             GMX_LOG(mdlog.info)
1998                     .appendTextFormatted(
1999                             "The combination of rlist and box size prohibits the use of update "
2000                             "groups\n");
2001             systemInfo->updateGroupingPerMoleculetype.clear();
2002         }
2003     }
2004 }
2005
2006 UnitCellInfo::UnitCellInfo(const t_inputrec& ir) :
2007     npbcdim(numPbcDimensions(ir.pbcType)),
2008     numBoundedDimensions(inputrec2nboundeddim(&ir)),
2009     ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
2010     haveScrewPBC(ir.pbcType == PbcType::Screw)
2011 {
2012 }
2013
2014 /* Returns whether molecules are always whole, i.e. not broken by PBC */
2015 static bool moleculesAreAlwaysWhole(const gmx_mtop_t&                           mtop,
2016                                     const bool                                  useUpdateGroups,
2017                                     gmx::ArrayRef<const gmx::RangePartitioning> updateGroupingPerMoleculetype)
2018 {
2019     if (useUpdateGroups)
2020     {
2021         GMX_RELEASE_ASSERT(updateGroupingPerMoleculetype.size() == mtop.moltype.size(),
2022                            "Need one grouping per moltype");
2023         for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
2024         {
2025             if (updateGroupingPerMoleculetype[mol].numBlocks() > 1)
2026             {
2027                 return false;
2028             }
2029         }
2030     }
2031     else
2032     {
2033         for (const auto& moltype : mtop.moltype)
2034         {
2035             if (moltype.atoms.nr > 1)
2036             {
2037                 return false;
2038             }
2039         }
2040     }
2041
2042     return true;
2043 }
2044
2045 /*! \brief Generate the simulation system information */
2046 static DDSystemInfo getSystemInfo(const gmx::MDLogger&           mdlog,
2047                                   const t_commrec*               cr,
2048                                   const DomdecOptions&           options,
2049                                   const gmx_mtop_t&              mtop,
2050                                   const t_inputrec&              ir,
2051                                   const matrix                   box,
2052                                   gmx::ArrayRef<const gmx::RVec> xGlobal)
2053 {
2054     const real tenPercentMargin = 1.1;
2055
2056     DDSystemInfo systemInfo;
2057
2058     /* We need to decide on update groups early, as this affects communication distances */
2059     systemInfo.useUpdateGroups = false;
2060     if (ir.cutoff_scheme == ecutsVERLET)
2061     {
2062         real cutoffMargin = std::sqrt(max_cutoff2(ir.pbcType, box)) - ir.rlist;
2063         setupUpdateGroups(mdlog, mtop, ir, cutoffMargin, &systemInfo);
2064     }
2065
2066     systemInfo.moleculesAreAlwaysWhole = moleculesAreAlwaysWhole(
2067             mtop, systemInfo.useUpdateGroups, systemInfo.updateGroupingPerMoleculetype);
2068     systemInfo.haveInterDomainBondeds =
2069             (!systemInfo.moleculesAreAlwaysWhole || mtop.bIntermolecularInteractions);
2070     systemInfo.haveInterDomainMultiBodyBondeds =
2071             (systemInfo.haveInterDomainBondeds && multi_body_bondeds_count(&mtop) > 0);
2072
2073     if (systemInfo.useUpdateGroups)
2074     {
2075         systemInfo.haveSplitConstraints = false;
2076         systemInfo.haveSplitSettles     = false;
2077     }
2078     else
2079     {
2080         systemInfo.haveSplitConstraints = (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0
2081                                            || gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0);
2082         systemInfo.haveSplitSettles     = (gmx_mtop_ftype_count(mtop, F_SETTLE) > 0);
2083     }
2084
2085     if (ir.rlist == 0)
2086     {
2087         /* Set the cut-off to some very large value,
2088          * so we don't need if statements everywhere in the code.
2089          * We use sqrt, since the cut-off is squared in some places.
2090          */
2091         systemInfo.cutoff = GMX_CUTOFF_INF;
2092     }
2093     else
2094     {
2095         systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir.rlist);
2096     }
2097     systemInfo.minCutoffForMultiBody = 0;
2098
2099     /* Determine the minimum cell size limit, affected by many factors */
2100     systemInfo.cellsizeLimit             = 0;
2101     systemInfo.filterBondedCommunication = false;
2102
2103     /* We do not allow home atoms to move beyond the neighboring domain
2104      * between domain decomposition steps, which limits the cell size.
2105      * Get an estimate of cell size limit due to atom displacement.
2106      * In most cases this is a large overestimate, because it assumes
2107      * non-interaction atoms.
2108      * We set the chance to 1 in a trillion steps.
2109      */
2110     constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2111     const real     limitForAtomDisplacement          = minCellSizeForAtomDisplacement(
2112             mtop, ir, systemInfo.updateGroupingPerMoleculetype, c_chanceThatAtomMovesBeyondDomain);
2113     GMX_LOG(mdlog.info).appendTextFormatted("Minimum cell size due to atom displacement: %.3f nm", limitForAtomDisplacement);
2114
2115     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, limitForAtomDisplacement);
2116
2117     /* TODO: PME decomposition currently requires atoms not to be more than
2118      *       2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2119      *       In nearly all cases, limitForAtomDisplacement will be smaller
2120      *       than 2/3*rlist, so the PME requirement is satisfied.
2121      *       But it would be better for both correctness and performance
2122      *       to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2123      *       Note that we would need to improve the pairlist buffer case.
2124      */
2125
2126     if (systemInfo.haveInterDomainBondeds)
2127     {
2128         if (options.minimumCommunicationRange > 0)
2129         {
2130             systemInfo.minCutoffForMultiBody =
2131                     atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2132             if (options.useBondedCommunication)
2133             {
2134                 systemInfo.filterBondedCommunication =
2135                         (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2136             }
2137             else
2138             {
2139                 systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
2140             }
2141         }
2142         else if (ir.bPeriodicMols)
2143         {
2144             /* Can not easily determine the required cut-off */
2145             GMX_LOG(mdlog.warning)
2146                     .appendText(
2147                             "NOTE: Periodic molecules are present in this system. Because of this, "
2148                             "the domain decomposition algorithm cannot easily determine the "
2149                             "minimum cell size that it requires for treating bonded interactions. "
2150                             "Instead, domain decomposition will assume that half the non-bonded "
2151                             "cut-off will be a suitable lower bound.");
2152             systemInfo.minCutoffForMultiBody = systemInfo.cutoff / 2;
2153         }
2154         else
2155         {
2156             real r_2b, r_mb;
2157
2158             if (MASTER(cr))
2159             {
2160                 dd_bonded_cg_distance(mdlog, &mtop, &ir, as_rvec_array(xGlobal.data()), box,
2161                                       options.checkBondedInteractions, &r_2b, &r_mb);
2162             }
2163             gmx_bcast(sizeof(r_2b), &r_2b, cr);
2164             gmx_bcast(sizeof(r_mb), &r_mb, cr);
2165
2166             /* We use an initial margin of 10% for the minimum cell size,
2167              * except when we are just below the non-bonded cut-off.
2168              */
2169             if (options.useBondedCommunication)
2170             {
2171                 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2172                 {
2173                     const real r_bonded              = std::max(r_2b, r_mb);
2174                     systemInfo.minCutoffForMultiBody = tenPercentMargin * r_bonded;
2175                     /* This is the (only) place where we turn on the filtering */
2176                     systemInfo.filterBondedCommunication = true;
2177                 }
2178                 else
2179                 {
2180                     const real r_bonded = r_mb;
2181                     systemInfo.minCutoffForMultiBody =
2182                             std::min(tenPercentMargin * r_bonded, systemInfo.cutoff);
2183                 }
2184                 /* We determine cutoff_mbody later */
2185                 systemInfo.increaseMultiBodyCutoff = true;
2186             }
2187             else
2188             {
2189                 /* No special bonded communication,
2190                  * simply increase the DD cut-off.
2191                  */
2192                 systemInfo.minCutoffForMultiBody = tenPercentMargin * std::max(r_2b, r_mb);
2193                 systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
2194             }
2195         }
2196         GMX_LOG(mdlog.info)
2197                 .appendTextFormatted("Minimum cell size due to bonded interactions: %.3f nm",
2198                                      systemInfo.minCutoffForMultiBody);
2199
2200         systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.minCutoffForMultiBody);
2201     }
2202
2203     systemInfo.constraintCommunicationRange = 0;
2204     if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
2205     {
2206         /* There is a cell size limit due to the constraints (P-LINCS) */
2207         systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, &mtop, &ir);
2208         GMX_LOG(mdlog.info)
2209                 .appendTextFormatted("Estimated maximum distance required for P-LINCS: %.3f nm",
2210                                      systemInfo.constraintCommunicationRange);
2211         if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2212         {
2213             GMX_LOG(mdlog.info)
2214                     .appendText(
2215                             "This distance will limit the DD cell size, you can override this with "
2216                             "-rcon");
2217         }
2218     }
2219     else if (options.constraintCommunicationRange > 0)
2220     {
2221         /* Here we do not check for dd->splitConstraints.
2222          * because one can also set a cell size limit for virtual sites only
2223          * and at this point we don't know yet if there are intercg v-sites.
2224          */
2225         GMX_LOG(mdlog.info)
2226                 .appendTextFormatted("User supplied maximum distance required for P-LINCS: %.3f nm",
2227                                      options.constraintCommunicationRange);
2228         systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2229     }
2230     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.constraintCommunicationRange);
2231
2232     return systemInfo;
2233 }
2234
2235 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
2236  * implemented. */
2237 static void checkDDGridSetup(const DDGridSetup&   ddGridSetup,
2238                              const t_commrec*     cr,
2239                              const DomdecOptions& options,
2240                              const DDSettings&    ddSettings,
2241                              const DDSystemInfo&  systemInfo,
2242                              const real           cellsizeLimit,
2243                              const gmx_ddbox_t&   ddbox)
2244 {
2245     if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
2246     {
2247         char     buf[STRLEN];
2248         gmx_bool bC = (systemInfo.haveSplitConstraints
2249                        && systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2250         sprintf(buf, "Change the number of ranks or mdrun option %s%s%s", !bC ? "-rdd" : "-rcon",
2251                 ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2252                 bC ? " or your LINCS settings" : "");
2253
2254         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2255                              "There is no domain decomposition for %d ranks that is compatible "
2256                              "with the given box and a minimum cell size of %g nm\n"
2257                              "%s\n"
2258                              "Look in the log file for details on the domain decomposition",
2259                              cr->nnodes - ddGridSetup.numPmeOnlyRanks, cellsizeLimit, buf);
2260     }
2261
2262     const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
2263     if (acs < cellsizeLimit)
2264     {
2265         if (options.numCells[XX] <= 0)
2266         {
2267             GMX_RELEASE_ASSERT(
2268                     false,
2269                     "dd_choose_grid() should return a grid that satisfies the cell size limits");
2270         }
2271         else
2272         {
2273             gmx_fatal_collective(
2274                     FARGS, cr->mpi_comm_mysim, MASTER(cr),
2275                     "The initial cell size (%f) is smaller than the cell size limit (%f), change "
2276                     "options -dd, -rdd or -rcon, see the log file for details",
2277                     acs, cellsizeLimit);
2278         }
2279     }
2280
2281     const int numPPRanks =
2282             ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2283     if (cr->nnodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2284     {
2285         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2286                              "The size of the domain decomposition grid (%d) does not match the "
2287                              "number of PP ranks (%d). The total number of ranks is %d",
2288                              numPPRanks, cr->nnodes - ddGridSetup.numPmeOnlyRanks, cr->nnodes);
2289     }
2290     if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2291     {
2292         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2293                              "The number of separate PME ranks (%d) is larger than the number of "
2294                              "PP ranks (%d), this is not supported.",
2295                              ddGridSetup.numPmeOnlyRanks, numPPRanks);
2296     }
2297 }
2298
2299 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2300 static DDRankSetup getDDRankSetup(const gmx::MDLogger& mdlog,
2301                                   t_commrec*           cr,
2302                                   const DDGridSetup&   ddGridSetup,
2303                                   const t_inputrec&    ir)
2304 {
2305     GMX_LOG(mdlog.info)
2306             .appendTextFormatted("Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2307                                  ddGridSetup.numDomains[XX], ddGridSetup.numDomains[YY],
2308                                  ddGridSetup.numDomains[ZZ], ddGridSetup.numPmeOnlyRanks);
2309
2310     DDRankSetup ddRankSetup;
2311
2312     ddRankSetup.numPPRanks = cr->nnodes - ddGridSetup.numPmeOnlyRanks;
2313     copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2314
2315     ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2316     if (ddRankSetup.usePmeOnlyRanks)
2317     {
2318         ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2319     }
2320     else
2321     {
2322         ddRankSetup.numRanksDoingPme =
2323                 ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2324     }
2325
2326     if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2327     {
2328         /* The following choices should match those
2329          * in comm_cost_est in domdec_setup.c.
2330          * Note that here the checks have to take into account
2331          * that the decomposition might occur in a different order than xyz
2332          * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2333          * in which case they will not match those in comm_cost_est,
2334          * but since that is mainly for testing purposes that's fine.
2335          */
2336         if (ddGridSetup.numDDDimensions >= 2 && ddGridSetup.ddDimensions[0] == XX
2337             && ddGridSetup.ddDimensions[1] == YY
2338             && ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX]
2339             && ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0
2340             && getenv("GMX_PMEONEDD") == nullptr)
2341         {
2342             ddRankSetup.npmedecompdim = 2;
2343             ddRankSetup.npmenodes_x   = ddGridSetup.numDomains[XX];
2344             ddRankSetup.npmenodes_y   = ddRankSetup.numRanksDoingPme / ddRankSetup.npmenodes_x;
2345         }
2346         else
2347         {
2348             /* In case nc is 1 in both x and y we could still choose to
2349              * decompose pme in y instead of x, but we use x for simplicity.
2350              */
2351             ddRankSetup.npmedecompdim = 1;
2352             if (ddGridSetup.ddDimensions[0] == YY)
2353             {
2354                 ddRankSetup.npmenodes_x = 1;
2355                 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2356             }
2357             else
2358             {
2359                 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2360                 ddRankSetup.npmenodes_y = 1;
2361             }
2362         }
2363         GMX_LOG(mdlog.info)
2364                 .appendTextFormatted("PME domain decomposition: %d x %d x %d",
2365                                      ddRankSetup.npmenodes_x, ddRankSetup.npmenodes_y, 1);
2366     }
2367     else
2368     {
2369         ddRankSetup.npmedecompdim = 0;
2370         ddRankSetup.npmenodes_x   = 0;
2371         ddRankSetup.npmenodes_y   = 0;
2372     }
2373
2374     return ddRankSetup;
2375 }
2376
2377 /*! \brief Set the cell size and interaction limits */
2378 static void set_dd_limits(const gmx::MDLogger& mdlog,
2379                           t_commrec*           cr,
2380                           gmx_domdec_t*        dd,
2381                           const DomdecOptions& options,
2382                           const DDSettings&    ddSettings,
2383                           const DDSystemInfo&  systemInfo,
2384                           const DDGridSetup&   ddGridSetup,
2385                           const int            numPPRanks,
2386                           const gmx_mtop_t*    mtop,
2387                           const t_inputrec*    ir,
2388                           const gmx_ddbox_t&   ddbox)
2389 {
2390     gmx_domdec_comm_t* comm = dd->comm;
2391     comm->ddSettings        = ddSettings;
2392
2393     /* Initialize to GPU share count to 0, might change later */
2394     comm->nrank_gpu_shared = 0;
2395
2396     comm->dlbState = comm->ddSettings.initialDlbState;
2397     dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2398     /* To consider turning DLB on after 2*nstlist steps we need to check
2399      * at partitioning count 3. Thus we need to increase the first count by 2.
2400      */
2401     comm->ddPartioningCountFirstDlbOff += 2;
2402
2403     comm->bPMELoadBalDLBLimits = FALSE;
2404
2405     /* Allocate the charge group/atom sorting struct */
2406     comm->sort = std::make_unique<gmx_domdec_sort_t>();
2407
2408     comm->systemInfo = systemInfo;
2409
2410     if (systemInfo.useUpdateGroups)
2411     {
2412         /* Note: We would like to use dd->nnodes for the atom count estimate,
2413          *       but that is not yet available here. But this anyhow only
2414          *       affect performance up to the second dd_partition_system call.
2415          */
2416         const int homeAtomCountEstimate = mtop->natoms / numPPRanks;
2417         comm->updateGroupsCog           = std::make_unique<gmx::UpdateGroupsCog>(
2418                 *mtop, systemInfo.updateGroupingPerMoleculetype, maxReferenceTemperature(*ir),
2419                 homeAtomCountEstimate);
2420     }
2421
2422     /* Set the DD setup given by ddGridSetup */
2423     copy_ivec(ddGridSetup.numDomains, dd->numCells);
2424     dd->ndim = ddGridSetup.numDDDimensions;
2425     copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2426
2427     dd->nnodes = dd->numCells[XX] * dd->numCells[YY] * dd->numCells[ZZ];
2428
2429     snew(comm->slb_frac, DIM);
2430     if (isDlbDisabled(comm))
2431     {
2432         comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->numCells[XX], options.cellSizeX);
2433         comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->numCells[YY], options.cellSizeY);
2434         comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->numCells[ZZ], options.cellSizeZ);
2435     }
2436
2437     /* Set the multi-body cut-off and cellsize limit for DLB */
2438     comm->cutoff_mbody   = systemInfo.minCutoffForMultiBody;
2439     comm->cellsize_limit = systemInfo.cellsizeLimit;
2440     if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2441     {
2442         if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2443         {
2444             /* Set the bonded communication distance to halfway
2445              * the minimum and the maximum,
2446              * since the extra communication cost is nearly zero.
2447              */
2448             real acs           = average_cellsize_min(ddbox, dd->numCells);
2449             comm->cutoff_mbody = 0.5 * (systemInfo.minCutoffForMultiBody + acs);
2450             if (!isDlbDisabled(comm))
2451             {
2452                 /* Check if this does not limit the scaling */
2453                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, options.dlbScaling * acs);
2454             }
2455             if (!systemInfo.filterBondedCommunication)
2456             {
2457                 /* Without bBondComm do not go beyond the n.b. cut-off */
2458                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2459                 if (comm->cellsize_limit >= systemInfo.cutoff)
2460                 {
2461                     /* We don't loose a lot of efficieny
2462                      * when increasing it to the n.b. cut-off.
2463                      * It can even be slightly faster, because we need
2464                      * less checks for the communication setup.
2465                      */
2466                     comm->cutoff_mbody = systemInfo.cutoff;
2467                 }
2468             }
2469             /* Check if we did not end up below our original limit */
2470             comm->cutoff_mbody = std::max(comm->cutoff_mbody, systemInfo.minCutoffForMultiBody);
2471
2472             if (comm->cutoff_mbody > comm->cellsize_limit)
2473             {
2474                 comm->cellsize_limit = comm->cutoff_mbody;
2475             }
2476         }
2477         /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2478     }
2479
2480     if (debug)
2481     {
2482         fprintf(debug,
2483                 "Bonded atom communication beyond the cut-off: %s\n"
2484                 "cellsize limit %f\n",
2485                 gmx::boolToString(systemInfo.filterBondedCommunication), comm->cellsize_limit);
2486     }
2487
2488     if (MASTER(cr))
2489     {
2490         check_dd_restrictions(dd, ir, mdlog);
2491     }
2492 }
2493
2494 void dd_init_bondeds(FILE*                      fplog,
2495                      gmx_domdec_t*              dd,
2496                      const gmx_mtop_t&          mtop,
2497                      const gmx_vsite_t*         vsite,
2498                      const t_inputrec*          ir,
2499                      gmx_bool                   bBCheck,
2500                      gmx::ArrayRef<cginfo_mb_t> cginfo_mb)
2501 {
2502     gmx_domdec_comm_t* comm;
2503
2504     dd_make_reverse_top(fplog, dd, &mtop, vsite, ir, bBCheck);
2505
2506     comm = dd->comm;
2507
2508     if (comm->systemInfo.filterBondedCommunication)
2509     {
2510         /* Communicate atoms beyond the cut-off for bonded interactions */
2511         comm->bondedLinks = makeBondedLinks(mtop, cginfo_mb);
2512     }
2513     else
2514     {
2515         /* Only communicate atoms based on cut-off */
2516         comm->bondedLinks = nullptr;
2517     }
2518 }
2519
2520 static void writeSettings(gmx::TextWriter*   log,
2521                           gmx_domdec_t*      dd,
2522                           const gmx_mtop_t*  mtop,
2523                           const t_inputrec*  ir,
2524                           gmx_bool           bDynLoadBal,
2525                           real               dlb_scale,
2526                           const gmx_ddbox_t* ddbox)
2527 {
2528     gmx_domdec_comm_t* comm;
2529     int                d;
2530     ivec               np;
2531     real               limit, shrink;
2532
2533     comm = dd->comm;
2534
2535     if (bDynLoadBal)
2536     {
2537         log->writeString("The maximum number of communication pulses is:");
2538         for (d = 0; d < dd->ndim; d++)
2539         {
2540             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2541         }
2542         log->ensureLineBreak();
2543         log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm",
2544                                 comm->cellsize_limit);
2545         log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2546         log->writeString("The allowed shrink of domain decomposition cells is:");
2547         for (d = 0; d < DIM; d++)
2548         {
2549             if (dd->numCells[d] > 1)
2550             {
2551                 if (d >= ddbox->npbcdim && dd->numCells[d] == 2)
2552                 {
2553                     shrink = 0;
2554                 }
2555                 else
2556                 {
2557                     shrink = comm->cellsize_min_dlb[d]
2558                              / (ddbox->box_size[d] * ddbox->skew_fac[d] / dd->numCells[d]);
2559                 }
2560                 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2561             }
2562         }
2563         log->ensureLineBreak();
2564     }
2565     else
2566     {
2567         set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2568         log->writeString("The initial number of communication pulses is:");
2569         for (d = 0; d < dd->ndim; d++)
2570         {
2571             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2572         }
2573         log->ensureLineBreak();
2574         log->writeString("The initial domain decomposition cell size is:");
2575         for (d = 0; d < DIM; d++)
2576         {
2577             if (dd->numCells[d] > 1)
2578             {
2579                 log->writeStringFormatted(" %c %.2f nm", dim2char(d), dd->comm->cellsize_min[d]);
2580             }
2581         }
2582         log->ensureLineBreak();
2583         log->writeLine();
2584     }
2585
2586     const bool haveInterDomainVsites =
2587             (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
2588
2589     if (comm->systemInfo.haveInterDomainBondeds || haveInterDomainVsites
2590         || comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2591     {
2592         std::string decompUnits;
2593         if (comm->systemInfo.useUpdateGroups)
2594         {
2595             decompUnits = "atom groups";
2596         }
2597         else
2598         {
2599             decompUnits = "atoms";
2600         }
2601
2602         log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:",
2603                                 decompUnits.c_str());
2604         log->writeLineFormatted("%40s  %-7s %6.3f nm", "non-bonded interactions", "",
2605                                 comm->systemInfo.cutoff);
2606
2607         if (bDynLoadBal)
2608         {
2609             limit = dd->comm->cellsize_limit;
2610         }
2611         else
2612         {
2613             if (dd->unitCellInfo.ddBoxIsDynamic)
2614             {
2615                 log->writeLine(
2616                         "(the following are initial values, they could change due to box "
2617                         "deformation)");
2618             }
2619             limit = dd->comm->cellsize_min[XX];
2620             for (d = 1; d < DIM; d++)
2621             {
2622                 limit = std::min(limit, dd->comm->cellsize_min[d]);
2623             }
2624         }
2625
2626         if (comm->systemInfo.haveInterDomainBondeds)
2627         {
2628             log->writeLineFormatted("%40s  %-7s %6.3f nm", "two-body bonded interactions", "(-rdd)",
2629                                     std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2630             log->writeLineFormatted("%40s  %-7s %6.3f nm", "multi-body bonded interactions",
2631                                     "(-rdd)",
2632                                     (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm))
2633                                             ? comm->cutoff_mbody
2634                                             : std::min(comm->systemInfo.cutoff, limit));
2635         }
2636         if (haveInterDomainVsites)
2637         {
2638             log->writeLineFormatted("%40s  %-7s %6.3f nm", "virtual site constructions", "(-rcon)", limit);
2639         }
2640         if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2641         {
2642             std::string separation =
2643                     gmx::formatString("atoms separated by up to %d constraints", 1 + ir->nProjOrder);
2644             log->writeLineFormatted("%40s  %-7s %6.3f nm\n", separation.c_str(), "(-rcon)", limit);
2645         }
2646         log->ensureLineBreak();
2647     }
2648 }
2649
2650 static void logSettings(const gmx::MDLogger& mdlog,
2651                         gmx_domdec_t*        dd,
2652                         const gmx_mtop_t*    mtop,
2653                         const t_inputrec*    ir,
2654                         real                 dlb_scale,
2655                         const gmx_ddbox_t*   ddbox)
2656 {
2657     gmx::StringOutputStream stream;
2658     gmx::TextWriter         log(&stream);
2659     writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2660     if (dd->comm->dlbState == DlbState::offCanTurnOn)
2661     {
2662         {
2663             log.ensureEmptyLine();
2664             log.writeLine(
2665                     "When dynamic load balancing gets turned on, these settings will change to:");
2666         }
2667         writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2668     }
2669     GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2670 }
2671
2672 static void set_cell_limits_dlb(const gmx::MDLogger& mdlog,
2673                                 gmx_domdec_t*        dd,
2674                                 real                 dlb_scale,
2675                                 const t_inputrec*    ir,
2676                                 const gmx_ddbox_t*   ddbox)
2677 {
2678     gmx_domdec_comm_t* comm;
2679     int                d, dim, npulse, npulse_d_max, npulse_d;
2680     gmx_bool           bNoCutOff;
2681
2682     comm = dd->comm;
2683
2684     bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2685
2686     /* Determine the maximum number of comm. pulses in one dimension */
2687
2688     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2689
2690     /* Determine the maximum required number of grid pulses */
2691     if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2692     {
2693         /* Only a single pulse is required */
2694         npulse = 1;
2695     }
2696     else if (!bNoCutOff && comm->cellsize_limit > 0)
2697     {
2698         /* We round down slightly here to avoid overhead due to the latency
2699          * of extra communication calls when the cut-off
2700          * would be only slightly longer than the cell size.
2701          * Later cellsize_limit is redetermined,
2702          * so we can not miss interactions due to this rounding.
2703          */
2704         npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff / comm->cellsize_limit);
2705     }
2706     else
2707     {
2708         /* There is no cell size limit */
2709         npulse = std::max(dd->numCells[XX] - 1, std::max(dd->numCells[YY] - 1, dd->numCells[ZZ] - 1));
2710     }
2711
2712     if (!bNoCutOff && npulse > 1)
2713     {
2714         /* See if we can do with less pulses, based on dlb_scale */
2715         npulse_d_max = 0;
2716         for (d = 0; d < dd->ndim; d++)
2717         {
2718             dim      = dd->dim[d];
2719             npulse_d = static_cast<int>(
2720                     1
2721                     + dd->numCells[dim] * comm->systemInfo.cutoff
2722                               / (ddbox->box_size[dim] * ddbox->skew_fac[dim] * dlb_scale));
2723             npulse_d_max = std::max(npulse_d_max, npulse_d);
2724         }
2725         npulse = std::min(npulse, npulse_d_max);
2726     }
2727
2728     /* This env var can override npulse */
2729     d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2730     if (d > 0)
2731     {
2732         npulse = d;
2733     }
2734
2735     comm->maxpulse       = 1;
2736     comm->bVacDLBNoLimit = (ir->pbcType == PbcType::No);
2737     for (d = 0; d < dd->ndim; d++)
2738     {
2739         comm->cd[d].np_dlb = std::min(npulse, dd->numCells[dd->dim[d]] - 1);
2740         comm->maxpulse     = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2741         if (comm->cd[d].np_dlb < dd->numCells[dd->dim[d]] - 1)
2742         {
2743             comm->bVacDLBNoLimit = FALSE;
2744         }
2745     }
2746
2747     /* cellsize_limit is set for LINCS in init_domain_decomposition */
2748     if (!comm->bVacDLBNoLimit)
2749     {
2750         comm->cellsize_limit = std::max(comm->cellsize_limit, comm->systemInfo.cutoff / comm->maxpulse);
2751     }
2752     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2753     /* Set the minimum cell size for each DD dimension */
2754     for (d = 0; d < dd->ndim; d++)
2755     {
2756         if (comm->bVacDLBNoLimit || comm->cd[d].np_dlb * comm->cellsize_limit >= comm->systemInfo.cutoff)
2757         {
2758             comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2759         }
2760         else
2761         {
2762             comm->cellsize_min_dlb[dd->dim[d]] = comm->systemInfo.cutoff / comm->cd[d].np_dlb;
2763         }
2764     }
2765     if (comm->cutoff_mbody <= 0)
2766     {
2767         comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2768     }
2769     if (isDlbOn(comm))
2770     {
2771         set_dlb_limits(dd);
2772     }
2773 }
2774
2775 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t& dd)
2776 {
2777     return dd.comm->systemInfo.moleculesAreAlwaysWhole;
2778 }
2779
2780 gmx_bool dd_bonded_molpbc(const gmx_domdec_t* dd, PbcType pbcType)
2781 {
2782     /* If each molecule is a single charge group
2783      * or we use domain decomposition for each periodic dimension,
2784      * we do not need to take pbc into account for the bonded interactions.
2785      */
2786     return (pbcType != PbcType::No && dd->comm->systemInfo.haveInterDomainBondeds
2787             && !(dd->numCells[XX] > 1 && dd->numCells[YY] > 1
2788                  && (dd->numCells[ZZ] > 1 || pbcType == PbcType::XY)));
2789 }
2790
2791 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2792 static void set_ddgrid_parameters(const gmx::MDLogger& mdlog,
2793                                   gmx_domdec_t*        dd,
2794                                   real                 dlb_scale,
2795                                   const gmx_mtop_t*    mtop,
2796                                   const t_inputrec*    ir,
2797                                   const gmx_ddbox_t*   ddbox)
2798 {
2799     gmx_domdec_comm_t* comm        = dd->comm;
2800     DDRankSetup&       ddRankSetup = comm->ddRankSetup;
2801
2802     if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2803     {
2804         init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2805         if (ddRankSetup.npmedecompdim >= 2)
2806         {
2807             init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2808         }
2809     }
2810     else
2811     {
2812         ddRankSetup.numRanksDoingPme = 0;
2813         if (dd->pme_nodeid >= 0)
2814         {
2815             gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2816                                  "Can not have separate PME ranks without PME electrostatics");
2817         }
2818     }
2819
2820     if (debug)
2821     {
2822         fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2823     }
2824     if (!isDlbDisabled(comm))
2825     {
2826         set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2827     }
2828
2829     logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
2830
2831     real vol_frac;
2832     if (ir->pbcType == PbcType::No)
2833     {
2834         vol_frac = 1 - 1 / static_cast<double>(dd->nnodes);
2835     }
2836     else
2837     {
2838         vol_frac = (1 + comm_box_frac(dd->numCells, comm->systemInfo.cutoff, *ddbox))
2839                    / static_cast<double>(dd->nnodes);
2840     }
2841     if (debug)
2842     {
2843         fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2844     }
2845     int natoms_tot = mtop->natoms;
2846
2847     dd->ga2la = new gmx_ga2la_t(natoms_tot, static_cast<int>(vol_frac * natoms_tot));
2848 }
2849
2850 /*! \brief Get some important DD parameters which can be modified by env.vars */
2851 static DDSettings getDDSettings(const gmx::MDLogger&     mdlog,
2852                                 const DomdecOptions&     options,
2853                                 const gmx::MdrunOptions& mdrunOptions,
2854                                 const t_inputrec&        ir)
2855 {
2856     DDSettings ddSettings;
2857
2858     ddSettings.useSendRecv2        = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2859     ddSettings.dlb_scale_lim       = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2860     ddSettings.request1D           = bool(dd_getenv(mdlog, "GMX_DD_1D", 0));
2861     ddSettings.useDDOrderZYX       = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
2862     ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
2863     ddSettings.eFlop               = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2864     const int recload              = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2865     ddSettings.nstDDDump           = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2866     ddSettings.nstDDDumpGrid       = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2867     ddSettings.DD_debug            = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
2868
2869     if (ddSettings.useSendRecv2)
2870     {
2871         GMX_LOG(mdlog.info)
2872                 .appendText(
2873                         "Will use two sequential MPI_Sendrecv calls instead of two simultaneous "
2874                         "non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite "
2875                         "communication");
2876     }
2877
2878     if (ddSettings.eFlop)
2879     {
2880         GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2881         ddSettings.recordLoad = true;
2882     }
2883     else
2884     {
2885         ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
2886     }
2887
2888     ddSettings.initialDlbState = determineInitialDlbState(mdlog, options.dlbOption,
2889                                                           ddSettings.recordLoad, mdrunOptions, &ir);
2890     GMX_LOG(mdlog.info)
2891             .appendTextFormatted("Dynamic load balancing: %s",
2892                                  edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);
2893
2894     return ddSettings;
2895 }
2896
2897 gmx_domdec_t::gmx_domdec_t(const t_inputrec& ir) : unitCellInfo(ir) {}
2898
2899 /*! \brief Return whether the simulation described can run a 1D DD.
2900  *
2901  * The GPU halo exchange code requires 1D DD. Such a DD
2902  * generally requires a larger box than other possible decompositions
2903  * with the same rank count, so the calling code might need to decide
2904  * what is the most appropriate way to run the simulation based on
2905  * whether such a DD is possible.
2906  *
2907  * This function works like init_domain_decomposition(), but will not
2908  * give a fatal error, and only uses \c cr for communicating between
2909  * ranks.
2910  *
2911  * It is safe to call before thread-MPI spawns ranks, so that
2912  * thread-MPI can decide whether and how to trigger the GPU halo
2913  * exchange code path. The number of PME ranks, if any, should be set
2914  * in \c options.numPmeRanks.
2915  */
2916 static bool canMake1DDomainDecomposition(const DDSettings&              ddSettingsOriginal,
2917                                          const t_commrec*               cr,
2918                                          const int                      numRanksRequested,
2919                                          const DomdecOptions&           options,
2920                                          const gmx_mtop_t&              mtop,
2921                                          const t_inputrec&              ir,
2922                                          const matrix                   box,
2923                                          gmx::ArrayRef<const gmx::RVec> xGlobal)
2924 {
2925     // Ensure we don't write any output from this checking routine
2926     gmx::MDLogger dummyLogger;
2927
2928     DDSystemInfo systemInfo = getSystemInfo(dummyLogger, cr, options, mtop, ir, box, xGlobal);
2929
2930     DDSettings ddSettings = ddSettingsOriginal;
2931     ddSettings.request1D  = true;
2932     const real gridSetupCellsizeLimit =
2933             getDDGridSetupCellSizeLimit(dummyLogger, !isDlbDisabled(ddSettings.initialDlbState),
2934                                         options.dlbScaling, ir, systemInfo.cellsizeLimit);
2935     gmx_ddbox_t ddbox = { 0 };
2936     DDGridSetup ddGridSetup =
2937             getDDGridSetup(dummyLogger, cr, numRanksRequested, options, ddSettings, systemInfo,
2938                            gridSetupCellsizeLimit, mtop, ir, box, xGlobal, &ddbox);
2939
2940     const bool canMake1DDD = (ddGridSetup.numDomains[XX] != 0);
2941
2942     return canMake1DDD;
2943 }
2944
2945 bool is1D(const gmx_domdec_t& dd)
2946 {
2947     const int maxDimensionSize = std::max(dd.numCells[XX], std::max(dd.numCells[YY], dd.numCells[ZZ]));
2948     const int  productOfDimensionSizes      = dd.numCells[XX] * dd.numCells[YY] * dd.numCells[ZZ];
2949     const bool decompositionHasOneDimension = (maxDimensionSize == productOfDimensionSizes);
2950
2951     return decompositionHasOneDimension;
2952 }
2953
2954 namespace gmx
2955 {
2956
2957 // TODO once the functionality stablizes, move this class and
2958 // supporting functionality into builder.cpp
2959 /*! \brief Impl class for DD builder */
2960 class DomainDecompositionBuilder::Impl
2961 {
2962 public:
2963     //! Constructor
2964     Impl(const MDLogger&      mdlog,
2965          t_commrec*           cr,
2966          const DomdecOptions& options,
2967          const MdrunOptions&  mdrunOptions,
2968          bool                 prefer1D,
2969          const gmx_mtop_t&    mtop,
2970          const t_inputrec&    ir,
2971          const matrix         box,
2972          ArrayRef<const RVec> xGlobal);
2973
2974     //! Build the resulting DD manager
2975     gmx_domdec_t* build(LocalAtomSetManager* atomSets);
2976
2977     //! Objects used in constructing and configuring DD
2978     //! {
2979     //! Logging object
2980     const MDLogger& mdlog_;
2981     //! Communication object
2982     t_commrec* cr_;
2983     //! User-supplied options configuring DD behavior
2984     const DomdecOptions options_;
2985     //! Global system topology
2986     const gmx_mtop_t& mtop_;
2987     //! User input values from the tpr file
2988     const t_inputrec& ir_;
2989     //! }
2990
2991     //! Internal objects used in constructing DD
2992     //! {
2993     //! Settings combined from the user input
2994     DDSettings ddSettings_;
2995     //! Information derived from the simulation system
2996     DDSystemInfo systemInfo_;
2997     //! Box structure
2998     gmx_ddbox_t ddbox_ = { 0 };
2999     //! Organization of the DD grids
3000     DDGridSetup ddGridSetup_;
3001     //! Organzation of the DD ranks
3002     DDRankSetup ddRankSetup_;
3003     //! Number of DD cells in each dimension
3004     ivec ddCellIndex_ = { 0, 0, 0 };
3005     //! IDs of PME-only ranks
3006     std::vector<int> pmeRanks_;
3007     //! Contains a valid Cartesian-communicator-based setup, or defaults.
3008     CartesianRankSetup cartSetup_;
3009     //! }
3010 };
3011
3012 DomainDecompositionBuilder::Impl::Impl(const MDLogger&      mdlog,
3013                                        t_commrec*           cr,
3014                                        const DomdecOptions& options,
3015                                        const MdrunOptions&  mdrunOptions,
3016                                        const bool           prefer1D,
3017                                        const gmx_mtop_t&    mtop,
3018                                        const t_inputrec&    ir,
3019                                        const matrix         box,
3020                                        ArrayRef<const RVec> xGlobal) :
3021     mdlog_(mdlog),
3022     cr_(cr),
3023     options_(options),
3024     mtop_(mtop),
3025     ir_(ir)
3026 {
3027     GMX_LOG(mdlog_.info).appendTextFormatted("\nInitializing Domain Decomposition on %d ranks", cr_->nnodes);
3028
3029     ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_);
3030
3031     if (prefer1D
3032         && canMake1DDomainDecomposition(ddSettings_, cr_, cr_->nnodes, options_, mtop_, ir_, box, xGlobal))
3033     {
3034         ddSettings_.request1D = true;
3035     }
3036
3037     if (ddSettings_.eFlop > 1)
3038     {
3039         /* Ensure that we have different random flop counts on different ranks */
3040         srand(1 + cr_->nodeid);
3041     }
3042
3043     systemInfo_ = getSystemInfo(mdlog_, cr_, options_, mtop_, ir_, box, xGlobal);
3044
3045     const int  numRanksRequested         = cr_->nnodes;
3046     const bool checkForLargePrimeFactors = (options_.numCells[0] <= 0);
3047     checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir_.coulombtype),
3048                                    options_.numPmeRanks, checkForLargePrimeFactors);
3049
3050     // DD grid setup uses a more different cell size limit for
3051     // automated setup than the one in systemInfo_. The latter is used
3052     // in set_dd_limits() to configure DLB, for example.
3053     const real gridSetupCellsizeLimit =
3054             getDDGridSetupCellSizeLimit(mdlog_, !isDlbDisabled(ddSettings_.initialDlbState),
3055                                         options_.dlbScaling, ir_, systemInfo_.cellsizeLimit);
3056     ddGridSetup_ = getDDGridSetup(mdlog_, cr_, numRanksRequested, options_, ddSettings_, systemInfo_,
3057                                   gridSetupCellsizeLimit, mtop_, ir_, box, xGlobal, &ddbox_);
3058     checkDDGridSetup(ddGridSetup_, cr_, options_, ddSettings_, systemInfo_, gridSetupCellsizeLimit, ddbox_);
3059
3060     cr_->npmenodes = ddGridSetup_.numPmeOnlyRanks;
3061
3062     ddRankSetup_ = getDDRankSetup(mdlog_, cr_, ddGridSetup_, ir_);
3063
3064     /* Generate the group communicator, also decides the duty of each rank */
3065     cartSetup_ = makeGroupCommunicators(mdlog_, ddSettings_, options_.rankOrder, ddRankSetup_, cr_,
3066                                         ddCellIndex_, &pmeRanks_);
3067 }
3068
3069 gmx_domdec_t* DomainDecompositionBuilder::Impl::build(LocalAtomSetManager* atomSets)
3070 {
3071     gmx_domdec_t* dd = new gmx_domdec_t(ir_);
3072
3073     copy_ivec(ddCellIndex_, dd->ci);
3074
3075     dd->comm = init_dd_comm();
3076
3077     dd->comm->ddRankSetup        = ddRankSetup_;
3078     dd->comm->cartesianRankSetup = cartSetup_;
3079
3080     set_dd_limits(mdlog_, cr_, dd, options_, ddSettings_, systemInfo_, ddGridSetup_,
3081                   ddRankSetup_.numPPRanks, &mtop_, &ir_, ddbox_);
3082
3083     setupGroupCommunication(mdlog_, ddSettings_, pmeRanks_, cr_, mtop_.natoms, dd);
3084
3085     if (thisRankHasDuty(cr_, DUTY_PP))
3086     {
3087         set_ddgrid_parameters(mdlog_, dd, options_.dlbScaling, &mtop_, &ir_, &ddbox_);
3088
3089         setup_neighbor_relations(dd);
3090     }
3091
3092     /* Set overallocation to avoid frequent reallocation of arrays */
3093     set_over_alloc_dd(TRUE);
3094
3095     dd->atomSets = atomSets;
3096
3097     return dd;
3098 }
3099
3100 DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger&      mdlog,
3101                                                        t_commrec*           cr,
3102                                                        const DomdecOptions& options,
3103                                                        const MdrunOptions&  mdrunOptions,
3104                                                        const bool           prefer1D,
3105                                                        const gmx_mtop_t&    mtop,
3106                                                        const t_inputrec&    ir,
3107                                                        const matrix         box,
3108                                                        ArrayRef<const RVec> xGlobal) :
3109     impl_(new Impl(mdlog, cr, options, mdrunOptions, prefer1D, mtop, ir, box, xGlobal))
3110 {
3111 }
3112
3113 gmx_domdec_t* DomainDecompositionBuilder::build(LocalAtomSetManager* atomSets)
3114 {
3115     return impl_->build(atomSets);
3116 }
3117
3118 DomainDecompositionBuilder::~DomainDecompositionBuilder() = default;
3119
3120 } // namespace gmx
3121
3122 static gmx_bool test_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3123 {
3124     gmx_domdec_t* dd;
3125     gmx_ddbox_t   ddbox;
3126     int           d, dim, np;
3127     real          inv_cell_size;
3128     int           LocallyLimited;
3129
3130     dd = cr->dd;
3131
3132     set_ddbox(*dd, false, box, true, x, &ddbox);
3133
3134     LocallyLimited = 0;
3135
3136     for (d = 0; d < dd->ndim; d++)
3137     {
3138         dim = dd->dim[d];
3139
3140         inv_cell_size = DD_CELL_MARGIN * dd->numCells[dim] / ddbox.box_size[dim];
3141         if (dd->unitCellInfo.ddBoxIsDynamic)
3142         {
3143             inv_cell_size *= DD_PRES_SCALE_MARGIN;
3144         }
3145
3146         np = 1 + static_cast<int>(cutoffRequested * inv_cell_size * ddbox.skew_fac[dim]);
3147
3148         if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3149         {
3150             if (np > dd->comm->cd[d].np_dlb)
3151             {
3152                 return FALSE;
3153             }
3154
3155             /* If a current local cell size is smaller than the requested
3156              * cut-off, we could still fix it, but this gets very complicated.
3157              * Without fixing here, we might actually need more checks.
3158              */
3159             real cellSizeAlongDim =
3160                     (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim]) * ddbox.skew_fac[dim];
3161             if (cellSizeAlongDim * dd->comm->cd[d].np_dlb < cutoffRequested)
3162             {
3163                 LocallyLimited = 1;
3164             }
3165         }
3166     }
3167
3168     if (!isDlbDisabled(dd->comm))
3169     {
3170         /* If DLB is not active yet, we don't need to check the grid jumps.
3171          * Actually we shouldn't, because then the grid jump data is not set.
3172          */
3173         if (isDlbOn(dd->comm) && check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3174         {
3175             LocallyLimited = 1;
3176         }
3177
3178         gmx_sumi(1, &LocallyLimited, cr);
3179
3180         if (LocallyLimited > 0)
3181         {
3182             return FALSE;
3183         }
3184     }
3185
3186     return TRUE;
3187 }
3188
3189 gmx_bool change_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3190 {
3191     gmx_bool bCutoffAllowed;
3192
3193     bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3194
3195     if (bCutoffAllowed)
3196     {
3197         cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3198     }
3199
3200     return bCutoffAllowed;
3201 }
3202
3203 void constructGpuHaloExchange(const gmx::MDLogger& mdlog,
3204                               const t_commrec&     cr,
3205                               const DeviceContext& deviceContext,
3206                               const DeviceStream&  streamLocal,
3207                               const DeviceStream&  streamNonLocal)
3208 {
3209
3210     int gpuHaloExchangeSize = 0;
3211     int pulseStart          = 0;
3212     if (cr.dd->gpuHaloExchange.empty())
3213     {
3214         GMX_LOG(mdlog.warning)
3215                 .asParagraph()
3216                 .appendTextFormatted(
3217                         "NOTE: Activating the 'GPU halo exchange' feature, enabled "
3218                         "by the "
3219                         "GMX_GPU_DD_COMMS environment variable.");
3220     }
3221     else
3222     {
3223         gpuHaloExchangeSize = static_cast<int>(cr.dd->gpuHaloExchange.size());
3224         pulseStart          = gpuHaloExchangeSize - 1;
3225     }
3226     if (cr.dd->comm->cd[0].numPulses() > gpuHaloExchangeSize)
3227     {
3228         for (int pulse = pulseStart; pulse < cr.dd->comm->cd[0].numPulses(); pulse++)
3229         {
3230             cr.dd->gpuHaloExchange.push_back(std::make_unique<gmx::GpuHaloExchange>(
3231                     cr.dd, cr.mpi_comm_mysim, deviceContext, streamLocal, streamNonLocal, pulse));
3232         }
3233     }
3234 }
3235
3236 void reinitGpuHaloExchange(const t_commrec&              cr,
3237                            const DeviceBuffer<gmx::RVec> d_coordinatesBuffer,
3238                            const DeviceBuffer<gmx::RVec> d_forcesBuffer)
3239 {
3240     for (int pulse = 0; pulse < cr.dd->comm->cd[0].numPulses(); pulse++)
3241     {
3242         cr.dd->gpuHaloExchange[pulse]->reinitHalo(d_coordinatesBuffer, d_forcesBuffer);
3243     }
3244 }
3245
3246 void communicateGpuHaloCoordinates(const t_commrec&      cr,
3247                                    const matrix          box,
3248                                    GpuEventSynchronizer* coordinatesReadyOnDeviceEvent)
3249 {
3250     for (int pulse = 0; pulse < cr.dd->comm->cd[0].numPulses(); pulse++)
3251     {
3252         cr.dd->gpuHaloExchange[pulse]->communicateHaloCoordinates(box, coordinatesReadyOnDeviceEvent);
3253     }
3254 }
3255
3256 void communicateGpuHaloForces(const t_commrec& cr, bool accumulateForces)
3257 {
3258     for (int pulse = cr.dd->comm->cd[0].numPulses() - 1; pulse >= 0; pulse--)
3259     {
3260         cr.dd->gpuHaloExchange[pulse]->communicateHaloForces(accumulateForces);
3261     }
3262 }