src/gromacs/domdec/domdec.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the.
   5  * Copyright (c) 2019, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36
  37 #include "gmxpre.h"
  38
  39 #include "domdec.h"
  40
  41 #include "config.h"
  42
  43 #include <cassert>
  44 #include <cinttypes>
  45 #include <climits>
  46 #include <cmath>
  47 #include <cstdio>
  48 #include <cstdlib>
  49 #include <cstring>
  50
  51 #include <algorithm>
  52 #include <memory>
  53
  54 #include "gromacs/domdec/builder.h"
  55 #include "gromacs/domdec/collect.h"
  56 #include "gromacs/domdec/dlb.h"
  57 #include "gromacs/domdec/dlbtiming.h"
  58 #include "gromacs/domdec/domdec_network.h"
  59 #include "gromacs/domdec/ga2la.h"
  60 #include "gromacs/domdec/gpuhaloexchange.h"
  61 #include "gromacs/domdec/options.h"
  62 #include "gromacs/domdec/partition.h"
  63 #include "gromacs/gmxlib/network.h"
  64 #include "gromacs/gmxlib/nrnb.h"
  65 #include "gromacs/gpu_utils/gpu_utils.h"
  66 #include "gromacs/hardware/hw_info.h"
  67 #include "gromacs/listed_forces/manage_threading.h"
  68 #include "gromacs/math/vec.h"
  69 #include "gromacs/math/vectypes.h"
  70 #include "gromacs/mdlib/calc_verletbuf.h"
  71 #include "gromacs/mdlib/constr.h"
  72 #include "gromacs/mdlib/constraintrange.h"
  73 #include "gromacs/mdlib/updategroups.h"
  74 #include "gromacs/mdlib/vsite.h"
  75 #include "gromacs/mdtypes/commrec.h"
  76 #include "gromacs/mdtypes/forceoutput.h"
  77 #include "gromacs/mdtypes/inputrec.h"
  78 #include "gromacs/mdtypes/mdrunoptions.h"
  79 #include "gromacs/mdtypes/state.h"
  80 #include "gromacs/pbcutil/ishift.h"
  81 #include "gromacs/pbcutil/pbc.h"
  82 #include "gromacs/pulling/pull.h"
  83 #include "gromacs/timing/wallcycle.h"
  84 #include "gromacs/topology/block.h"
  85 #include "gromacs/topology/idef.h"
  86 #include "gromacs/topology/ifunc.h"
  87 #include "gromacs/topology/mtop_lookup.h"
  88 #include "gromacs/topology/mtop_util.h"
  89 #include "gromacs/topology/topology.h"
  90 #include "gromacs/utility/basedefinitions.h"
  91 #include "gromacs/utility/basenetwork.h"
  92 #include "gromacs/utility/cstringutil.h"
  93 #include "gromacs/utility/exceptions.h"
  94 #include "gromacs/utility/fatalerror.h"
  95 #include "gromacs/utility/gmxmpi.h"
  96 #include "gromacs/utility/logger.h"
  97 #include "gromacs/utility/real.h"
  98 #include "gromacs/utility/smalloc.h"
  99 #include "gromacs/utility/strconvert.h"
 100 #include "gromacs/utility/stringstream.h"
 101 #include "gromacs/utility/stringutil.h"
 102 #include "gromacs/utility/textwriter.h"
 103
 104 #include "atomdistribution.h"
 105 #include "box.h"
 106 #include "cellsizes.h"
 107 #include "distribute.h"
 108 #include "domdec_constraints.h"
 109 #include "domdec_internal.h"
 110 #include "domdec_setup.h"
 111 #include "domdec_vsite.h"
 112 #include "redistribute.h"
 113 #include "utility.h"
 114
 115 // TODO remove this when moving domdec into gmx namespace
 116 using gmx::DdRankOrder;
 117 using gmx::DlbOption;
 118 using gmx::DomdecOptions;
 119
 120 static const char* edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
 121
 122 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
 123 #define DD_CGIBS 2
 124
 125 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
 126 #define DD_FLAG_NRCG 65535
 127 #define DD_FLAG_FW(d) (1 << (16 + (d)*2))
 128 #define DD_FLAG_BW(d) (1 << (16 + (d)*2 + 1))
 129
 130 /* The DD zone order */
 131 static const ivec dd_zo[DD_MAXZONE] = { { 0, 0, 0 }, { 1, 0, 0 }, { 1, 1, 0 }, { 0, 1, 0 },
 132                                         { 0, 1, 1 }, { 0, 0, 1 }, { 1, 0, 1 }, { 1, 1, 1 } };
 133
 134 /* The non-bonded zone-pair setup for domain decomposition
 135  * The first number is the i-zone, the second number the first j-zone seen by
 136  * this i-zone, the third number the last+1 j-zone seen by this i-zone.
 137  * As is, this is for 3D decomposition, where there are 4 i-zones.
 138  * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
 139  * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
 140  */
 141 static const int ddNonbondedZonePairRanges[DD_MAXIZONE][3] = { { 0, 0, 8 },
 142                                                                { 1, 3, 6 },
 143                                                                { 2, 5, 6 },
 144                                                                { 3, 5, 7 } };
 145
 146
 147 /*
 148    #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
 149
 150    static void index2xyz(ivec nc,int ind,ivec xyz)
 151    {
 152    xyz[XX] = ind % nc[XX];
 153    xyz[YY] = (ind / nc[XX]) % nc[YY];
 154    xyz[ZZ] = ind / (nc[YY]*nc[XX]);
 155    }
 156  */
 157
 158 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
 159 {
 160     xyz[XX] = ind / (nc[YY] * nc[ZZ]);
 161     xyz[YY] = (ind / nc[ZZ]) % nc[YY];
 162     xyz[ZZ] = ind % nc[ZZ];
 163 }
 164
 165 static int ddcoord2ddnodeid(gmx_domdec_t* dd, ivec c)
 166 {
 167     int ddnodeid = -1;
 168
 169     const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
 170     const int                 ddindex   = dd_index(dd->nc, c);
 171     if (cartSetup.bCartesianPP_PME)
 172     {
 173         ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
 174     }
 175     else if (cartSetup.bCartesianPP)
 176     {
 177 #if GMX_MPI
 178         MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
 179 #endif
 180     }
 181     else
 182     {
 183         ddnodeid = ddindex;
 184     }
 185
 186     return ddnodeid;
 187 }
 188
 189 int ddglatnr(const gmx_domdec_t* dd, int i)
 190 {
 191     int atnr;
 192
 193     if (dd == nullptr)
 194     {
 195         atnr = i + 1;
 196     }
 197     else
 198     {
 199         if (i >= dd->comm->atomRanges.numAtomsTotal())
 200         {
 201             gmx_fatal(FARGS,
 202                       "glatnr called with %d, which is larger than the local number of atoms (%d)",
 203                       i, dd->comm->atomRanges.numAtomsTotal());
 204         }
 205         atnr = dd->globalAtomIndices[i] + 1;
 206     }
 207
 208     return atnr;
 209 }
 210
 211 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t& dd)
 212 {
 213     GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
 214     return dd.comm->systemInfo.updateGroupingPerMoleculetype;
 215 }
 216
 217 void dd_store_state(gmx_domdec_t* dd, t_state* state)
 218 {
 219     int i;
 220
 221     if (state->ddp_count != dd->ddp_count)
 222     {
 223         gmx_incons("The MD state does not match the domain decomposition state");
 224     }
 225
 226     state->cg_gl.resize(dd->ncg_home);
 227     for (i = 0; i < dd->ncg_home; i++)
 228     {
 229         state->cg_gl[i] = dd->globalAtomGroupIndices[i];
 230     }
 231
 232     state->ddp_count_cg_gl = dd->ddp_count;
 233 }
 234
 235 gmx_domdec_zones_t* domdec_zones(gmx_domdec_t* dd)
 236 {
 237     return &dd->comm->zones;
 238 }
 239
 240 int dd_numAtomsZones(const gmx_domdec_t& dd)
 241 {
 242     return dd.comm->atomRanges.end(DDAtomRanges::Type::Zones);
 243 }
 244
 245 int dd_numHomeAtoms(const gmx_domdec_t& dd)
 246 {
 247     return dd.comm->atomRanges.numHomeAtoms();
 248 }
 249
 250 int dd_natoms_mdatoms(const gmx_domdec_t* dd)
 251 {
 252     /* We currently set mdatoms entries for all atoms:
 253      * local + non-local + communicated for vsite + constraints
 254      */
 255
 256     return dd->comm->atomRanges.numAtomsTotal();
 257 }
 258
 259 int dd_natoms_vsite(const gmx_domdec_t* dd)
 260 {
 261     return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
 262 }
 263
 264 void dd_get_constraint_range(const gmx_domdec_t* dd, int* at_start, int* at_end)
 265 {
 266     *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
 267     *at_end   = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
 268 }
 269
 270 void dd_move_x(gmx_domdec_t* dd, const matrix box, gmx::ArrayRef<gmx::RVec> x, gmx_wallcycle* wcycle)
 271 {
 272     wallcycle_start(wcycle, ewcMOVEX);
 273
 274     int                    nzone, nat_tot;
 275     gmx_domdec_comm_t*     comm;
 276     gmx_domdec_comm_dim_t* cd;
 277     rvec                   shift = { 0, 0, 0 };
 278     gmx_bool               bPBC, bScrew;
 279
 280     comm = dd->comm;
 281
 282     nzone   = 1;
 283     nat_tot = comm->atomRanges.numHomeAtoms();
 284     for (int d = 0; d < dd->ndim; d++)
 285     {
 286         bPBC   = (dd->ci[dd->dim[d]] == 0);
 287         bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
 288         if (bPBC)
 289         {
 290             copy_rvec(box[dd->dim[d]], shift);
 291         }
 292         cd = &comm->cd[d];
 293         for (const gmx_domdec_ind_t& ind : cd->ind)
 294         {
 295             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 296             gmx::ArrayRef<gmx::RVec>& sendBuffer = sendBufferAccess.buffer;
 297             int                       n          = 0;
 298             if (!bPBC)
 299             {
 300                 for (int j : ind.index)
 301                 {
 302                     sendBuffer[n] = x[j];
 303                     n++;
 304                 }
 305             }
 306             else if (!bScrew)
 307             {
 308                 for (int j : ind.index)
 309                 {
 310                     /* We need to shift the coordinates */
 311                     for (int d = 0; d < DIM; d++)
 312                     {
 313                         sendBuffer[n][d] = x[j][d] + shift[d];
 314                     }
 315                     n++;
 316                 }
 317             }
 318             else
 319             {
 320                 for (int j : ind.index)
 321                 {
 322                     /* Shift x */
 323                     sendBuffer[n][XX] = x[j][XX] + shift[XX];
 324                     /* Rotate y and z.
 325                      * This operation requires a special shift force
 326                      * treatment, which is performed in calc_vir.
 327                      */
 328                     sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
 329                     sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
 330                     n++;
 331                 }
 332             }
 333
 334             DDBufferAccess<gmx::RVec> receiveBufferAccess(
 335                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 336
 337             gmx::ArrayRef<gmx::RVec> receiveBuffer;
 338             if (cd->receiveInPlace)
 339             {
 340                 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
 341             }
 342             else
 343             {
 344                 receiveBuffer = receiveBufferAccess.buffer;
 345             }
 346             /* Send and receive the coordinates */
 347             ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
 348
 349             if (!cd->receiveInPlace)
 350             {
 351                 int j = 0;
 352                 for (int zone = 0; zone < nzone; zone++)
 353                 {
 354                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 355                     {
 356                         x[i] = receiveBuffer[j++];
 357                     }
 358                 }
 359             }
 360             nat_tot += ind.nrecv[nzone + 1];
 361         }
 362         nzone += nzone;
 363     }
 364
 365     wallcycle_stop(wcycle, ewcMOVEX);
 366 }
 367
 368 void dd_move_f(gmx_domdec_t* dd, gmx::ForceWithShiftForces* forceWithShiftForces, gmx_wallcycle* wcycle)
 369 {
 370     wallcycle_start(wcycle, ewcMOVEF);
 371
 372     gmx::ArrayRef<gmx::RVec> f      = forceWithShiftForces->force();
 373     gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();
 374
 375     gmx_domdec_comm_t& comm    = *dd->comm;
 376     int                nzone   = comm.zones.n / 2;
 377     int                nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
 378     for (int d = dd->ndim - 1; d >= 0; d--)
 379     {
 380         /* Only forces in domains near the PBC boundaries need to
 381            consider PBC in the treatment of fshift */
 382         const bool shiftForcesNeedPbc =
 383                 (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
 384         const bool applyScrewPbc =
 385                 (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
 386         /* Determine which shift vector we need */
 387         ivec vis        = { 0, 0, 0 };
 388         vis[dd->dim[d]] = 1;
 389         const int is    = IVEC2IS(vis);
 390
 391         /* Loop over the pulses */
 392         const gmx_domdec_comm_dim_t& cd = comm.cd[d];
 393         for (int p = cd.numPulses() - 1; p >= 0; p--)
 394         {
 395             const gmx_domdec_ind_t&   ind = cd.ind[p];
 396             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
 397             gmx::ArrayRef<gmx::RVec>& receiveBuffer = receiveBufferAccess.buffer;
 398
 399             nat_tot -= ind.nrecv[nzone + 1];
 400
 401             DDBufferAccess<gmx::RVec> sendBufferAccess(
 402                     comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 403
 404             gmx::ArrayRef<gmx::RVec> sendBuffer;
 405             if (cd.receiveInPlace)
 406             {
 407                 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
 408             }
 409             else
 410             {
 411                 sendBuffer = sendBufferAccess.buffer;
 412                 int j      = 0;
 413                 for (int zone = 0; zone < nzone; zone++)
 414                 {
 415                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 416                     {
 417                         sendBuffer[j++] = f[i];
 418                     }
 419                 }
 420             }
 421             /* Communicate the forces */
 422             ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
 423             /* Add the received forces */
 424             int n = 0;
 425             if (!shiftForcesNeedPbc)
 426             {
 427                 for (int j : ind.index)
 428                 {
 429                     for (int d = 0; d < DIM; d++)
 430                     {
 431                         f[j][d] += receiveBuffer[n][d];
 432                     }
 433                     n++;
 434                 }
 435             }
 436             else if (!applyScrewPbc)
 437             {
 438                 for (int j : ind.index)
 439                 {
 440                     for (int d = 0; d < DIM; d++)
 441                     {
 442                         f[j][d] += receiveBuffer[n][d];
 443                     }
 444                     /* Add this force to the shift force */
 445                     for (int d = 0; d < DIM; d++)
 446                     {
 447                         fshift[is][d] += receiveBuffer[n][d];
 448                     }
 449                     n++;
 450                 }
 451             }
 452             else
 453             {
 454                 for (int j : ind.index)
 455                 {
 456                     /* Rotate the force */
 457                     f[j][XX] += receiveBuffer[n][XX];
 458                     f[j][YY] -= receiveBuffer[n][YY];
 459                     f[j][ZZ] -= receiveBuffer[n][ZZ];
 460                     if (shiftForcesNeedPbc)
 461                     {
 462                         /* Add this force to the shift force */
 463                         for (int d = 0; d < DIM; d++)
 464                         {
 465                             fshift[is][d] += receiveBuffer[n][d];
 466                         }
 467                     }
 468                     n++;
 469                 }
 470             }
 471         }
 472         nzone /= 2;
 473     }
 474     wallcycle_stop(wcycle, ewcMOVEF);
 475 }
 476
 477 /* Convenience function for extracting a real buffer from an rvec buffer
 478  *
 479  * To reduce the number of temporary communication buffers and avoid
 480  * cache polution, we reuse gmx::RVec buffers for storing reals.
 481  * This functions return a real buffer reference with the same number
 482  * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
 483  */
 484 static gmx::ArrayRef<real> realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
 485 {
 486     return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0], arrayRef.size());
 487 }
 488
 489 void dd_atom_spread_real(gmx_domdec_t* dd, real v[])
 490 {
 491     int                    nzone, nat_tot;
 492     gmx_domdec_comm_t*     comm;
 493     gmx_domdec_comm_dim_t* cd;
 494
 495     comm = dd->comm;
 496
 497     nzone   = 1;
 498     nat_tot = comm->atomRanges.numHomeAtoms();
 499     for (int d = 0; d < dd->ndim; d++)
 500     {
 501         cd = &comm->cd[d];
 502         for (const gmx_domdec_ind_t& ind : cd->ind)
 503         {
 504             /* Note: We provision for RVec instead of real, so a factor of 3
 505              * more than needed. The buffer actually already has this size
 506              * and we pass a plain pointer below, so this does not matter.
 507              */
 508             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 509             gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 510             int                 n          = 0;
 511             for (int j : ind.index)
 512             {
 513                 sendBuffer[n++] = v[j];
 514             }
 515
 516             DDBufferAccess<gmx::RVec> receiveBufferAccess(
 517                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 518
 519             gmx::ArrayRef<real> receiveBuffer;
 520             if (cd->receiveInPlace)
 521             {
 522                 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 523             }
 524             else
 525             {
 526                 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 527             }
 528             /* Send and receive the data */
 529             ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
 530             if (!cd->receiveInPlace)
 531             {
 532                 int j = 0;
 533                 for (int zone = 0; zone < nzone; zone++)
 534                 {
 535                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 536                     {
 537                         v[i] = receiveBuffer[j++];
 538                     }
 539                 }
 540             }
 541             nat_tot += ind.nrecv[nzone + 1];
 542         }
 543         nzone += nzone;
 544     }
 545 }
 546
 547 void dd_atom_sum_real(gmx_domdec_t* dd, real v[])
 548 {
 549     int                    nzone, nat_tot;
 550     gmx_domdec_comm_t*     comm;
 551     gmx_domdec_comm_dim_t* cd;
 552
 553     comm = dd->comm;
 554
 555     nzone   = comm->zones.n / 2;
 556     nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
 557     for (int d = dd->ndim - 1; d >= 0; d--)
 558     {
 559         cd = &comm->cd[d];
 560         for (int p = cd->numPulses() - 1; p >= 0; p--)
 561         {
 562             const gmx_domdec_ind_t& ind = cd->ind[p];
 563
 564             /* Note: We provision for RVec instead of real, so a factor of 3
 565              * more than needed. The buffer actually already has this size
 566              * and we typecast, so this works as intended.
 567              */
 568             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 569             gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 570             nat_tot -= ind.nrecv[nzone + 1];
 571
 572             DDBufferAccess<gmx::RVec> sendBufferAccess(
 573                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 574
 575             gmx::ArrayRef<real> sendBuffer;
 576             if (cd->receiveInPlace)
 577             {
 578                 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 579             }
 580             else
 581             {
 582                 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 583                 int j      = 0;
 584                 for (int zone = 0; zone < nzone; zone++)
 585                 {
 586                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 587                     {
 588                         sendBuffer[j++] = v[i];
 589                     }
 590                 }
 591             }
 592             /* Communicate the forces */
 593             ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
 594             /* Add the received forces */
 595             int n = 0;
 596             for (int j : ind.index)
 597             {
 598                 v[j] += receiveBuffer[n];
 599                 n++;
 600             }
 601         }
 602         nzone /= 2;
 603     }
 604 }
 605
 606 real dd_cutoff_multibody(const gmx_domdec_t* dd)
 607 {
 608     const gmx_domdec_comm_t& comm       = *dd->comm;
 609     const DDSystemInfo&      systemInfo = comm.systemInfo;
 610
 611     real r = -1;
 612     if (systemInfo.haveInterDomainMultiBodyBondeds)
 613     {
 614         if (comm.cutoff_mbody > 0)
 615         {
 616             r = comm.cutoff_mbody;
 617         }
 618         else
 619         {
 620             /* cutoff_mbody=0 means we do not have DLB */
 621             r = comm.cellsize_min[dd->dim[0]];
 622             for (int di = 1; di < dd->ndim; di++)
 623             {
 624                 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
 625             }
 626             if (comm.systemInfo.filterBondedCommunication)
 627             {
 628                 r = std::max(r, comm.cutoff_mbody);
 629             }
 630             else
 631             {
 632                 r = std::min(r, systemInfo.cutoff);
 633             }
 634         }
 635     }
 636
 637     return r;
 638 }
 639
 640 real dd_cutoff_twobody(const gmx_domdec_t* dd)
 641 {
 642     real r_mb;
 643
 644     r_mb = dd_cutoff_multibody(dd);
 645
 646     return std::max(dd->comm->systemInfo.cutoff, r_mb);
 647 }
 648
 649
 650 static void dd_cart_coord2pmecoord(const DDRankSetup&        ddRankSetup,
 651                                    const CartesianRankSetup& cartSetup,
 652                                    const ivec                coord,
 653                                    ivec                      coord_pme)
 654 {
 655     const int nc   = ddRankSetup.numPPCells[cartSetup.cartpmedim];
 656     const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
 657     copy_ivec(coord, coord_pme);
 658     coord_pme[cartSetup.cartpmedim] =
 659             nc + (coord[cartSetup.cartpmedim] * (ntot - nc) + (ntot - nc) / 2) / nc;
 660 }
 661
 662 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
 663 static int ddindex2pmeindex(const DDRankSetup& ddRankSetup, const int ddCellIndex)
 664 {
 665     const int npp  = ddRankSetup.numPPRanks;
 666     const int npme = ddRankSetup.numRanksDoingPme;
 667
 668     /* Here we assign a PME node to communicate with this DD node
 669      * by assuming that the major index of both is x.
 670      * We add npme/2 to obtain an even distribution.
 671      */
 672     return (ddCellIndex * npme + npme / 2) / npp;
 673 }
 674
 675 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup& ddRankSetup)
 676 {
 677     std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
 678
 679     int n = 0;
 680     for (int i = 0; i < ddRankSetup.numPPRanks; i++)
 681     {
 682         const int p0 = ddindex2pmeindex(ddRankSetup, i);
 683         const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
 684         if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
 685         {
 686             if (debug)
 687             {
 688                 fprintf(debug, "pme_rank[%d] = %d\n", n, i + 1 + n);
 689             }
 690             pmeRanks[n] = i + 1 + n;
 691             n++;
 692         }
 693     }
 694
 695     return pmeRanks;
 696 }
 697
 698 static int gmx_ddcoord2pmeindex(const t_commrec* cr, int x, int y, int z)
 699 {
 700     gmx_domdec_t* dd;
 701     ivec          coords;
 702     int           slab;
 703
 704     dd = cr->dd;
 705     /*
 706        if (dd->comm->bCartesian) {
 707        gmx_ddindex2xyz(dd->nc,ddindex,coords);
 708        dd_coords2pmecoords(dd,coords,coords_pme);
 709        copy_ivec(dd->ntot,nc);
 710        nc[dd->cartpmedim]         -= dd->nc[dd->cartpmedim];
 711        coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
 712
 713        slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
 714        } else {
 715        slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
 716        }
 717      */
 718     coords[XX] = x;
 719     coords[YY] = y;
 720     coords[ZZ] = z;
 721     slab       = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->nc, coords));
 722
 723     return slab;
 724 }
 725
 726 static int ddcoord2simnodeid(const t_commrec* cr, int x, int y, int z)
 727 {
 728     const CartesianRankSetup& cartSetup = cr->dd->comm->cartesianRankSetup;
 729     ivec                      coords    = { x, y, z };
 730     int                       nodeid    = -1;
 731
 732     if (cartSetup.bCartesianPP_PME)
 733     {
 734 #if GMX_MPI
 735         MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
 736 #endif
 737     }
 738     else
 739     {
 740         int ddindex = dd_index(cr->dd->nc, coords);
 741         if (cartSetup.bCartesianPP)
 742         {
 743             nodeid = cartSetup.ddindex2simnodeid[ddindex];
 744         }
 745         else
 746         {
 747             if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
 748             {
 749                 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
 750             }
 751             else
 752             {
 753                 nodeid = ddindex;
 754             }
 755         }
 756     }
 757
 758     return nodeid;
 759 }
 760
 761 static int dd_simnode2pmenode(const DDRankSetup&        ddRankSetup,
 762                               const CartesianRankSetup& cartSetup,
 763                               gmx::ArrayRef<const int>  pmeRanks,
 764                               const t_commrec gmx_unused* cr,
 765                               const int                   sim_nodeid)
 766 {
 767     int pmenode = -1;
 768
 769     /* This assumes a uniform x domain decomposition grid cell size */
 770     if (cartSetup.bCartesianPP_PME)
 771     {
 772 #if GMX_MPI
 773         ivec coord, coord_pme;
 774         MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
 775         if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
 776         {
 777             /* This is a PP rank */
 778             dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
 779             MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
 780         }
 781 #endif
 782     }
 783     else if (cartSetup.bCartesianPP)
 784     {
 785         if (sim_nodeid < ddRankSetup.numPPRanks)
 786         {
 787             pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
 788         }
 789     }
 790     else
 791     {
 792         /* This assumes DD cells with identical x coordinates
 793          * are numbered sequentially.
 794          */
 795         if (pmeRanks.empty())
 796         {
 797             if (sim_nodeid < ddRankSetup.numPPRanks)
 798             {
 799                 /* The DD index equals the nodeid */
 800                 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
 801             }
 802         }
 803         else
 804         {
 805             int i = 0;
 806             while (sim_nodeid > pmeRanks[i])
 807             {
 808                 i++;
 809             }
 810             if (sim_nodeid < pmeRanks[i])
 811             {
 812                 pmenode = pmeRanks[i];
 813             }
 814         }
 815     }
 816
 817     return pmenode;
 818 }
 819
 820 NumPmeDomains getNumPmeDomains(const gmx_domdec_t* dd)
 821 {
 822     if (dd != nullptr)
 823     {
 824         return { dd->comm->ddRankSetup.npmenodes_x, dd->comm->ddRankSetup.npmenodes_y };
 825     }
 826     else
 827     {
 828         return { 1, 1 };
 829     }
 830 }
 831
 832 std::vector<int> get_pme_ddranks(const t_commrec* cr, const int pmenodeid)
 833 {
 834     const DDRankSetup&        ddRankSetup = cr->dd->comm->ddRankSetup;
 835     const CartesianRankSetup& cartSetup   = cr->dd->comm->cartesianRankSetup;
 836     GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks,
 837                        "This function should only be called when PME-only ranks are in use");
 838     std::vector<int> ddranks;
 839     ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1) / ddRankSetup.numRanksDoingPme);
 840
 841     for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
 842     {
 843         for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
 844         {
 845             for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
 846             {
 847                 if (cartSetup.bCartesianPP_PME)
 848                 {
 849                     ivec coord = { x, y, z };
 850                     ivec coord_pme;
 851                     dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
 852                     if (cr->dd->ci[XX] == coord_pme[XX] && cr->dd->ci[YY] == coord_pme[YY]
 853                         && cr->dd->ci[ZZ] == coord_pme[ZZ])
 854                     {
 855                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 856                     }
 857                 }
 858                 else
 859                 {
 860                     /* The slab corresponds to the nodeid in the PME group */
 861                     if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
 862                     {
 863                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 864                     }
 865                 }
 866             }
 867         }
 868     }
 869     return ddranks;
 870 }
 871
 872 static gmx_bool receive_vir_ener(const gmx_domdec_t* dd, gmx::ArrayRef<const int> pmeRanks, const t_commrec* cr)
 873 {
 874     gmx_bool bReceive = TRUE;
 875
 876     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
 877     if (ddRankSetup.usePmeOnlyRanks)
 878     {
 879         const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
 880         if (cartSetup.bCartesianPP_PME)
 881         {
 882 #if GMX_MPI
 883             int  pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
 884             ivec coords;
 885             MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
 886             coords[cartSetup.cartpmedim]++;
 887             if (coords[cartSetup.cartpmedim] < dd->nc[cartSetup.cartpmedim])
 888             {
 889                 int rank;
 890                 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
 891                 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
 892                 {
 893                     /* This is not the last PP node for pmenode */
 894                     bReceive = FALSE;
 895                 }
 896             }
 897 #else
 898             GMX_RELEASE_ASSERT(
 899                     false,
 900                     "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
 901 #endif
 902         }
 903         else
 904         {
 905             int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
 906             if (cr->sim_nodeid + 1 < cr->nnodes
 907                 && dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
 908             {
 909                 /* This is not the last PP node for pmenode */
 910                 bReceive = FALSE;
 911             }
 912         }
 913     }
 914
 915     return bReceive;
 916 }
 917
 918 static void set_slb_pme_dim_f(gmx_domdec_t* dd, int dim, real** dim_f)
 919 {
 920     gmx_domdec_comm_t* comm;
 921     int                i;
 922
 923     comm = dd->comm;
 924
 925     snew(*dim_f, dd->nc[dim] + 1);
 926     (*dim_f)[0] = 0;
 927     for (i = 1; i < dd->nc[dim]; i++)
 928     {
 929         if (comm->slb_frac[dim])
 930         {
 931             (*dim_f)[i] = (*dim_f)[i - 1] + comm->slb_frac[dim][i - 1];
 932         }
 933         else
 934         {
 935             (*dim_f)[i] = static_cast<real>(i) / static_cast<real>(dd->nc[dim]);
 936         }
 937     }
 938     (*dim_f)[dd->nc[dim]] = 1;
 939 }
 940
 941 static void init_ddpme(gmx_domdec_t* dd, gmx_ddpme_t* ddpme, int dimind)
 942 {
 943     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
 944
 945     if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
 946     {
 947         ddpme->dim = YY;
 948     }
 949     else
 950     {
 951         ddpme->dim = dimind;
 952     }
 953     ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
 954
 955     ddpme->nslab = (ddpme->dim == 0 ? ddRankSetup.npmenodes_x : ddRankSetup.npmenodes_y);
 956
 957     if (ddpme->nslab <= 1)
 958     {
 959         return;
 960     }
 961
 962     const int nso = ddRankSetup.numRanksDoingPme / ddpme->nslab;
 963     /* Determine for each PME slab the PP location range for dimension dim */
 964     snew(ddpme->pp_min, ddpme->nslab);
 965     snew(ddpme->pp_max, ddpme->nslab);
 966     for (int slab = 0; slab < ddpme->nslab; slab++)
 967     {
 968         ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
 969         ddpme->pp_max[slab] = 0;
 970     }
 971     for (int i = 0; i < dd->nnodes; i++)
 972     {
 973         ivec xyz;
 974         ddindex2xyz(dd->nc, i, xyz);
 975         /* For y only use our y/z slab.
 976          * This assumes that the PME x grid size matches the DD grid size.
 977          */
 978         if (dimind == 0 || xyz[XX] == dd->ci[XX])
 979         {
 980             const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
 981             int       slab;
 982             if (dimind == 0)
 983             {
 984                 slab = pmeindex / nso;
 985             }
 986             else
 987             {
 988                 slab = pmeindex % ddpme->nslab;
 989             }
 990             ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
 991             ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
 992         }
 993     }
 994
 995     set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
 996 }
 997
 998 int dd_pme_maxshift_x(const gmx_domdec_t* dd)
 999 {
1000     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
1001
1002     if (ddRankSetup.ddpme[0].dim == XX)
1003     {
1004         return ddRankSetup.ddpme[0].maxshift;
1005     }
1006     else
1007     {
1008         return 0;
1009     }
1010 }
1011
1012 int dd_pme_maxshift_y(const gmx_domdec_t* dd)
1013 {
1014     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
1015
1016     if (ddRankSetup.ddpme[0].dim == YY)
1017     {
1018         return ddRankSetup.ddpme[0].maxshift;
1019     }
1020     else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
1021     {
1022         return ddRankSetup.ddpme[1].maxshift;
1023     }
1024     else
1025     {
1026         return 0;
1027     }
1028 }
1029
1030 bool ddHaveSplitConstraints(const gmx_domdec_t& dd)
1031 {
1032     return dd.comm->systemInfo.haveSplitConstraints;
1033 }
1034
1035 bool ddUsesUpdateGroups(const gmx_domdec_t& dd)
1036 {
1037     return dd.comm->systemInfo.useUpdateGroups;
1038 }
1039
1040 void dd_cycles_add(const gmx_domdec_t* dd, float cycles, int ddCycl)
1041 {
1042     /* Note that the cycles value can be incorrect, either 0 or some
1043      * extremely large value, when our thread migrated to another core
1044      * with an unsynchronized cycle counter. If this happens less often
1045      * that once per nstlist steps, this will not cause issues, since
1046      * we later subtract the maximum value from the sum over nstlist steps.
1047      * A zero count will slightly lower the total, but that's a small effect.
1048      * Note that the main purpose of the subtraction of the maximum value
1049      * is to avoid throwing off the load balancing when stalls occur due
1050      * e.g. system activity or network congestion.
1051      */
1052     dd->comm->cycl[ddCycl] += cycles;
1053     dd->comm->cycl_n[ddCycl]++;
1054     if (cycles > dd->comm->cycl_max[ddCycl])
1055     {
1056         dd->comm->cycl_max[ddCycl] = cycles;
1057     }
1058 }
1059
1060 #if GMX_MPI
1061 static void make_load_communicator(gmx_domdec_t* dd, int dim_ind, ivec loc)
1062 {
1063     MPI_Comm c_row;
1064     int      dim, i, rank;
1065     ivec     loc_c;
1066     gmx_bool bPartOfGroup = FALSE;
1067
1068     dim = dd->dim[dim_ind];
1069     copy_ivec(loc, loc_c);
1070     for (i = 0; i < dd->nc[dim]; i++)
1071     {
1072         loc_c[dim] = i;
1073         rank       = dd_index(dd->nc, loc_c);
1074         if (rank == dd->rank)
1075         {
1076             /* This process is part of the group */
1077             bPartOfGroup = TRUE;
1078         }
1079     }
1080     MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank, &c_row);
1081     if (bPartOfGroup)
1082     {
1083         dd->comm->mpi_comm_load[dim_ind] = c_row;
1084         if (!isDlbDisabled(dd->comm))
1085         {
1086             DDCellsizesWithDlb& cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1087
1088             if (dd->ci[dim] == dd->master_ci[dim])
1089             {
1090                 /* This is the root process of this row */
1091                 cellsizes.rowMaster = std::make_unique<RowMaster>();
1092
1093                 RowMaster& rowMaster = *cellsizes.rowMaster;
1094                 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1095                 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
1096                 rowMaster.isCellMin.resize(dd->nc[dim]);
1097                 if (dim_ind > 0)
1098                 {
1099                     rowMaster.bounds.resize(dd->nc[dim]);
1100                 }
1101                 rowMaster.buf_ncd.resize(dd->nc[dim]);
1102             }
1103             else
1104             {
1105                 /* This is not a root process, we only need to receive cell_f */
1106                 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1107             }
1108         }
1109         if (dd->ci[dim] == dd->master_ci[dim])
1110         {
1111             snew(dd->comm->load[dim_ind].load, dd->nc[dim] * DD_NLOAD_MAX);
1112         }
1113     }
1114 }
1115 #endif
1116
1117 void dd_setup_dlb_resource_sharing(const t_commrec* cr, int gpu_id)
1118 {
1119 #if GMX_MPI
1120     int           physicalnode_id_hash;
1121     gmx_domdec_t* dd;
1122     MPI_Comm      mpi_comm_pp_physicalnode;
1123
1124     if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1125     {
1126         /* Only ranks with short-ranged tasks (currently) use GPUs.
1127          * If we don't have GPUs assigned, there are no resources to share.
1128          */
1129         return;
1130     }
1131
1132     physicalnode_id_hash = gmx_physicalnode_id_hash();
1133
1134     dd = cr->dd;
1135
1136     if (debug)
1137     {
1138         fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1139         fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n", dd->rank,
1140                 physicalnode_id_hash, gpu_id);
1141     }
1142     /* Split the PP communicator over the physical nodes */
1143     /* TODO: See if we should store this (before), as it's also used for
1144      * for the nodecomm summation.
1145      */
1146     // TODO PhysicalNodeCommunicator could be extended/used to handle
1147     // the need for per-node per-group communicators.
1148     MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank, &mpi_comm_pp_physicalnode);
1149     MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank, &dd->comm->mpi_comm_gpu_shared);
1150     MPI_Comm_free(&mpi_comm_pp_physicalnode);
1151     MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1152
1153     if (debug)
1154     {
1155         fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1156     }
1157
1158     /* Note that some ranks could share a GPU, while others don't */
1159
1160     if (dd->comm->nrank_gpu_shared == 1)
1161     {
1162         MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1163     }
1164 #else
1165     GMX_UNUSED_VALUE(cr);
1166     GMX_UNUSED_VALUE(gpu_id);
1167 #endif
1168 }
1169
1170 static void make_load_communicators(gmx_domdec_t gmx_unused* dd)
1171 {
1172 #if GMX_MPI
1173     int  dim0, dim1, i, j;
1174     ivec loc;
1175
1176     if (debug)
1177     {
1178         fprintf(debug, "Making load communicators\n");
1179     }
1180
1181     dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1182     snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1183
1184     if (dd->ndim == 0)
1185     {
1186         return;
1187     }
1188
1189     clear_ivec(loc);
1190     make_load_communicator(dd, 0, loc);
1191     if (dd->ndim > 1)
1192     {
1193         dim0 = dd->dim[0];
1194         for (i = 0; i < dd->nc[dim0]; i++)
1195         {
1196             loc[dim0] = i;
1197             make_load_communicator(dd, 1, loc);
1198         }
1199     }
1200     if (dd->ndim > 2)
1201     {
1202         dim0 = dd->dim[0];
1203         for (i = 0; i < dd->nc[dim0]; i++)
1204         {
1205             loc[dim0] = i;
1206             dim1      = dd->dim[1];
1207             for (j = 0; j < dd->nc[dim1]; j++)
1208             {
1209                 loc[dim1] = j;
1210                 make_load_communicator(dd, 2, loc);
1211             }
1212         }
1213     }
1214
1215     if (debug)
1216     {
1217         fprintf(debug, "Finished making load communicators\n");
1218     }
1219 #endif
1220 }
1221
1222 /*! \brief Sets up the relation between neighboring domains and zones */
1223 static void setup_neighbor_relations(gmx_domdec_t* dd)
1224 {
1225     int                 d, dim, m;
1226     ivec                tmp, s;
1227     gmx_domdec_zones_t* zones;
1228     GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
1229
1230     for (d = 0; d < dd->ndim; d++)
1231     {
1232         dim = dd->dim[d];
1233         copy_ivec(dd->ci, tmp);
1234         tmp[dim]           = (tmp[dim] + 1) % dd->nc[dim];
1235         dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1236         copy_ivec(dd->ci, tmp);
1237         tmp[dim]           = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
1238         dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1239         if (debug)
1240         {
1241             fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n", dd->rank, dim,
1242                     dd->neighbor[d][0], dd->neighbor[d][1]);
1243         }
1244     }
1245
1246     int nzone  = (1 << dd->ndim);
1247     int nizone = (1 << std::max(dd->ndim - 1, 0));
1248     assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1249
1250     zones = &dd->comm->zones;
1251
1252     for (int i = 0; i < nzone; i++)
1253     {
1254         m = 0;
1255         clear_ivec(zones->shift[i]);
1256         for (d = 0; d < dd->ndim; d++)
1257         {
1258             zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1259         }
1260     }
1261
1262     zones->n = nzone;
1263     for (int i = 0; i < nzone; i++)
1264     {
1265         for (d = 0; d < DIM; d++)
1266         {
1267             s[d] = dd->ci[d] - zones->shift[i][d];
1268             if (s[d] < 0)
1269             {
1270                 s[d] += dd->nc[d];
1271             }
1272             else if (s[d] >= dd->nc[d])
1273             {
1274                 s[d] -= dd->nc[d];
1275             }
1276         }
1277     }
1278     for (int iZoneIndex = 0; iZoneIndex < nizone; iZoneIndex++)
1279     {
1280         GMX_RELEASE_ASSERT(
1281                 ddNonbondedZonePairRanges[iZoneIndex][0] == iZoneIndex,
1282                 "The first element for each ddNonbondedZonePairRanges should match its index");
1283
1284         DDPairInteractionRanges iZone;
1285         iZone.iZoneIndex = iZoneIndex;
1286         /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
1287          * j-zones up to nzone.
1288          */
1289         iZone.jZoneRange = gmx::Range<int>(std::min(ddNonbondedZonePairRanges[iZoneIndex][1], nzone),
1290                                            std::min(ddNonbondedZonePairRanges[iZoneIndex][2], nzone));
1291         for (dim = 0; dim < DIM; dim++)
1292         {
1293             if (dd->nc[dim] == 1)
1294             {
1295                 /* All shifts should be allowed */
1296                 iZone.shift0[dim] = -1;
1297                 iZone.shift1[dim] = 1;
1298             }
1299             else
1300             {
1301                 /* Determine the min/max j-zone shift wrt the i-zone */
1302                 iZone.shift0[dim] = 1;
1303                 iZone.shift1[dim] = -1;
1304                 for (int jZone : iZone.jZoneRange)
1305                 {
1306                     int shift_diff = zones->shift[jZone][dim] - zones->shift[iZoneIndex][dim];
1307                     if (shift_diff < iZone.shift0[dim])
1308                     {
1309                         iZone.shift0[dim] = shift_diff;
1310                     }
1311                     if (shift_diff > iZone.shift1[dim])
1312                     {
1313                         iZone.shift1[dim] = shift_diff;
1314                     }
1315                 }
1316             }
1317         }
1318
1319         zones->iZones.push_back(iZone);
1320     }
1321
1322     if (!isDlbDisabled(dd->comm))
1323     {
1324         dd->comm->cellsizesWithDlb.resize(dd->ndim);
1325     }
1326
1327     if (dd->comm->ddSettings.recordLoad)
1328     {
1329         make_load_communicators(dd);
1330     }
1331 }
1332
1333 static void make_pp_communicator(const gmx::MDLogger& mdlog,
1334                                  gmx_domdec_t*        dd,
1335                                  t_commrec gmx_unused* cr,
1336                                  bool gmx_unused reorder)
1337 {
1338 #if GMX_MPI
1339     gmx_domdec_comm_t*  comm      = dd->comm;
1340     CartesianRankSetup& cartSetup = comm->cartesianRankSetup;
1341
1342     if (cartSetup.bCartesianPP)
1343     {
1344         /* Set up cartesian communication for the particle-particle part */
1345         GMX_LOG(mdlog.info)
1346                 .appendTextFormatted("Will use a Cartesian communicator: %d x %d x %d", dd->nc[XX],
1347                                      dd->nc[YY], dd->nc[ZZ]);
1348
1349         ivec periods;
1350         for (int i = 0; i < DIM; i++)
1351         {
1352             periods[i] = TRUE;
1353         }
1354         MPI_Comm comm_cart;
1355         MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder), &comm_cart);
1356         /* We overwrite the old communicator with the new cartesian one */
1357         cr->mpi_comm_mygroup = comm_cart;
1358     }
1359
1360     dd->mpi_comm_all = cr->mpi_comm_mygroup;
1361     MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1362
1363     if (cartSetup.bCartesianPP_PME)
1364     {
1365         /* Since we want to use the original cartesian setup for sim,
1366          * and not the one after split, we need to make an index.
1367          */
1368         cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1369         cartSetup.ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
1370         gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1371         /* Get the rank of the DD master,
1372          * above we made sure that the master node is a PP node.
1373          */
1374         int rank;
1375         if (MASTER(cr))
1376         {
1377             rank = dd->rank;
1378         }
1379         else
1380         {
1381             rank = 0;
1382         }
1383         MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1384     }
1385     else if (cartSetup.bCartesianPP)
1386     {
1387         if (!comm->ddRankSetup.usePmeOnlyRanks)
1388         {
1389             /* The PP communicator is also
1390              * the communicator for this simulation
1391              */
1392             cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1393         }
1394         cr->nodeid = dd->rank;
1395
1396         MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1397
1398         /* We need to make an index to go from the coordinates
1399          * to the nodeid of this simulation.
1400          */
1401         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1402         std::vector<int> buf(dd->nnodes);
1403         if (thisRankHasDuty(cr, DUTY_PP))
1404         {
1405             buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1406         }
1407         /* Communicate the ddindex to simulation nodeid index */
1408         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1409                       cr->mpi_comm_mysim);
1410
1411         /* Determine the master coordinates and rank.
1412          * The DD master should be the same node as the master of this sim.
1413          */
1414         for (int i = 0; i < dd->nnodes; i++)
1415         {
1416             if (cartSetup.ddindex2simnodeid[i] == 0)
1417             {
1418                 ddindex2xyz(dd->nc, i, dd->master_ci);
1419                 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1420             }
1421         }
1422         if (debug)
1423         {
1424             fprintf(debug, "The master rank is %d\n", dd->masterrank);
1425         }
1426     }
1427     else
1428     {
1429         /* No Cartesian communicators */
1430         /* We use the rank in dd->comm->all as DD index */
1431         ddindex2xyz(dd->nc, dd->rank, dd->ci);
1432         /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1433         dd->masterrank = 0;
1434         clear_ivec(dd->master_ci);
1435     }
1436 #endif
1437
1438     GMX_LOG(mdlog.info)
1439             .appendTextFormatted("Domain decomposition rank %d, coordinates %d %d %d\n", dd->rank,
1440                                  dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1441     if (debug)
1442     {
1443         fprintf(debug, "Domain decomposition rank %d, coordinates %d %d %d\n\n", dd->rank,
1444                 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1445     }
1446 }
1447
1448 static void receive_ddindex2simnodeid(gmx_domdec_t* dd, t_commrec* cr)
1449 {
1450 #if GMX_MPI
1451     CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
1452
1453     if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1454     {
1455         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1456         std::vector<int> buf(dd->nnodes);
1457         if (thisRankHasDuty(cr, DUTY_PP))
1458         {
1459             buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1460         }
1461         /* Communicate the ddindex to simulation nodeid index */
1462         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1463                       cr->mpi_comm_mysim);
1464     }
1465 #else
1466     GMX_UNUSED_VALUE(dd);
1467     GMX_UNUSED_VALUE(cr);
1468 #endif
1469 }
1470
1471 static CartesianRankSetup split_communicator(const gmx::MDLogger& mdlog,
1472                                              t_commrec*           cr,
1473                                              const DdRankOrder    ddRankOrder,
1474                                              bool gmx_unused    reorder,
1475                                              const DDRankSetup& ddRankSetup,
1476                                              ivec               ddCellIndex,
1477                                              std::vector<int>*  pmeRanks)
1478 {
1479     CartesianRankSetup cartSetup;
1480
1481     cartSetup.bCartesianPP     = (ddRankOrder == DdRankOrder::cartesian);
1482     cartSetup.bCartesianPP_PME = false;
1483
1484     const ivec& numDDCells = ddRankSetup.numPPCells;
1485     /* Initially we set ntot to the number of PP cells */
1486     copy_ivec(numDDCells, cartSetup.ntot);
1487
1488     if (cartSetup.bCartesianPP)
1489     {
1490         const int numDDCellsTot = ddRankSetup.numPPRanks;
1491         bool      bDiv[DIM];
1492         for (int i = 1; i < DIM; i++)
1493         {
1494             bDiv[i] = ((ddRankSetup.numRanksDoingPme * numDDCells[i]) % numDDCellsTot == 0);
1495         }
1496         if (bDiv[YY] || bDiv[ZZ])
1497         {
1498             cartSetup.bCartesianPP_PME = TRUE;
1499             /* If we have 2D PME decomposition, which is always in x+y,
1500              * we stack the PME only nodes in z.
1501              * Otherwise we choose the direction that provides the thinnest slab
1502              * of PME only nodes as this will have the least effect
1503              * on the PP communication.
1504              * But for the PME communication the opposite might be better.
1505              */
1506             if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 || !bDiv[YY] || numDDCells[YY] > numDDCells[ZZ]))
1507             {
1508                 cartSetup.cartpmedim = ZZ;
1509             }
1510             else
1511             {
1512                 cartSetup.cartpmedim = YY;
1513             }
1514             cartSetup.ntot[cartSetup.cartpmedim] +=
1515                     (ddRankSetup.numRanksDoingPme * numDDCells[cartSetup.cartpmedim]) / numDDCellsTot;
1516         }
1517         else
1518         {
1519             GMX_LOG(mdlog.info)
1520                     .appendTextFormatted(
1521                             "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or "
1522                             "nx*nz (%d*%d)",
1523                             ddRankSetup.numRanksDoingPme, numDDCells[XX], numDDCells[YY],
1524                             numDDCells[XX], numDDCells[ZZ]);
1525             GMX_LOG(mdlog.info)
1526                     .appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1527         }
1528     }
1529
1530     if (cartSetup.bCartesianPP_PME)
1531     {
1532 #if GMX_MPI
1533         int  rank;
1534         ivec periods;
1535
1536         GMX_LOG(mdlog.info)
1537                 .appendTextFormatted(
1538                         "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1539                         cartSetup.ntot[XX], cartSetup.ntot[YY], cartSetup.ntot[ZZ]);
1540
1541         for (int i = 0; i < DIM; i++)
1542         {
1543             periods[i] = TRUE;
1544         }
1545         MPI_Comm comm_cart;
1546         MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder),
1547                         &comm_cart);
1548         MPI_Comm_rank(comm_cart, &rank);
1549         if (MASTER(cr) && rank != 0)
1550         {
1551             gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1552         }
1553
1554         /* With this assigment we loose the link to the original communicator
1555          * which will usually be MPI_COMM_WORLD, unless have multisim.
1556          */
1557         cr->mpi_comm_mysim = comm_cart;
1558         cr->sim_nodeid     = rank;
1559
1560         MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1561
1562         GMX_LOG(mdlog.info)
1563                 .appendTextFormatted("Cartesian rank %d, coordinates %d %d %d\n", cr->sim_nodeid,
1564                                      ddCellIndex[XX], ddCellIndex[YY], ddCellIndex[ZZ]);
1565
1566         if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1567         {
1568             cr->duty = DUTY_PP;
1569         }
1570         if (!ddRankSetup.usePmeOnlyRanks
1571             || ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1572         {
1573             cr->duty = DUTY_PME;
1574         }
1575
1576         /* Split the sim communicator into PP and PME only nodes */
1577         MPI_Comm_split(cr->mpi_comm_mysim, getThisRankDuties(cr),
1578                        dd_index(cartSetup.ntot, ddCellIndex), &cr->mpi_comm_mygroup);
1579 #else
1580         GMX_UNUSED_VALUE(ddCellIndex);
1581 #endif
1582     }
1583     else
1584     {
1585         switch (ddRankOrder)
1586         {
1587             case DdRankOrder::pp_pme:
1588                 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1589                 break;
1590             case DdRankOrder::interleave:
1591                 /* Interleave the PP-only and PME-only ranks */
1592                 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1593                 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1594                 break;
1595             case DdRankOrder::cartesian: break;
1596             default: gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1597         }
1598
1599         if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1600         {
1601             cr->duty = DUTY_PME;
1602         }
1603         else
1604         {
1605             cr->duty = DUTY_PP;
1606         }
1607 #if GMX_MPI
1608         /* Split the sim communicator into PP and PME only nodes */
1609         MPI_Comm_split(cr->mpi_comm_mysim, getThisRankDuties(cr), cr->nodeid, &cr->mpi_comm_mygroup);
1610         MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1611 #endif
1612     }
1613
1614     GMX_LOG(mdlog.info)
1615             .appendTextFormatted("This rank does only %s work.\n",
1616                                  thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1617
1618     return cartSetup;
1619 }
1620
1621 /*! \brief Makes the PP communicator and the PME communicator, when needed
1622  *
1623  * Returns the Cartesian rank setup.
1624  * Sets \p cr->mpi_comm_mygroup
1625  * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1626  * With separate PME ranks in interleaved order, set the PME ranks in \p pmeRanks.
1627  */
1628 static CartesianRankSetup makeGroupCommunicators(const gmx::MDLogger& mdlog,
1629                                                  const DDSettings&    ddSettings,
1630                                                  const DdRankOrder    ddRankOrder,
1631                                                  const DDRankSetup&   ddRankSetup,
1632                                                  t_commrec*           cr,
1633                                                  ivec                 ddCellIndex,
1634                                                  std::vector<int>*    pmeRanks)
1635 {
1636     CartesianRankSetup cartSetup;
1637
1638     if (ddRankSetup.usePmeOnlyRanks)
1639     {
1640         /* Split the communicator into a PP and PME part */
1641         cartSetup = split_communicator(mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder,
1642                                        ddRankSetup, ddCellIndex, pmeRanks);
1643     }
1644     else
1645     {
1646         /* All nodes do PP and PME */
1647         /* We do not require separate communicators */
1648         cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
1649
1650         cartSetup.bCartesianPP     = false;
1651         cartSetup.bCartesianPP_PME = false;
1652     }
1653
1654     return cartSetup;
1655 }
1656
1657 /*! \brief For PP ranks, sets or makes the communicator
1658  *
1659  * For PME ranks get the rank id.
1660  * For PP only ranks, sets the PME-only rank.
1661  */
1662 static void setupGroupCommunication(const gmx::MDLogger&     mdlog,
1663                                     const DDSettings&        ddSettings,
1664                                     gmx::ArrayRef<const int> pmeRanks,
1665                                     t_commrec*               cr,
1666                                     const int                numAtomsInSystem,
1667                                     gmx_domdec_t*            dd)
1668 {
1669     const DDRankSetup&        ddRankSetup = dd->comm->ddRankSetup;
1670     const CartesianRankSetup& cartSetup   = dd->comm->cartesianRankSetup;
1671
1672     if (thisRankHasDuty(cr, DUTY_PP))
1673     {
1674         /* Copy or make a new PP communicator */
1675
1676         /* We (possibly) reordered the nodes in split_communicator,
1677          * so it is no longer required in make_pp_communicator.
1678          */
1679         const bool useCartesianReorder = (ddSettings.useCartesianReorder && !cartSetup.bCartesianPP_PME);
1680
1681         make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1682     }
1683     else
1684     {
1685         receive_ddindex2simnodeid(dd, cr);
1686     }
1687
1688     if (!thisRankHasDuty(cr, DUTY_PME))
1689     {
1690         /* Set up the commnuication to our PME node */
1691         dd->pme_nodeid = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1692         dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1693         if (debug)
1694         {
1695             fprintf(debug, "My pme_nodeid %d receive ener %s\n", dd->pme_nodeid,
1696                     gmx::boolToString(dd->pme_receive_vir_ener));
1697         }
1698     }
1699     else
1700     {
1701         dd->pme_nodeid = -1;
1702     }
1703
1704     /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1705     if (MASTER(cr))
1706     {
1707         dd->ma = std::make_unique<AtomDistribution>(dd->nc, numAtomsInSystem, numAtomsInSystem);
1708     }
1709 }
1710
1711 static real* get_slb_frac(const gmx::MDLogger& mdlog, const char* dir, int nc, const char* size_string)
1712 {
1713     real * slb_frac, tot;
1714     int    i, n;
1715     double dbl;
1716
1717     slb_frac = nullptr;
1718     if (nc > 1 && size_string != nullptr)
1719     {
1720         GMX_LOG(mdlog.info).appendTextFormatted("Using static load balancing for the %s direction", dir);
1721         snew(slb_frac, nc);
1722         tot = 0;
1723         for (i = 0; i < nc; i++)
1724         {
1725             dbl = 0;
1726             sscanf(size_string, "%20lf%n", &dbl, &n);
1727             if (dbl == 0)
1728             {
1729                 gmx_fatal(FARGS,
1730                           "Incorrect or not enough DD cell size entries for direction %s: '%s'",
1731                           dir, size_string);
1732             }
1733             slb_frac[i] = dbl;
1734             size_string += n;
1735             tot += slb_frac[i];
1736         }
1737         /* Normalize */
1738         std::string relativeCellSizes = "Relative cell sizes:";
1739         for (i = 0; i < nc; i++)
1740         {
1741             slb_frac[i] /= tot;
1742             relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1743         }
1744         GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1745     }
1746
1747     return slb_frac;
1748 }
1749
1750 static int multi_body_bondeds_count(const gmx_mtop_t* mtop)
1751 {
1752     int                  n     = 0;
1753     gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1754     int                  nmol;
1755     while (const InteractionLists* ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1756     {
1757         for (auto& ilist : extractILists(*ilists, IF_BOND))
1758         {
1759             if (NRAL(ilist.functionType) > 2)
1760             {
1761                 n += nmol * (ilist.iatoms.size() / ilistStride(ilist));
1762             }
1763         }
1764     }
1765
1766     return n;
1767 }
1768
1769 static int dd_getenv(const gmx::MDLogger& mdlog, const char* env_var, int def)
1770 {
1771     char* val;
1772     int   nst;
1773
1774     nst = def;
1775     val = getenv(env_var);
1776     if (val)
1777     {
1778         if (sscanf(val, "%20d", &nst) <= 0)
1779         {
1780             nst = 1;
1781         }
1782         GMX_LOG(mdlog.info).appendTextFormatted("Found env.var. %s = %s, using value %d", env_var, val, nst);
1783     }
1784
1785     return nst;
1786 }
1787
1788 static void check_dd_restrictions(const gmx_domdec_t* dd, const t_inputrec* ir, const gmx::MDLogger& mdlog)
1789 {
1790     if (ir->ePBC == epbcSCREW && (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
1791     {
1792         gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction",
1793                   epbc_names[ir->ePBC]);
1794     }
1795
1796     if (ir->nstlist == 0)
1797     {
1798         gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1799     }
1800
1801     if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
1802     {
1803         GMX_LOG(mdlog.warning)
1804                 .appendText(
1805                         "comm-mode angular will give incorrect results when the comm group "
1806                         "partially crosses a periodic boundary");
1807     }
1808 }
1809
1810 static real average_cellsize_min(const gmx_ddbox_t& ddbox, const ivec numDomains)
1811 {
1812     real r = ddbox.box_size[XX];
1813     for (int d = 0; d < DIM; d++)
1814     {
1815         if (numDomains[d] > 1)
1816         {
1817             /* Check using the initial average cell size */
1818             r = std::min(r, ddbox.box_size[d] * ddbox.skew_fac[d] / numDomains[d]);
1819         }
1820     }
1821
1822     return r;
1823 }
1824
1825 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
1826  */
1827 static DlbState forceDlbOffOrBail(DlbState             cmdlineDlbState,
1828                                   const std::string&   reasonStr,
1829                                   const gmx::MDLogger& mdlog)
1830 {
1831     std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
1832     std::string dlbDisableNote     = "NOTE: disabling dynamic load balancing as ";
1833
1834     if (cmdlineDlbState == DlbState::onUser)
1835     {
1836         gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1837     }
1838     else if (cmdlineDlbState == DlbState::offCanTurnOn)
1839     {
1840         GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1841     }
1842     return DlbState::offForever;
1843 }
1844
1845 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1846  *
1847  * This function parses the parameters of "-dlb" command line option setting
1848  * corresponding state values. Then it checks the consistency of the determined
1849  * state with other run parameters and settings. As a result, the initial state
1850  * may be altered or an error may be thrown if incompatibility of options is detected.
1851  *
1852  * \param [in] mdlog       Logger.
1853  * \param [in] dlbOption   Enum value for the DLB option.
1854  * \param [in] bRecordLoad True if the load balancer is recording load information.
1855  * \param [in] mdrunOptions  Options for mdrun.
1856  * \param [in] ir          Pointer mdrun to input parameters.
1857  * \returns                DLB initial/startup state.
1858  */
1859 static DlbState determineInitialDlbState(const gmx::MDLogger&     mdlog,
1860                                          DlbOption                dlbOption,
1861                                          gmx_bool                 bRecordLoad,
1862                                          const gmx::MdrunOptions& mdrunOptions,
1863                                          const t_inputrec*        ir)
1864 {
1865     DlbState dlbState = DlbState::offCanTurnOn;
1866
1867     switch (dlbOption)
1868     {
1869         case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1870         case DlbOption::no: dlbState = DlbState::offUser; break;
1871         case DlbOption::yes: dlbState = DlbState::onUser; break;
1872         default: gmx_incons("Invalid dlbOption enum value");
1873     }
1874
1875     /* Reruns don't support DLB: bail or override auto mode */
1876     if (mdrunOptions.rerun)
1877     {
1878         std::string reasonStr = "it is not supported in reruns.";
1879         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1880     }
1881
1882     /* Unsupported integrators */
1883     if (!EI_DYNAMICS(ir->eI))
1884     {
1885         auto reasonStr = gmx::formatString(
1886                 "it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1887         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1888     }
1889
1890     /* Without cycle counters we can't time work to balance on */
1891     if (!bRecordLoad)
1892     {
1893         std::string reasonStr =
1894                 "cycle counters unsupported or not enabled in the operating system kernel.";
1895         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1896     }
1897
1898     if (mdrunOptions.reproducible)
1899     {
1900         std::string reasonStr = "you started a reproducible run.";
1901         switch (dlbState)
1902         {
1903             case DlbState::offUser: break;
1904             case DlbState::offForever:
1905                 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1906                 break;
1907             case DlbState::offCanTurnOn: return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1908             case DlbState::onCanTurnOff:
1909                 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1910                 break;
1911             case DlbState::onUser:
1912                 return forceDlbOffOrBail(
1913                         dlbState,
1914                         reasonStr
1915                                 + " In load balanced runs binary reproducibility cannot be "
1916                                   "ensured.",
1917                         mdlog);
1918             default:
1919                 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice",
1920                           static_cast<int>(dlbState));
1921         }
1922     }
1923
1924     return dlbState;
1925 }
1926
1927 static gmx_domdec_comm_t* init_dd_comm()
1928 {
1929     gmx_domdec_comm_t* comm = new gmx_domdec_comm_t;
1930
1931     comm->n_load_have    = 0;
1932     comm->n_load_collect = 0;
1933
1934     comm->haveTurnedOffDlb = false;
1935
1936     for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
1937     {
1938         comm->sum_nat[i] = 0;
1939     }
1940     comm->ndecomp   = 0;
1941     comm->nload     = 0;
1942     comm->load_step = 0;
1943     comm->load_sum  = 0;
1944     comm->load_max  = 0;
1945     clear_ivec(comm->load_lim);
1946     comm->load_mdf = 0;
1947     comm->load_pme = 0;
1948
1949     /* This should be replaced by a unique pointer */
1950     comm->balanceRegion = ddBalanceRegionAllocate();
1951
1952     return comm;
1953 }
1954
1955 /* Returns whether mtop contains constraints and/or vsites */
1956 static bool systemHasConstraintsOrVsites(const gmx_mtop_t& mtop)
1957 {
1958     auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
1959     int  nmol;
1960     while (const InteractionLists* ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
1961     {
1962         if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
1963         {
1964             return true;
1965         }
1966     }
1967
1968     return false;
1969 }
1970
1971 static void setupUpdateGroups(const gmx::MDLogger& mdlog,
1972                               const gmx_mtop_t&    mtop,
1973                               const t_inputrec&    inputrec,
1974                               const real           cutoffMargin,
1975                               DDSystemInfo*        systemInfo)
1976 {
1977     /* When we have constraints and/or vsites, it is beneficial to use
1978      * update groups (when possible) to allow independent update of groups.
1979      */
1980     if (!systemHasConstraintsOrVsites(mtop))
1981     {
1982         /* No constraints or vsites, atoms can be updated independently */
1983         return;
1984     }
1985
1986     systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
1987     systemInfo->useUpdateGroups               = (!systemInfo->updateGroupingPerMoleculetype.empty()
1988                                    && getenv("GMX_NO_UPDATEGROUPS") == nullptr);
1989
1990     if (systemInfo->useUpdateGroups)
1991     {
1992         int numUpdateGroups = 0;
1993         for (const auto& molblock : mtop.molblock)
1994         {
1995             numUpdateGroups += molblock.nmol
1996                                * systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
1997         }
1998
1999         systemInfo->maxUpdateGroupRadius = computeMaxUpdateGroupRadius(
2000                 mtop, systemInfo->updateGroupingPerMoleculetype, maxReferenceTemperature(inputrec));
2001
2002         /* To use update groups, the large domain-to-domain cutoff distance
2003          * should be compatible with the box size.
2004          */
2005         systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
2006
2007         if (systemInfo->useUpdateGroups)
2008         {
2009             GMX_LOG(mdlog.info)
2010                     .appendTextFormatted(
2011                             "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f "
2012                             "nm\n",
2013                             numUpdateGroups, mtop.natoms / static_cast<double>(numUpdateGroups),
2014                             systemInfo->maxUpdateGroupRadius);
2015         }
2016         else
2017         {
2018             GMX_LOG(mdlog.info)
2019                     .appendTextFormatted(
2020                             "The combination of rlist and box size prohibits the use of update "
2021                             "groups\n");
2022             systemInfo->updateGroupingPerMoleculetype.clear();
2023         }
2024     }
2025 }
2026
2027 UnitCellInfo::UnitCellInfo(const t_inputrec& ir) :
2028     npbcdim(ePBC2npbcdim(ir.ePBC)),
2029     numBoundedDimensions(inputrec2nboundeddim(&ir)),
2030     ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
2031     haveScrewPBC(ir.ePBC == epbcSCREW)
2032 {
2033 }
2034
2035 /* Returns whether molecules are always whole, i.e. not broken by PBC */
2036 static bool moleculesAreAlwaysWhole(const gmx_mtop_t&                           mtop,
2037                                     const bool                                  useUpdateGroups,
2038                                     gmx::ArrayRef<const gmx::RangePartitioning> updateGroupingPerMoleculetype)
2039 {
2040     if (useUpdateGroups)
2041     {
2042         GMX_RELEASE_ASSERT(updateGroupingPerMoleculetype.size() == mtop.moltype.size(),
2043                            "Need one grouping per moltype");
2044         for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
2045         {
2046             if (updateGroupingPerMoleculetype[mol].numBlocks() > 1)
2047             {
2048                 return false;
2049             }
2050         }
2051     }
2052     else
2053     {
2054         for (const auto& moltype : mtop.moltype)
2055         {
2056             if (moltype.atoms.nr > 1)
2057             {
2058                 return false;
2059             }
2060         }
2061     }
2062
2063     return true;
2064 }
2065
2066 /*! \brief Generate the simulation system information */
2067 static DDSystemInfo getSystemInfo(const gmx::MDLogger&           mdlog,
2068                                   const t_commrec*               cr,
2069                                   const DomdecOptions&           options,
2070                                   const gmx_mtop_t&              mtop,
2071                                   const t_inputrec&              ir,
2072                                   const matrix                   box,
2073                                   gmx::ArrayRef<const gmx::RVec> xGlobal)
2074 {
2075     const real tenPercentMargin = 1.1;
2076
2077     DDSystemInfo systemInfo;
2078
2079     /* We need to decide on update groups early, as this affects communication distances */
2080     systemInfo.useUpdateGroups = false;
2081     if (ir.cutoff_scheme == ecutsVERLET)
2082     {
2083         real cutoffMargin = std::sqrt(max_cutoff2(ir.ePBC, box)) - ir.rlist;
2084         setupUpdateGroups(mdlog, mtop, ir, cutoffMargin, &systemInfo);
2085     }
2086
2087     systemInfo.moleculesAreAlwaysWhole = moleculesAreAlwaysWhole(
2088             mtop, systemInfo.useUpdateGroups, systemInfo.updateGroupingPerMoleculetype);
2089     systemInfo.haveInterDomainBondeds =
2090             (!systemInfo.moleculesAreAlwaysWhole || mtop.bIntermolecularInteractions);
2091     systemInfo.haveInterDomainMultiBodyBondeds =
2092             (systemInfo.haveInterDomainBondeds && multi_body_bondeds_count(&mtop) > 0);
2093
2094     if (systemInfo.useUpdateGroups)
2095     {
2096         systemInfo.haveSplitConstraints = false;
2097         systemInfo.haveSplitSettles     = false;
2098     }
2099     else
2100     {
2101         systemInfo.haveSplitConstraints = (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0
2102                                            || gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0);
2103         systemInfo.haveSplitSettles     = (gmx_mtop_ftype_count(mtop, F_SETTLE) > 0);
2104     }
2105
2106     if (ir.rlist == 0)
2107     {
2108         /* Set the cut-off to some very large value,
2109          * so we don't need if statements everywhere in the code.
2110          * We use sqrt, since the cut-off is squared in some places.
2111          */
2112         systemInfo.cutoff = GMX_CUTOFF_INF;
2113     }
2114     else
2115     {
2116         systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir.rlist);
2117     }
2118     systemInfo.minCutoffForMultiBody = 0;
2119
2120     /* Determine the minimum cell size limit, affected by many factors */
2121     systemInfo.cellsizeLimit             = 0;
2122     systemInfo.filterBondedCommunication = false;
2123
2124     /* We do not allow home atoms to move beyond the neighboring domain
2125      * between domain decomposition steps, which limits the cell size.
2126      * Get an estimate of cell size limit due to atom displacement.
2127      * In most cases this is a large overestimate, because it assumes
2128      * non-interaction atoms.
2129      * We set the chance to 1 in a trillion steps.
2130      */
2131     constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2132     const real     limitForAtomDisplacement          = minCellSizeForAtomDisplacement(
2133             mtop, ir, systemInfo.updateGroupingPerMoleculetype, c_chanceThatAtomMovesBeyondDomain);
2134     GMX_LOG(mdlog.info).appendTextFormatted("Minimum cell size due to atom displacement: %.3f nm", limitForAtomDisplacement);
2135
2136     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, limitForAtomDisplacement);
2137
2138     /* TODO: PME decomposition currently requires atoms not to be more than
2139      *       2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2140      *       In nearly all cases, limitForAtomDisplacement will be smaller
2141      *       than 2/3*rlist, so the PME requirement is satisfied.
2142      *       But it would be better for both correctness and performance
2143      *       to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2144      *       Note that we would need to improve the pairlist buffer case.
2145      */
2146
2147     if (systemInfo.haveInterDomainBondeds)
2148     {
2149         if (options.minimumCommunicationRange > 0)
2150         {
2151             systemInfo.minCutoffForMultiBody =
2152                     atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2153             if (options.useBondedCommunication)
2154             {
2155                 systemInfo.filterBondedCommunication =
2156                         (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2157             }
2158             else
2159             {
2160                 systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
2161             }
2162         }
2163         else if (ir.bPeriodicMols)
2164         {
2165             /* Can not easily determine the required cut-off */
2166             GMX_LOG(mdlog.warning)
2167                     .appendText(
2168                             "NOTE: Periodic molecules are present in this system. Because of this, "
2169                             "the domain decomposition algorithm cannot easily determine the "
2170                             "minimum cell size that it requires for treating bonded interactions. "
2171                             "Instead, domain decomposition will assume that half the non-bonded "
2172                             "cut-off will be a suitable lower bound.");
2173             systemInfo.minCutoffForMultiBody = systemInfo.cutoff / 2;
2174         }
2175         else
2176         {
2177             real r_2b, r_mb;
2178
2179             if (MASTER(cr))
2180             {
2181                 dd_bonded_cg_distance(mdlog, &mtop, &ir, as_rvec_array(xGlobal.data()), box,
2182                                       options.checkBondedInteractions, &r_2b, &r_mb);
2183             }
2184             gmx_bcast(sizeof(r_2b), &r_2b, cr);
2185             gmx_bcast(sizeof(r_mb), &r_mb, cr);
2186
2187             /* We use an initial margin of 10% for the minimum cell size,
2188              * except when we are just below the non-bonded cut-off.
2189              */
2190             if (options.useBondedCommunication)
2191             {
2192                 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2193                 {
2194                     const real r_bonded              = std::max(r_2b, r_mb);
2195                     systemInfo.minCutoffForMultiBody = tenPercentMargin * r_bonded;
2196                     /* This is the (only) place where we turn on the filtering */
2197                     systemInfo.filterBondedCommunication = true;
2198                 }
2199                 else
2200                 {
2201                     const real r_bonded = r_mb;
2202                     systemInfo.minCutoffForMultiBody =
2203                             std::min(tenPercentMargin * r_bonded, systemInfo.cutoff);
2204                 }
2205                 /* We determine cutoff_mbody later */
2206                 systemInfo.increaseMultiBodyCutoff = true;
2207             }
2208             else
2209             {
2210                 /* No special bonded communication,
2211                  * simply increase the DD cut-off.
2212                  */
2213                 systemInfo.minCutoffForMultiBody = tenPercentMargin * std::max(r_2b, r_mb);
2214                 systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
2215             }
2216         }
2217         GMX_LOG(mdlog.info)
2218                 .appendTextFormatted("Minimum cell size due to bonded interactions: %.3f nm",
2219                                      systemInfo.minCutoffForMultiBody);
2220
2221         systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.minCutoffForMultiBody);
2222     }
2223
2224     systemInfo.constraintCommunicationRange = 0;
2225     if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
2226     {
2227         /* There is a cell size limit due to the constraints (P-LINCS) */
2228         systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, &mtop, &ir);
2229         GMX_LOG(mdlog.info)
2230                 .appendTextFormatted("Estimated maximum distance required for P-LINCS: %.3f nm",
2231                                      systemInfo.constraintCommunicationRange);
2232         if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2233         {
2234             GMX_LOG(mdlog.info)
2235                     .appendText(
2236                             "This distance will limit the DD cell size, you can override this with "
2237                             "-rcon");
2238         }
2239     }
2240     else if (options.constraintCommunicationRange > 0)
2241     {
2242         /* Here we do not check for dd->splitConstraints.
2243          * because one can also set a cell size limit for virtual sites only
2244          * and at this point we don't know yet if there are intercg v-sites.
2245          */
2246         GMX_LOG(mdlog.info)
2247                 .appendTextFormatted("User supplied maximum distance required for P-LINCS: %.3f nm",
2248                                      options.constraintCommunicationRange);
2249         systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2250     }
2251     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.constraintCommunicationRange);
2252
2253     return systemInfo;
2254 }
2255
2256 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
2257  * implemented. */
2258 static void checkDDGridSetup(const DDGridSetup&   ddGridSetup,
2259                              const t_commrec*     cr,
2260                              const DomdecOptions& options,
2261                              const DDSettings&    ddSettings,
2262                              const DDSystemInfo&  systemInfo,
2263                              const real           cellsizeLimit,
2264                              const gmx_ddbox_t&   ddbox)
2265 {
2266     if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
2267     {
2268         char     buf[STRLEN];
2269         gmx_bool bC = (systemInfo.haveSplitConstraints
2270                        && systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2271         sprintf(buf, "Change the number of ranks or mdrun option %s%s%s", !bC ? "-rdd" : "-rcon",
2272                 ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2273                 bC ? " or your LINCS settings" : "");
2274
2275         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2276                              "There is no domain decomposition for %d ranks that is compatible "
2277                              "with the given box and a minimum cell size of %g nm\n"
2278                              "%s\n"
2279                              "Look in the log file for details on the domain decomposition",
2280                              cr->nnodes - ddGridSetup.numPmeOnlyRanks, cellsizeLimit, buf);
2281     }
2282
2283     const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
2284     if (acs < cellsizeLimit)
2285     {
2286         if (options.numCells[XX] <= 0)
2287         {
2288             GMX_RELEASE_ASSERT(
2289                     false,
2290                     "dd_choose_grid() should return a grid that satisfies the cell size limits");
2291         }
2292         else
2293         {
2294             gmx_fatal_collective(
2295                     FARGS, cr->mpi_comm_mysim, MASTER(cr),
2296                     "The initial cell size (%f) is smaller than the cell size limit (%f), change "
2297                     "options -dd, -rdd or -rcon, see the log file for details",
2298                     acs, cellsizeLimit);
2299         }
2300     }
2301
2302     const int numPPRanks =
2303             ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2304     if (cr->nnodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2305     {
2306         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2307                              "The size of the domain decomposition grid (%d) does not match the "
2308                              "number of PP ranks (%d). The total number of ranks is %d",
2309                              numPPRanks, cr->nnodes - ddGridSetup.numPmeOnlyRanks, cr->nnodes);
2310     }
2311     if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2312     {
2313         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2314                              "The number of separate PME ranks (%d) is larger than the number of "
2315                              "PP ranks (%d), this is not supported.",
2316                              ddGridSetup.numPmeOnlyRanks, numPPRanks);
2317     }
2318 }
2319
2320 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2321 static DDRankSetup getDDRankSetup(const gmx::MDLogger& mdlog,
2322                                   t_commrec*           cr,
2323                                   const DDGridSetup&   ddGridSetup,
2324                                   const t_inputrec&    ir)
2325 {
2326     GMX_LOG(mdlog.info)
2327             .appendTextFormatted("Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2328                                  ddGridSetup.numDomains[XX], ddGridSetup.numDomains[YY],
2329                                  ddGridSetup.numDomains[ZZ], ddGridSetup.numPmeOnlyRanks);
2330
2331     DDRankSetup ddRankSetup;
2332
2333     ddRankSetup.numPPRanks = cr->nnodes - ddGridSetup.numPmeOnlyRanks;
2334     copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2335
2336     ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2337     if (ddRankSetup.usePmeOnlyRanks)
2338     {
2339         ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2340     }
2341     else
2342     {
2343         ddRankSetup.numRanksDoingPme =
2344                 ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2345     }
2346
2347     if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2348     {
2349         /* The following choices should match those
2350          * in comm_cost_est in domdec_setup.c.
2351          * Note that here the checks have to take into account
2352          * that the decomposition might occur in a different order than xyz
2353          * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2354          * in which case they will not match those in comm_cost_est,
2355          * but since that is mainly for testing purposes that's fine.
2356          */
2357         if (ddGridSetup.numDDDimensions >= 2 && ddGridSetup.ddDimensions[0] == XX
2358             && ddGridSetup.ddDimensions[1] == YY
2359             && ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX]
2360             && ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0
2361             && getenv("GMX_PMEONEDD") == nullptr)
2362         {
2363             ddRankSetup.npmedecompdim = 2;
2364             ddRankSetup.npmenodes_x   = ddGridSetup.numDomains[XX];
2365             ddRankSetup.npmenodes_y   = ddRankSetup.numRanksDoingPme / ddRankSetup.npmenodes_x;
2366         }
2367         else
2368         {
2369             /* In case nc is 1 in both x and y we could still choose to
2370              * decompose pme in y instead of x, but we use x for simplicity.
2371              */
2372             ddRankSetup.npmedecompdim = 1;
2373             if (ddGridSetup.ddDimensions[0] == YY)
2374             {
2375                 ddRankSetup.npmenodes_x = 1;
2376                 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2377             }
2378             else
2379             {
2380                 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2381                 ddRankSetup.npmenodes_y = 1;
2382             }
2383         }
2384         GMX_LOG(mdlog.info)
2385                 .appendTextFormatted("PME domain decomposition: %d x %d x %d",
2386                                      ddRankSetup.npmenodes_x, ddRankSetup.npmenodes_y, 1);
2387     }
2388     else
2389     {
2390         ddRankSetup.npmedecompdim = 0;
2391         ddRankSetup.npmenodes_x   = 0;
2392         ddRankSetup.npmenodes_y   = 0;
2393     }
2394
2395     return ddRankSetup;
2396 }
2397
2398 /*! \brief Set the cell size and interaction limits */
2399 static void set_dd_limits(const gmx::MDLogger& mdlog,
2400                           t_commrec*           cr,
2401                           gmx_domdec_t*        dd,
2402                           const DomdecOptions& options,
2403                           const DDSettings&    ddSettings,
2404                           const DDSystemInfo&  systemInfo,
2405                           const DDGridSetup&   ddGridSetup,
2406                           const int            numPPRanks,
2407                           const gmx_mtop_t*    mtop,
2408                           const t_inputrec*    ir,
2409                           const gmx_ddbox_t&   ddbox)
2410 {
2411     gmx_domdec_comm_t* comm = dd->comm;
2412     comm->ddSettings        = ddSettings;
2413
2414     /* Initialize to GPU share count to 0, might change later */
2415     comm->nrank_gpu_shared = 0;
2416
2417     comm->dlbState = comm->ddSettings.initialDlbState;
2418     dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2419     /* To consider turning DLB on after 2*nstlist steps we need to check
2420      * at partitioning count 3. Thus we need to increase the first count by 2.
2421      */
2422     comm->ddPartioningCountFirstDlbOff += 2;
2423
2424     comm->bPMELoadBalDLBLimits = FALSE;
2425
2426     /* Allocate the charge group/atom sorting struct */
2427     comm->sort = std::make_unique<gmx_domdec_sort_t>();
2428
2429     comm->systemInfo = systemInfo;
2430
2431     if (systemInfo.useUpdateGroups)
2432     {
2433         /* Note: We would like to use dd->nnodes for the atom count estimate,
2434          *       but that is not yet available here. But this anyhow only
2435          *       affect performance up to the second dd_partition_system call.
2436          */
2437         const int homeAtomCountEstimate = mtop->natoms / numPPRanks;
2438         comm->updateGroupsCog           = std::make_unique<gmx::UpdateGroupsCog>(
2439                 *mtop, systemInfo.updateGroupingPerMoleculetype, maxReferenceTemperature(*ir),
2440                 homeAtomCountEstimate);
2441     }
2442
2443     /* Set the DD setup given by ddGridSetup */
2444     copy_ivec(ddGridSetup.numDomains, dd->nc);
2445     dd->ndim = ddGridSetup.numDDDimensions;
2446     copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2447
2448     dd->nnodes = dd->nc[XX] * dd->nc[YY] * dd->nc[ZZ];
2449
2450     snew(comm->slb_frac, DIM);
2451     if (isDlbDisabled(comm))
2452     {
2453         comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
2454         comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
2455         comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
2456     }
2457
2458     /* Set the multi-body cut-off and cellsize limit for DLB */
2459     comm->cutoff_mbody   = systemInfo.minCutoffForMultiBody;
2460     comm->cellsize_limit = systemInfo.cellsizeLimit;
2461     if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2462     {
2463         if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2464         {
2465             /* Set the bonded communication distance to halfway
2466              * the minimum and the maximum,
2467              * since the extra communication cost is nearly zero.
2468              */
2469             real acs           = average_cellsize_min(ddbox, dd->nc);
2470             comm->cutoff_mbody = 0.5 * (systemInfo.minCutoffForMultiBody + acs);
2471             if (!isDlbDisabled(comm))
2472             {
2473                 /* Check if this does not limit the scaling */
2474                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, options.dlbScaling * acs);
2475             }
2476             if (!systemInfo.filterBondedCommunication)
2477             {
2478                 /* Without bBondComm do not go beyond the n.b. cut-off */
2479                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2480                 if (comm->cellsize_limit >= systemInfo.cutoff)
2481                 {
2482                     /* We don't loose a lot of efficieny
2483                      * when increasing it to the n.b. cut-off.
2484                      * It can even be slightly faster, because we need
2485                      * less checks for the communication setup.
2486                      */
2487                     comm->cutoff_mbody = systemInfo.cutoff;
2488                 }
2489             }
2490             /* Check if we did not end up below our original limit */
2491             comm->cutoff_mbody = std::max(comm->cutoff_mbody, systemInfo.minCutoffForMultiBody);
2492
2493             if (comm->cutoff_mbody > comm->cellsize_limit)
2494             {
2495                 comm->cellsize_limit = comm->cutoff_mbody;
2496             }
2497         }
2498         /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2499     }
2500
2501     if (debug)
2502     {
2503         fprintf(debug,
2504                 "Bonded atom communication beyond the cut-off: %s\n"
2505                 "cellsize limit %f\n",
2506                 gmx::boolToString(systemInfo.filterBondedCommunication), comm->cellsize_limit);
2507     }
2508
2509     if (MASTER(cr))
2510     {
2511         check_dd_restrictions(dd, ir, mdlog);
2512     }
2513 }
2514
2515 void dd_init_bondeds(FILE*              fplog,
2516                      gmx_domdec_t*      dd,
2517                      const gmx_mtop_t*  mtop,
2518                      const gmx_vsite_t* vsite,
2519                      const t_inputrec*  ir,
2520                      gmx_bool           bBCheck,
2521                      cginfo_mb_t*       cginfo_mb)
2522 {
2523     gmx_domdec_comm_t* comm;
2524
2525     dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
2526
2527     comm = dd->comm;
2528
2529     if (comm->systemInfo.filterBondedCommunication)
2530     {
2531         /* Communicate atoms beyond the cut-off for bonded interactions */
2532         comm->bondedLinks = makeBondedLinks(mtop, cginfo_mb);
2533     }
2534     else
2535     {
2536         /* Only communicate atoms based on cut-off */
2537         comm->bondedLinks = nullptr;
2538     }
2539 }
2540
2541 static void writeSettings(gmx::TextWriter*   log,
2542                           gmx_domdec_t*      dd,
2543                           const gmx_mtop_t*  mtop,
2544                           const t_inputrec*  ir,
2545                           gmx_bool           bDynLoadBal,
2546                           real               dlb_scale,
2547                           const gmx_ddbox_t* ddbox)
2548 {
2549     gmx_domdec_comm_t* comm;
2550     int                d;
2551     ivec               np;
2552     real               limit, shrink;
2553
2554     comm = dd->comm;
2555
2556     if (bDynLoadBal)
2557     {
2558         log->writeString("The maximum number of communication pulses is:");
2559         for (d = 0; d < dd->ndim; d++)
2560         {
2561             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2562         }
2563         log->ensureLineBreak();
2564         log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm",
2565                                 comm->cellsize_limit);
2566         log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2567         log->writeString("The allowed shrink of domain decomposition cells is:");
2568         for (d = 0; d < DIM; d++)
2569         {
2570             if (dd->nc[d] > 1)
2571             {
2572                 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
2573                 {
2574                     shrink = 0;
2575                 }
2576                 else
2577                 {
2578                     shrink = comm->cellsize_min_dlb[d]
2579                              / (ddbox->box_size[d] * ddbox->skew_fac[d] / dd->nc[d]);
2580                 }
2581                 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2582             }
2583         }
2584         log->ensureLineBreak();
2585     }
2586     else
2587     {
2588         set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2589         log->writeString("The initial number of communication pulses is:");
2590         for (d = 0; d < dd->ndim; d++)
2591         {
2592             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2593         }
2594         log->ensureLineBreak();
2595         log->writeString("The initial domain decomposition cell size is:");
2596         for (d = 0; d < DIM; d++)
2597         {
2598             if (dd->nc[d] > 1)
2599             {
2600                 log->writeStringFormatted(" %c %.2f nm", dim2char(d), dd->comm->cellsize_min[d]);
2601             }
2602         }
2603         log->ensureLineBreak();
2604         log->writeLine();
2605     }
2606
2607     const bool haveInterDomainVsites =
2608             (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
2609
2610     if (comm->systemInfo.haveInterDomainBondeds || haveInterDomainVsites
2611         || comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2612     {
2613         std::string decompUnits;
2614         if (comm->systemInfo.useUpdateGroups)
2615         {
2616             decompUnits = "atom groups";
2617         }
2618         else
2619         {
2620             decompUnits = "atoms";
2621         }
2622
2623         log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:",
2624                                 decompUnits.c_str());
2625         log->writeLineFormatted("%40s  %-7s %6.3f nm", "non-bonded interactions", "",
2626                                 comm->systemInfo.cutoff);
2627
2628         if (bDynLoadBal)
2629         {
2630             limit = dd->comm->cellsize_limit;
2631         }
2632         else
2633         {
2634             if (dd->unitCellInfo.ddBoxIsDynamic)
2635             {
2636                 log->writeLine(
2637                         "(the following are initial values, they could change due to box "
2638                         "deformation)");
2639             }
2640             limit = dd->comm->cellsize_min[XX];
2641             for (d = 1; d < DIM; d++)
2642             {
2643                 limit = std::min(limit, dd->comm->cellsize_min[d]);
2644             }
2645         }
2646
2647         if (comm->systemInfo.haveInterDomainBondeds)
2648         {
2649             log->writeLineFormatted("%40s  %-7s %6.3f nm", "two-body bonded interactions", "(-rdd)",
2650                                     std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2651             log->writeLineFormatted("%40s  %-7s %6.3f nm", "multi-body bonded interactions",
2652                                     "(-rdd)",
2653                                     (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm))
2654                                             ? comm->cutoff_mbody
2655                                             : std::min(comm->systemInfo.cutoff, limit));
2656         }
2657         if (haveInterDomainVsites)
2658         {
2659             log->writeLineFormatted("%40s  %-7s %6.3f nm", "virtual site constructions", "(-rcon)", limit);
2660         }
2661         if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2662         {
2663             std::string separation =
2664                     gmx::formatString("atoms separated by up to %d constraints", 1 + ir->nProjOrder);
2665             log->writeLineFormatted("%40s  %-7s %6.3f nm\n", separation.c_str(), "(-rcon)", limit);
2666         }
2667         log->ensureLineBreak();
2668     }
2669 }
2670
2671 static void logSettings(const gmx::MDLogger& mdlog,
2672                         gmx_domdec_t*        dd,
2673                         const gmx_mtop_t*    mtop,
2674                         const t_inputrec*    ir,
2675                         real                 dlb_scale,
2676                         const gmx_ddbox_t*   ddbox)
2677 {
2678     gmx::StringOutputStream stream;
2679     gmx::TextWriter         log(&stream);
2680     writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2681     if (dd->comm->dlbState == DlbState::offCanTurnOn)
2682     {
2683         {
2684             log.ensureEmptyLine();
2685             log.writeLine(
2686                     "When dynamic load balancing gets turned on, these settings will change to:");
2687         }
2688         writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2689     }
2690     GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2691 }
2692
2693 static void set_cell_limits_dlb(const gmx::MDLogger& mdlog,
2694                                 gmx_domdec_t*        dd,
2695                                 real                 dlb_scale,
2696                                 const t_inputrec*    ir,
2697                                 const gmx_ddbox_t*   ddbox)
2698 {
2699     gmx_domdec_comm_t* comm;
2700     int                d, dim, npulse, npulse_d_max, npulse_d;
2701     gmx_bool           bNoCutOff;
2702
2703     comm = dd->comm;
2704
2705     bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2706
2707     /* Determine the maximum number of comm. pulses in one dimension */
2708
2709     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2710
2711     /* Determine the maximum required number of grid pulses */
2712     if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2713     {
2714         /* Only a single pulse is required */
2715         npulse = 1;
2716     }
2717     else if (!bNoCutOff && comm->cellsize_limit > 0)
2718     {
2719         /* We round down slightly here to avoid overhead due to the latency
2720          * of extra communication calls when the cut-off
2721          * would be only slightly longer than the cell size.
2722          * Later cellsize_limit is redetermined,
2723          * so we can not miss interactions due to this rounding.
2724          */
2725         npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff / comm->cellsize_limit);
2726     }
2727     else
2728     {
2729         /* There is no cell size limit */
2730         npulse = std::max(dd->nc[XX] - 1, std::max(dd->nc[YY] - 1, dd->nc[ZZ] - 1));
2731     }
2732
2733     if (!bNoCutOff && npulse > 1)
2734     {
2735         /* See if we can do with less pulses, based on dlb_scale */
2736         npulse_d_max = 0;
2737         for (d = 0; d < dd->ndim; d++)
2738         {
2739             dim      = dd->dim[d];
2740             npulse_d = static_cast<int>(
2741                     1
2742                     + dd->nc[dim] * comm->systemInfo.cutoff
2743                               / (ddbox->box_size[dim] * ddbox->skew_fac[dim] * dlb_scale));
2744             npulse_d_max = std::max(npulse_d_max, npulse_d);
2745         }
2746         npulse = std::min(npulse, npulse_d_max);
2747     }
2748
2749     /* This env var can override npulse */
2750     d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2751     if (d > 0)
2752     {
2753         npulse = d;
2754     }
2755
2756     comm->maxpulse       = 1;
2757     comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
2758     for (d = 0; d < dd->ndim; d++)
2759     {
2760         if (comm->ddSettings.request1DAnd1Pulse)
2761         {
2762             comm->cd[d].np_dlb = 1;
2763         }
2764         else
2765         {
2766             comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]] - 1);
2767             comm->maxpulse     = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2768         }
2769         if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]] - 1)
2770         {
2771             comm->bVacDLBNoLimit = FALSE;
2772         }
2773     }
2774
2775     /* cellsize_limit is set for LINCS in init_domain_decomposition */
2776     if (!comm->bVacDLBNoLimit)
2777     {
2778         comm->cellsize_limit = std::max(comm->cellsize_limit, comm->systemInfo.cutoff / comm->maxpulse);
2779     }
2780     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2781     /* Set the minimum cell size for each DD dimension */
2782     for (d = 0; d < dd->ndim; d++)
2783     {
2784         if (comm->bVacDLBNoLimit || comm->cd[d].np_dlb * comm->cellsize_limit >= comm->systemInfo.cutoff)
2785         {
2786             comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2787         }
2788         else
2789         {
2790             comm->cellsize_min_dlb[dd->dim[d]] = comm->systemInfo.cutoff / comm->cd[d].np_dlb;
2791         }
2792     }
2793     if (comm->cutoff_mbody <= 0)
2794     {
2795         comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2796     }
2797     if (isDlbOn(comm))
2798     {
2799         set_dlb_limits(dd);
2800     }
2801 }
2802
2803 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t& dd)
2804 {
2805     return dd.comm->systemInfo.moleculesAreAlwaysWhole;
2806 }
2807
2808 gmx_bool dd_bonded_molpbc(const gmx_domdec_t* dd, int ePBC)
2809 {
2810     /* If each molecule is a single charge group
2811      * or we use domain decomposition for each periodic dimension,
2812      * we do not need to take pbc into account for the bonded interactions.
2813      */
2814     return (ePBC != epbcNONE && dd->comm->systemInfo.haveInterDomainBondeds
2815             && !(dd->nc[XX] > 1 && dd->nc[YY] > 1 && (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
2816 }
2817
2818 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2819 static void set_ddgrid_parameters(const gmx::MDLogger& mdlog,
2820                                   gmx_domdec_t*        dd,
2821                                   real                 dlb_scale,
2822                                   const gmx_mtop_t*    mtop,
2823                                   const t_inputrec*    ir,
2824                                   const gmx_ddbox_t*   ddbox)
2825 {
2826     gmx_domdec_comm_t* comm        = dd->comm;
2827     DDRankSetup&       ddRankSetup = comm->ddRankSetup;
2828
2829     if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2830     {
2831         init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2832         if (ddRankSetup.npmedecompdim >= 2)
2833         {
2834             init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2835         }
2836     }
2837     else
2838     {
2839         ddRankSetup.numRanksDoingPme = 0;
2840         if (dd->pme_nodeid >= 0)
2841         {
2842             gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2843                                  "Can not have separate PME ranks without PME electrostatics");
2844         }
2845     }
2846
2847     if (debug)
2848     {
2849         fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2850     }
2851     if (!isDlbDisabled(comm))
2852     {
2853         set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2854     }
2855
2856     logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
2857
2858     real vol_frac;
2859     if (ir->ePBC == epbcNONE)
2860     {
2861         vol_frac = 1 - 1 / static_cast<double>(dd->nnodes);
2862     }
2863     else
2864     {
2865         vol_frac = (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, *ddbox))
2866                    / static_cast<double>(dd->nnodes);
2867     }
2868     if (debug)
2869     {
2870         fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2871     }
2872     int natoms_tot = mtop->natoms;
2873
2874     dd->ga2la = new gmx_ga2la_t(natoms_tot, static_cast<int>(vol_frac * natoms_tot));
2875 }
2876
2877 /*! \brief Get some important DD parameters which can be modified by env.vars */
2878 static DDSettings getDDSettings(const gmx::MDLogger&     mdlog,
2879                                 const DomdecOptions&     options,
2880                                 const gmx::MdrunOptions& mdrunOptions,
2881                                 const t_inputrec&        ir)
2882 {
2883     DDSettings ddSettings;
2884
2885     ddSettings.useSendRecv2        = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2886     ddSettings.dlb_scale_lim       = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2887     ddSettings.request1DAnd1Pulse  = bool(dd_getenv(mdlog, "GMX_DD_1D_1PULSE", 0));
2888     ddSettings.useDDOrderZYX       = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
2889     ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
2890     ddSettings.eFlop               = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2891     const int recload              = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2892     ddSettings.nstDDDump           = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2893     ddSettings.nstDDDumpGrid       = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2894     ddSettings.DD_debug            = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
2895
2896     if (ddSettings.useSendRecv2)
2897     {
2898         GMX_LOG(mdlog.info)
2899                 .appendText(
2900                         "Will use two sequential MPI_Sendrecv calls instead of two simultaneous "
2901                         "non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite "
2902                         "communication");
2903     }
2904
2905     if (ddSettings.eFlop)
2906     {
2907         GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2908         ddSettings.recordLoad = true;
2909     }
2910     else
2911     {
2912         ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
2913     }
2914
2915     ddSettings.initialDlbState = determineInitialDlbState(mdlog, options.dlbOption,
2916                                                           ddSettings.recordLoad, mdrunOptions, &ir);
2917     GMX_LOG(mdlog.info)
2918             .appendTextFormatted("Dynamic load balancing: %s",
2919                                  edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);
2920
2921     return ddSettings;
2922 }
2923
2924 gmx_domdec_t::gmx_domdec_t(const t_inputrec& ir) : unitCellInfo(ir) {}
2925
2926 /*! \brief Return whether the simulation described can run a 1D single-pulse DD.
2927  *
2928  * The GPU halo exchange code requires a 1D single-pulse DD. Such a DD
2929  * generally requires a larger box than other possible decompositions
2930  * with the same rank count, so the calling code might need to decide
2931  * what is the most appropriate way to run the simulation based on
2932  * whether such a DD is possible.
2933  *
2934  * This function works like init_domain_decomposition(), but will not
2935  * give a fatal error, and only uses \c cr for communicating between
2936  * ranks.
2937  *
2938  * It is safe to call before thread-MPI spawns ranks, so that
2939  * thread-MPI can decide whether and how to trigger the GPU halo
2940  * exchange code path. The number of PME ranks, if any, should be set
2941  * in \c options.numPmeRanks.
2942  */
2943 static bool canMake1DAnd1PulseDomainDecomposition(const DDSettings&              ddSettingsOriginal,
2944                                                   const t_commrec*               cr,
2945                                                   const int                      numRanksRequested,
2946                                                   const DomdecOptions&           options,
2947                                                   const gmx_mtop_t&              mtop,
2948                                                   const t_inputrec&              ir,
2949                                                   const matrix                   box,
2950                                                   gmx::ArrayRef<const gmx::RVec> xGlobal)
2951 {
2952     // Ensure we don't write any output from this checking routine
2953     gmx::MDLogger dummyLogger;
2954
2955     DDSystemInfo systemInfo = getSystemInfo(dummyLogger, cr, options, mtop, ir, box, xGlobal);
2956
2957     DDSettings ddSettings             = ddSettingsOriginal;
2958     ddSettings.request1DAnd1Pulse     = true;
2959     const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(
2960             dummyLogger, ddSettings.request1DAnd1Pulse, !isDlbDisabled(ddSettings.initialDlbState),
2961             options.dlbScaling, ir, systemInfo.cellsizeLimit);
2962     gmx_ddbox_t ddbox = { 0 };
2963     DDGridSetup ddGridSetup =
2964             getDDGridSetup(dummyLogger, cr, numRanksRequested, options, ddSettings, systemInfo,
2965                            gridSetupCellsizeLimit, mtop, ir, box, xGlobal, &ddbox);
2966
2967     const bool canMakeDDWith1DAnd1Pulse = (ddGridSetup.numDomains[XX] != 0);
2968
2969     return canMakeDDWith1DAnd1Pulse;
2970 }
2971
2972 bool is1DAnd1PulseDD(const gmx_domdec_t& dd)
2973 {
2974     const int  maxDimensionSize             = std::max(dd.nc[XX], std::max(dd.nc[YY], dd.nc[ZZ]));
2975     const int  productOfDimensionSizes      = dd.nc[XX] * dd.nc[YY] * dd.nc[ZZ];
2976     const bool decompositionHasOneDimension = (maxDimensionSize == productOfDimensionSizes);
2977
2978     const bool hasMax1Pulse =
2979             ((isDlbDisabled(dd.comm) && dd.comm->cellsize_limit >= dd.comm->systemInfo.cutoff)
2980              || (!isDlbDisabled(dd.comm) && dd.comm->maxpulse == 1));
2981
2982     return decompositionHasOneDimension && hasMax1Pulse;
2983 }
2984
2985 namespace gmx
2986 {
2987
2988 // TODO once the functionality stablizes, move this class and
2989 // supporting functionality into builder.cpp
2990 /*! \brief Impl class for DD builder */
2991 class DomainDecompositionBuilder::Impl
2992 {
2993 public:
2994     //! Constructor
2995     Impl(const MDLogger&      mdlog,
2996          t_commrec*           cr,
2997          const DomdecOptions& options,
2998          const MdrunOptions&  mdrunOptions,
2999          bool                 prefer1DAnd1Pulse,
3000          const gmx_mtop_t&    mtop,
3001          const t_inputrec&    ir,
3002          const matrix         box,
3003          ArrayRef<const RVec> xGlobal);
3004
3005     //! Build the resulting DD manager
3006     gmx_domdec_t* build(LocalAtomSetManager* atomSets);
3007
3008     //! Objects used in constructing and configuring DD
3009     //! {
3010     //! Logging object
3011     const MDLogger& mdlog_;
3012     //! Communication object
3013     t_commrec* cr_;
3014     //! User-supplied options configuring DD behavior
3015     const DomdecOptions options_;
3016     //! Global system topology
3017     const gmx_mtop_t& mtop_;
3018     //! User input values from the tpr file
3019     const t_inputrec& ir_;
3020     //! }
3021
3022     //! Internal objects used in constructing DD
3023     //! {
3024     //! Settings combined from the user input
3025     DDSettings ddSettings_;
3026     //! Information derived from the simulation system
3027     DDSystemInfo systemInfo_;
3028     //! Box structure
3029     gmx_ddbox_t ddbox_ = { 0 };
3030     //! Organization of the DD grids
3031     DDGridSetup ddGridSetup_;
3032     //! Organzation of the DD ranks
3033     DDRankSetup ddRankSetup_;
3034     //! Number of DD cells in each dimension
3035     ivec ddCellIndex_ = { 0, 0, 0 };
3036     //! IDs of PME-only ranks
3037     std::vector<int> pmeRanks_;
3038     //! Contains a valid Cartesian-communicator-based setup, or defaults.
3039     CartesianRankSetup cartSetup_;
3040     //! }
3041 };
3042
3043 DomainDecompositionBuilder::Impl::Impl(const MDLogger&      mdlog,
3044                                        t_commrec*           cr,
3045                                        const DomdecOptions& options,
3046                                        const MdrunOptions&  mdrunOptions,
3047                                        const bool           prefer1DAnd1Pulse,
3048                                        const gmx_mtop_t&    mtop,
3049                                        const t_inputrec&    ir,
3050                                        const matrix         box,
3051                                        ArrayRef<const RVec> xGlobal) :
3052     mdlog_(mdlog),
3053     cr_(cr),
3054     options_(options),
3055     mtop_(mtop),
3056     ir_(ir)
3057 {
3058     GMX_LOG(mdlog_.info).appendTextFormatted("\nInitializing Domain Decomposition on %d ranks", cr_->nnodes);
3059
3060     ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_);
3061
3062     if (prefer1DAnd1Pulse
3063         && canMake1DAnd1PulseDomainDecomposition(ddSettings_, cr_, cr_->nnodes, options_, mtop_,
3064                                                  ir_, box, xGlobal))
3065     {
3066         ddSettings_.request1DAnd1Pulse = true;
3067     }
3068
3069     if (ddSettings_.eFlop > 1)
3070     {
3071         /* Ensure that we have different random flop counts on different ranks */
3072         srand(1 + cr_->nodeid);
3073     }
3074
3075     systemInfo_ = getSystemInfo(mdlog_, cr_, options_, mtop_, ir_, box, xGlobal);
3076
3077     const int numRanksRequested = cr_->nnodes;
3078     checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir_.coulombtype), options_.numPmeRanks);
3079
3080     // DD grid setup uses a more different cell size limit for
3081     // automated setup than the one in systemInfo_. The latter is used
3082     // in set_dd_limits() to configure DLB, for example.
3083     const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(
3084             mdlog_, ddSettings_.request1DAnd1Pulse, !isDlbDisabled(ddSettings_.initialDlbState),
3085             options_.dlbScaling, ir_, systemInfo_.cellsizeLimit);
3086     ddGridSetup_ = getDDGridSetup(mdlog_, cr_, numRanksRequested, options_, ddSettings_, systemInfo_,
3087                                   gridSetupCellsizeLimit, mtop_, ir_, box, xGlobal, &ddbox_);
3088     checkDDGridSetup(ddGridSetup_, cr_, options_, ddSettings_, systemInfo_, gridSetupCellsizeLimit, ddbox_);
3089
3090     cr_->npmenodes = ddGridSetup_.numPmeOnlyRanks;
3091
3092     ddRankSetup_ = getDDRankSetup(mdlog_, cr_, ddGridSetup_, ir_);
3093
3094     /* Generate the group communicator, also decides the duty of each rank */
3095     cartSetup_ = makeGroupCommunicators(mdlog_, ddSettings_, options_.rankOrder, ddRankSetup_, cr_,
3096                                         ddCellIndex_, &pmeRanks_);
3097 }
3098
3099 gmx_domdec_t* DomainDecompositionBuilder::Impl::build(LocalAtomSetManager* atomSets)
3100 {
3101     gmx_domdec_t* dd = new gmx_domdec_t(ir_);
3102
3103     copy_ivec(ddCellIndex_, dd->ci);
3104
3105     dd->comm = init_dd_comm();
3106
3107     dd->comm->ddRankSetup        = ddRankSetup_;
3108     dd->comm->cartesianRankSetup = cartSetup_;
3109
3110     set_dd_limits(mdlog_, cr_, dd, options_, ddSettings_, systemInfo_, ddGridSetup_,
3111                   ddRankSetup_.numPPRanks, &mtop_, &ir_, ddbox_);
3112
3113     setupGroupCommunication(mdlog_, ddSettings_, pmeRanks_, cr_, mtop_.natoms, dd);
3114
3115     if (thisRankHasDuty(cr_, DUTY_PP))
3116     {
3117         set_ddgrid_parameters(mdlog_, dd, options_.dlbScaling, &mtop_, &ir_, &ddbox_);
3118
3119         setup_neighbor_relations(dd);
3120     }
3121
3122     /* Set overallocation to avoid frequent reallocation of arrays */
3123     set_over_alloc_dd(TRUE);
3124
3125     dd->atomSets = atomSets;
3126
3127     return dd;
3128 }
3129
3130 DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger&      mdlog,
3131                                                        t_commrec*           cr,
3132                                                        const DomdecOptions& options,
3133                                                        const MdrunOptions&  mdrunOptions,
3134                                                        const bool           prefer1DAnd1Pulse,
3135                                                        const gmx_mtop_t&    mtop,
3136                                                        const t_inputrec&    ir,
3137                                                        const matrix         box,
3138                                                        ArrayRef<const RVec> xGlobal) :
3139     impl_(new Impl(mdlog, cr, options, mdrunOptions, prefer1DAnd1Pulse, mtop, ir, box, xGlobal))
3140 {
3141 }
3142
3143 gmx_domdec_t* DomainDecompositionBuilder::build(LocalAtomSetManager* atomSets)
3144 {
3145     return impl_->build(atomSets);
3146 }
3147
3148 DomainDecompositionBuilder::~DomainDecompositionBuilder() = default;
3149
3150 } // namespace gmx
3151
3152 static gmx_bool test_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3153 {
3154     gmx_domdec_t* dd;
3155     gmx_ddbox_t   ddbox;
3156     int           d, dim, np;
3157     real          inv_cell_size;
3158     int           LocallyLimited;
3159
3160     dd = cr->dd;
3161
3162     set_ddbox(*dd, false, box, true, x, &ddbox);
3163
3164     LocallyLimited = 0;
3165
3166     for (d = 0; d < dd->ndim; d++)
3167     {
3168         dim = dd->dim[d];
3169
3170         inv_cell_size = DD_CELL_MARGIN * dd->nc[dim] / ddbox.box_size[dim];
3171         if (dd->unitCellInfo.ddBoxIsDynamic)
3172         {
3173             inv_cell_size *= DD_PRES_SCALE_MARGIN;
3174         }
3175
3176         np = 1 + static_cast<int>(cutoffRequested * inv_cell_size * ddbox.skew_fac[dim]);
3177
3178         if (dd->comm->ddSettings.request1DAnd1Pulse && np > 1)
3179         {
3180             return FALSE;
3181         }
3182
3183         if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3184         {
3185             if (np > dd->comm->cd[d].np_dlb)
3186             {
3187                 return FALSE;
3188             }
3189
3190             /* If a current local cell size is smaller than the requested
3191              * cut-off, we could still fix it, but this gets very complicated.
3192              * Without fixing here, we might actually need more checks.
3193              */
3194             real cellSizeAlongDim =
3195                     (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim]) * ddbox.skew_fac[dim];
3196             if (cellSizeAlongDim * dd->comm->cd[d].np_dlb < cutoffRequested)
3197             {
3198                 LocallyLimited = 1;
3199             }
3200         }
3201     }
3202
3203     if (!isDlbDisabled(dd->comm))
3204     {
3205         /* If DLB is not active yet, we don't need to check the grid jumps.
3206          * Actually we shouldn't, because then the grid jump data is not set.
3207          */
3208         if (isDlbOn(dd->comm) && check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3209         {
3210             LocallyLimited = 1;
3211         }
3212
3213         gmx_sumi(1, &LocallyLimited, cr);
3214
3215         if (LocallyLimited > 0)
3216         {
3217             return FALSE;
3218         }
3219     }
3220
3221     return TRUE;
3222 }
3223
3224 gmx_bool change_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3225 {
3226     gmx_bool bCutoffAllowed;
3227
3228     bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3229
3230     if (bCutoffAllowed)
3231     {
3232         cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3233     }
3234
3235     return bCutoffAllowed;
3236 }