src/gromacs/domdec/domdec.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2005,2006,2007,2008,2009 by the GROMACS development team.
   5  * Copyright (c) 2010,2011,2012,2013,2014 by the GROMACS development team.
   6  * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team.
   7  * Copyright (c) 2020,2021, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38
  39 #include "gmxpre.h"
  40
  41 #include "domdec.h"
  42
  43 #include "config.h"
  44
  45 #include <cassert>
  46 #include <cinttypes>
  47 #include <climits>
  48 #include <cmath>
  49 #include <cstdio>
  50 #include <cstdlib>
  51 #include <cstring>
  52
  53 #include <algorithm>
  54 #include <memory>
  55
  56 #include "gromacs/domdec/builder.h"
  57 #include "gromacs/domdec/collect.h"
  58 #include "gromacs/domdec/dlb.h"
  59 #include "gromacs/domdec/dlbtiming.h"
  60 #include "gromacs/domdec/domdec_network.h"
  61 #include "gromacs/domdec/ga2la.h"
  62 #include "gromacs/domdec/gpuhaloexchange.h"
  63 #include "gromacs/domdec/options.h"
  64 #include "gromacs/domdec/partition.h"
  65 #include "gromacs/gmxlib/network.h"
  66 #include "gromacs/gmxlib/nrnb.h"
  67 #include "gromacs/gpu_utils/device_stream_manager.h"
  68 #include "gromacs/gpu_utils/gpu_utils.h"
  69 #include "gromacs/hardware/hw_info.h"
  70 #include "gromacs/math/vec.h"
  71 #include "gromacs/math/vectypes.h"
  72 #include "gromacs/mdlib/calc_verletbuf.h"
  73 #include "gromacs/mdlib/constr.h"
  74 #include "gromacs/mdlib/constraintrange.h"
  75 #include "gromacs/mdlib/updategroups.h"
  76 #include "gromacs/mdlib/vsite.h"
  77 #include "gromacs/mdtypes/commrec.h"
  78 #include "gromacs/mdtypes/forceoutput.h"
  79 #include "gromacs/mdtypes/inputrec.h"
  80 #include "gromacs/mdtypes/mdrunoptions.h"
  81 #include "gromacs/mdtypes/state.h"
  82 #include "gromacs/pbcutil/ishift.h"
  83 #include "gromacs/pbcutil/pbc.h"
  84 #include "gromacs/pulling/pull.h"
  85 #include "gromacs/timing/wallcycle.h"
  86 #include "gromacs/topology/block.h"
  87 #include "gromacs/topology/idef.h"
  88 #include "gromacs/topology/ifunc.h"
  89 #include "gromacs/topology/mtop_lookup.h"
  90 #include "gromacs/topology/mtop_util.h"
  91 #include "gromacs/topology/topology.h"
  92 #include "gromacs/utility/basedefinitions.h"
  93 #include "gromacs/utility/basenetwork.h"
  94 #include "gromacs/utility/cstringutil.h"
  95 #include "gromacs/utility/exceptions.h"
  96 #include "gromacs/utility/fatalerror.h"
  97 #include "gromacs/utility/gmxmpi.h"
  98 #include "gromacs/utility/logger.h"
  99 #include "gromacs/utility/real.h"
 100 #include "gromacs/utility/smalloc.h"
 101 #include "gromacs/utility/strconvert.h"
 102 #include "gromacs/utility/stringstream.h"
 103 #include "gromacs/utility/stringutil.h"
 104 #include "gromacs/utility/textwriter.h"
 105
 106 #include "atomdistribution.h"
 107 #include "box.h"
 108 #include "cellsizes.h"
 109 #include "distribute.h"
 110 #include "domdec_constraints.h"
 111 #include "domdec_internal.h"
 112 #include "domdec_setup.h"
 113 #include "domdec_vsite.h"
 114 #include "redistribute.h"
 115 #include "utility.h"
 116
 117 // TODO remove this when moving domdec into gmx namespace
 118 using gmx::DdRankOrder;
 119 using gmx::DlbOption;
 120 using gmx::DomdecOptions;
 121
 122 static const char* edlbs_names[int(DlbState::nr)] = { "off", "off", "auto", "locked", "on", "on" };
 123
 124 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
 125 #define DD_CGIBS 2
 126
 127 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
 128 #define DD_FLAG_NRCG 65535
 129 #define DD_FLAG_FW(d) (1 << (16 + (d)*2))
 130 #define DD_FLAG_BW(d) (1 << (16 + (d)*2 + 1))
 131
 132 /* The DD zone order */
 133 static const ivec dd_zo[DD_MAXZONE] = { { 0, 0, 0 }, { 1, 0, 0 }, { 1, 1, 0 }, { 0, 1, 0 },
 134                                         { 0, 1, 1 }, { 0, 0, 1 }, { 1, 0, 1 }, { 1, 1, 1 } };
 135
 136 /* The non-bonded zone-pair setup for domain decomposition
 137  * The first number is the i-zone, the second number the first j-zone seen by
 138  * this i-zone, the third number the last+1 j-zone seen by this i-zone.
 139  * As is, this is for 3D decomposition, where there are 4 i-zones.
 140  * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
 141  * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
 142  */
 143 static const int ddNonbondedZonePairRanges[DD_MAXIZONE][3] = { { 0, 0, 8 },
 144                                                                { 1, 3, 6 },
 145                                                                { 2, 5, 6 },
 146                                                                { 3, 5, 7 } };
 147
 148 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
 149 {
 150     xyz[XX] = ind / (nc[YY] * nc[ZZ]);
 151     xyz[YY] = (ind / nc[ZZ]) % nc[YY];
 152     xyz[ZZ] = ind % nc[ZZ];
 153 }
 154
 155 static int ddcoord2ddnodeid(gmx_domdec_t* dd, ivec c)
 156 {
 157     int ddnodeid = -1;
 158
 159     const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
 160     const int                 ddindex   = dd_index(dd->numCells, c);
 161     if (cartSetup.bCartesianPP_PME)
 162     {
 163         ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
 164     }
 165     else if (cartSetup.bCartesianPP)
 166     {
 167 #if GMX_MPI
 168         MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
 169 #endif
 170     }
 171     else
 172     {
 173         ddnodeid = ddindex;
 174     }
 175
 176     return ddnodeid;
 177 }
 178
 179 int ddglatnr(const gmx_domdec_t* dd, int i)
 180 {
 181     int atnr = 0;
 182
 183     if (dd == nullptr)
 184     {
 185         atnr = i + 1;
 186     }
 187     else
 188     {
 189         if (i >= dd->comm->atomRanges.numAtomsTotal())
 190         {
 191             gmx_fatal(FARGS,
 192                       "glatnr called with %d, which is larger than the local number of atoms (%d)",
 193                       i,
 194                       dd->comm->atomRanges.numAtomsTotal());
 195         }
 196         atnr = dd->globalAtomIndices[i] + 1;
 197     }
 198
 199     return atnr;
 200 }
 201
 202 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t& dd)
 203 {
 204     GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
 205     return dd.comm->systemInfo.updateGroupingPerMoleculetype;
 206 }
 207
 208 void dd_store_state(gmx_domdec_t* dd, t_state* state)
 209 {
 210     if (state->ddp_count != dd->ddp_count)
 211     {
 212         gmx_incons("The MD state does not match the domain decomposition state");
 213     }
 214
 215     state->cg_gl.resize(dd->ncg_home);
 216     for (int i = 0; i < dd->ncg_home; i++)
 217     {
 218         state->cg_gl[i] = dd->globalAtomGroupIndices[i];
 219     }
 220
 221     state->ddp_count_cg_gl = dd->ddp_count;
 222 }
 223
 224 gmx_domdec_zones_t* domdec_zones(gmx_domdec_t* dd)
 225 {
 226     return &dd->comm->zones;
 227 }
 228
 229 int dd_numAtomsZones(const gmx_domdec_t& dd)
 230 {
 231     return dd.comm->atomRanges.end(DDAtomRanges::Type::Zones);
 232 }
 233
 234 int dd_numHomeAtoms(const gmx_domdec_t& dd)
 235 {
 236     return dd.comm->atomRanges.numHomeAtoms();
 237 }
 238
 239 int dd_natoms_mdatoms(const gmx_domdec_t* dd)
 240 {
 241     /* We currently set mdatoms entries for all atoms:
 242      * local + non-local + communicated for vsite + constraints
 243      */
 244
 245     return dd->comm->atomRanges.numAtomsTotal();
 246 }
 247
 248 int dd_natoms_vsite(const gmx_domdec_t* dd)
 249 {
 250     return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
 251 }
 252
 253 void dd_get_constraint_range(const gmx_domdec_t* dd, int* at_start, int* at_end)
 254 {
 255     *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
 256     *at_end   = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
 257 }
 258
 259 void dd_move_x(gmx_domdec_t* dd, const matrix box, gmx::ArrayRef<gmx::RVec> x, gmx_wallcycle* wcycle)
 260 {
 261     wallcycle_start(wcycle, ewcMOVEX);
 262
 263     rvec shift = { 0, 0, 0 };
 264
 265     gmx_domdec_comm_t* comm = dd->comm;
 266
 267     int nzone   = 1;
 268     int nat_tot = comm->atomRanges.numHomeAtoms();
 269     for (int d = 0; d < dd->ndim; d++)
 270     {
 271         const bool bPBC   = (dd->ci[dd->dim[d]] == 0);
 272         const bool bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
 273         if (bPBC)
 274         {
 275             copy_rvec(box[dd->dim[d]], shift);
 276         }
 277         gmx_domdec_comm_dim_t* cd = &comm->cd[d];
 278         for (const gmx_domdec_ind_t& ind : cd->ind)
 279         {
 280             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 281             gmx::ArrayRef<gmx::RVec>& sendBuffer = sendBufferAccess.buffer;
 282             int                       n          = 0;
 283             if (!bPBC)
 284             {
 285                 for (int j : ind.index)
 286                 {
 287                     sendBuffer[n] = x[j];
 288                     n++;
 289                 }
 290             }
 291             else if (!bScrew)
 292             {
 293                 for (int j : ind.index)
 294                 {
 295                     /* We need to shift the coordinates */
 296                     for (int d = 0; d < DIM; d++)
 297                     {
 298                         sendBuffer[n][d] = x[j][d] + shift[d];
 299                     }
 300                     n++;
 301                 }
 302             }
 303             else
 304             {
 305                 for (int j : ind.index)
 306                 {
 307                     /* Shift x */
 308                     sendBuffer[n][XX] = x[j][XX] + shift[XX];
 309                     /* Rotate y and z.
 310                      * This operation requires a special shift force
 311                      * treatment, which is performed in calc_vir.
 312                      */
 313                     sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
 314                     sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
 315                     n++;
 316                 }
 317             }
 318
 319             DDBufferAccess<gmx::RVec> receiveBufferAccess(
 320                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 321
 322             gmx::ArrayRef<gmx::RVec> receiveBuffer;
 323             if (cd->receiveInPlace)
 324             {
 325                 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
 326             }
 327             else
 328             {
 329                 receiveBuffer = receiveBufferAccess.buffer;
 330             }
 331             /* Send and receive the coordinates */
 332             ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
 333
 334             if (!cd->receiveInPlace)
 335             {
 336                 int j = 0;
 337                 for (int zone = 0; zone < nzone; zone++)
 338                 {
 339                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 340                     {
 341                         x[i] = receiveBuffer[j++];
 342                     }
 343                 }
 344             }
 345             nat_tot += ind.nrecv[nzone + 1];
 346         }
 347         nzone += nzone;
 348     }
 349
 350     wallcycle_stop(wcycle, ewcMOVEX);
 351 }
 352
 353 void dd_move_f(gmx_domdec_t* dd, gmx::ForceWithShiftForces* forceWithShiftForces, gmx_wallcycle* wcycle)
 354 {
 355     wallcycle_start(wcycle, ewcMOVEF);
 356
 357     gmx::ArrayRef<gmx::RVec> f      = forceWithShiftForces->force();
 358     gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();
 359
 360     gmx_domdec_comm_t& comm    = *dd->comm;
 361     int                nzone   = comm.zones.n / 2;
 362     int                nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
 363     for (int d = dd->ndim - 1; d >= 0; d--)
 364     {
 365         /* Only forces in domains near the PBC boundaries need to
 366            consider PBC in the treatment of fshift */
 367         const bool shiftForcesNeedPbc =
 368                 (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
 369         const bool applyScrewPbc =
 370                 (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
 371         /* Determine which shift vector we need */
 372         ivec vis        = { 0, 0, 0 };
 373         vis[dd->dim[d]] = 1;
 374         const int is    = IVEC2IS(vis);
 375
 376         /* Loop over the pulses */
 377         const gmx_domdec_comm_dim_t& cd = comm.cd[d];
 378         for (int p = cd.numPulses() - 1; p >= 0; p--)
 379         {
 380             const gmx_domdec_ind_t&   ind = cd.ind[p];
 381             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
 382             gmx::ArrayRef<gmx::RVec>& receiveBuffer = receiveBufferAccess.buffer;
 383
 384             nat_tot -= ind.nrecv[nzone + 1];
 385
 386             DDBufferAccess<gmx::RVec> sendBufferAccess(
 387                     comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 388
 389             gmx::ArrayRef<gmx::RVec> sendBuffer;
 390             if (cd.receiveInPlace)
 391             {
 392                 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
 393             }
 394             else
 395             {
 396                 sendBuffer = sendBufferAccess.buffer;
 397                 int j      = 0;
 398                 for (int zone = 0; zone < nzone; zone++)
 399                 {
 400                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 401                     {
 402                         sendBuffer[j++] = f[i];
 403                     }
 404                 }
 405             }
 406             /* Communicate the forces */
 407             ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
 408             /* Add the received forces */
 409             int n = 0;
 410             if (!shiftForcesNeedPbc)
 411             {
 412                 for (int j : ind.index)
 413                 {
 414                     for (int d = 0; d < DIM; d++)
 415                     {
 416                         f[j][d] += receiveBuffer[n][d];
 417                     }
 418                     n++;
 419                 }
 420             }
 421             else if (!applyScrewPbc)
 422             {
 423                 for (int j : ind.index)
 424                 {
 425                     for (int d = 0; d < DIM; d++)
 426                     {
 427                         f[j][d] += receiveBuffer[n][d];
 428                     }
 429                     /* Add this force to the shift force */
 430                     for (int d = 0; d < DIM; d++)
 431                     {
 432                         fshift[is][d] += receiveBuffer[n][d];
 433                     }
 434                     n++;
 435                 }
 436             }
 437             else
 438             {
 439                 for (int j : ind.index)
 440                 {
 441                     /* Rotate the force */
 442                     f[j][XX] += receiveBuffer[n][XX];
 443                     f[j][YY] -= receiveBuffer[n][YY];
 444                     f[j][ZZ] -= receiveBuffer[n][ZZ];
 445                     if (shiftForcesNeedPbc)
 446                     {
 447                         /* Add this force to the shift force */
 448                         for (int d = 0; d < DIM; d++)
 449                         {
 450                             fshift[is][d] += receiveBuffer[n][d];
 451                         }
 452                     }
 453                     n++;
 454                 }
 455             }
 456         }
 457         nzone /= 2;
 458     }
 459     wallcycle_stop(wcycle, ewcMOVEF);
 460 }
 461
 462 /* Convenience function for extracting a real buffer from an rvec buffer
 463  *
 464  * To reduce the number of temporary communication buffers and avoid
 465  * cache polution, we reuse gmx::RVec buffers for storing reals.
 466  * This functions return a real buffer reference with the same number
 467  * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
 468  */
 469 static gmx::ArrayRef<real> realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
 470 {
 471     return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0], arrayRef.size());
 472 }
 473
 474 void dd_atom_spread_real(gmx_domdec_t* dd, real v[])
 475 {
 476     gmx_domdec_comm_t* comm = dd->comm;
 477
 478     int nzone   = 1;
 479     int nat_tot = comm->atomRanges.numHomeAtoms();
 480     for (int d = 0; d < dd->ndim; d++)
 481     {
 482         gmx_domdec_comm_dim_t* cd = &comm->cd[d];
 483         for (const gmx_domdec_ind_t& ind : cd->ind)
 484         {
 485             /* Note: We provision for RVec instead of real, so a factor of 3
 486              * more than needed. The buffer actually already has this size
 487              * and we pass a plain pointer below, so this does not matter.
 488              */
 489             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 490             gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 491             int                 n          = 0;
 492             for (int j : ind.index)
 493             {
 494                 sendBuffer[n++] = v[j];
 495             }
 496
 497             DDBufferAccess<gmx::RVec> receiveBufferAccess(
 498                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 499
 500             gmx::ArrayRef<real> receiveBuffer;
 501             if (cd->receiveInPlace)
 502             {
 503                 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 504             }
 505             else
 506             {
 507                 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 508             }
 509             /* Send and receive the data */
 510             ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
 511             if (!cd->receiveInPlace)
 512             {
 513                 int j = 0;
 514                 for (int zone = 0; zone < nzone; zone++)
 515                 {
 516                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 517                     {
 518                         v[i] = receiveBuffer[j++];
 519                     }
 520                 }
 521             }
 522             nat_tot += ind.nrecv[nzone + 1];
 523         }
 524         nzone += nzone;
 525     }
 526 }
 527
 528 void dd_atom_sum_real(gmx_domdec_t* dd, real v[])
 529 {
 530     gmx_domdec_comm_t* comm = dd->comm;
 531
 532     int nzone   = comm->zones.n / 2;
 533     int nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
 534     for (int d = dd->ndim - 1; d >= 0; d--)
 535     {
 536         gmx_domdec_comm_dim_t* cd = &comm->cd[d];
 537         for (int p = cd->numPulses() - 1; p >= 0; p--)
 538         {
 539             const gmx_domdec_ind_t& ind = cd->ind[p];
 540
 541             /* Note: We provision for RVec instead of real, so a factor of 3
 542              * more than needed. The buffer actually already has this size
 543              * and we typecast, so this works as intended.
 544              */
 545             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 546             gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 547             nat_tot -= ind.nrecv[nzone + 1];
 548
 549             DDBufferAccess<gmx::RVec> sendBufferAccess(
 550                     comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 551
 552             gmx::ArrayRef<real> sendBuffer;
 553             if (cd->receiveInPlace)
 554             {
 555                 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 556             }
 557             else
 558             {
 559                 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 560                 int j      = 0;
 561                 for (int zone = 0; zone < nzone; zone++)
 562                 {
 563                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 564                     {
 565                         sendBuffer[j++] = v[i];
 566                     }
 567                 }
 568             }
 569             /* Communicate the forces */
 570             ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
 571             /* Add the received forces */
 572             int n = 0;
 573             for (int j : ind.index)
 574             {
 575                 v[j] += receiveBuffer[n];
 576                 n++;
 577             }
 578         }
 579         nzone /= 2;
 580     }
 581 }
 582
 583 real dd_cutoff_multibody(const gmx_domdec_t* dd)
 584 {
 585     const gmx_domdec_comm_t& comm       = *dd->comm;
 586     const DDSystemInfo&      systemInfo = comm.systemInfo;
 587
 588     real r = -1;
 589     if (systemInfo.haveInterDomainMultiBodyBondeds)
 590     {
 591         if (comm.cutoff_mbody > 0)
 592         {
 593             r = comm.cutoff_mbody;
 594         }
 595         else
 596         {
 597             /* cutoff_mbody=0 means we do not have DLB */
 598             r = comm.cellsize_min[dd->dim[0]];
 599             for (int di = 1; di < dd->ndim; di++)
 600             {
 601                 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
 602             }
 603             if (comm.systemInfo.filterBondedCommunication)
 604             {
 605                 r = std::max(r, comm.cutoff_mbody);
 606             }
 607             else
 608             {
 609                 r = std::min(r, systemInfo.cutoff);
 610             }
 611         }
 612     }
 613
 614     return r;
 615 }
 616
 617 real dd_cutoff_twobody(const gmx_domdec_t* dd)
 618 {
 619     const real r_mb = dd_cutoff_multibody(dd);
 620
 621     return std::max(dd->comm->systemInfo.cutoff, r_mb);
 622 }
 623
 624
 625 static void dd_cart_coord2pmecoord(const DDRankSetup&        ddRankSetup,
 626                                    const CartesianRankSetup& cartSetup,
 627                                    const ivec                coord,
 628                                    ivec                      coord_pme)
 629 {
 630     const int nc   = ddRankSetup.numPPCells[cartSetup.cartpmedim];
 631     const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
 632     copy_ivec(coord, coord_pme);
 633     coord_pme[cartSetup.cartpmedim] =
 634             nc + (coord[cartSetup.cartpmedim] * (ntot - nc) + (ntot - nc) / 2) / nc;
 635 }
 636
 637 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
 638 static int ddindex2pmeindex(const DDRankSetup& ddRankSetup, const int ddCellIndex)
 639 {
 640     const int npp  = ddRankSetup.numPPRanks;
 641     const int npme = ddRankSetup.numRanksDoingPme;
 642
 643     /* Here we assign a PME node to communicate with this DD node
 644      * by assuming that the major index of both is x.
 645      * We add npme/2 to obtain an even distribution.
 646      */
 647     return (ddCellIndex * npme + npme / 2) / npp;
 648 }
 649
 650 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup& ddRankSetup)
 651 {
 652     std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
 653
 654     int n = 0;
 655     for (int i = 0; i < ddRankSetup.numPPRanks; i++)
 656     {
 657         const int p0 = ddindex2pmeindex(ddRankSetup, i);
 658         const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
 659         if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
 660         {
 661             if (debug)
 662             {
 663                 fprintf(debug, "pme_rank[%d] = %d\n", n, i + 1 + n);
 664             }
 665             pmeRanks[n] = i + 1 + n;
 666             n++;
 667         }
 668     }
 669
 670     return pmeRanks;
 671 }
 672
 673 static int gmx_ddcoord2pmeindex(const t_commrec* cr, int x, int y, int z)
 674 {
 675     ivec coords;
 676
 677     gmx_domdec_t* dd = cr->dd;
 678     coords[XX]       = x;
 679     coords[YY]       = y;
 680     coords[ZZ]       = z;
 681     const int slab   = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->numCells, coords));
 682
 683     return slab;
 684 }
 685
 686 static int ddcoord2simnodeid(const t_commrec* cr, int x, int y, int z)
 687 {
 688     const CartesianRankSetup& cartSetup = cr->dd->comm->cartesianRankSetup;
 689     ivec                      coords    = { x, y, z };
 690     int                       nodeid    = -1;
 691
 692     if (cartSetup.bCartesianPP_PME)
 693     {
 694 #if GMX_MPI
 695         MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
 696 #endif
 697     }
 698     else
 699     {
 700         int ddindex = dd_index(cr->dd->numCells, coords);
 701         if (cartSetup.bCartesianPP)
 702         {
 703             nodeid = cartSetup.ddindex2simnodeid[ddindex];
 704         }
 705         else
 706         {
 707             if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
 708             {
 709                 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
 710             }
 711             else
 712             {
 713                 nodeid = ddindex;
 714             }
 715         }
 716     }
 717
 718     return nodeid;
 719 }
 720
 721 static int dd_simnode2pmenode(const DDRankSetup&        ddRankSetup,
 722                               const CartesianRankSetup& cartSetup,
 723                               gmx::ArrayRef<const int>  pmeRanks,
 724                               const t_commrec gmx_unused* cr,
 725                               const int                   sim_nodeid)
 726 {
 727     int pmenode = -1;
 728
 729     /* This assumes a uniform x domain decomposition grid cell size */
 730     if (cartSetup.bCartesianPP_PME)
 731     {
 732 #if GMX_MPI
 733         ivec coord, coord_pme;
 734         MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
 735         if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
 736         {
 737             /* This is a PP rank */
 738             dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
 739             MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
 740         }
 741 #endif
 742     }
 743     else if (cartSetup.bCartesianPP)
 744     {
 745         if (sim_nodeid < ddRankSetup.numPPRanks)
 746         {
 747             pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
 748         }
 749     }
 750     else
 751     {
 752         /* This assumes DD cells with identical x coordinates
 753          * are numbered sequentially.
 754          */
 755         if (pmeRanks.empty())
 756         {
 757             if (sim_nodeid < ddRankSetup.numPPRanks)
 758             {
 759                 /* The DD index equals the nodeid */
 760                 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
 761             }
 762         }
 763         else
 764         {
 765             int i = 0;
 766             while (sim_nodeid > pmeRanks[i])
 767             {
 768                 i++;
 769             }
 770             if (sim_nodeid < pmeRanks[i])
 771             {
 772                 pmenode = pmeRanks[i];
 773             }
 774         }
 775     }
 776
 777     return pmenode;
 778 }
 779
 780 NumPmeDomains getNumPmeDomains(const gmx_domdec_t* dd)
 781 {
 782     if (dd != nullptr)
 783     {
 784         return { dd->comm->ddRankSetup.npmenodes_x, dd->comm->ddRankSetup.npmenodes_y };
 785     }
 786     else
 787     {
 788         return { 1, 1 };
 789     }
 790 }
 791
 792 std::vector<int> get_pme_ddranks(const t_commrec* cr, const int pmenodeid)
 793 {
 794     const DDRankSetup&        ddRankSetup = cr->dd->comm->ddRankSetup;
 795     const CartesianRankSetup& cartSetup   = cr->dd->comm->cartesianRankSetup;
 796     GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks,
 797                        "This function should only be called when PME-only ranks are in use");
 798     std::vector<int> ddranks;
 799     ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1) / ddRankSetup.numRanksDoingPme);
 800
 801     for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
 802     {
 803         for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
 804         {
 805             for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
 806             {
 807                 if (cartSetup.bCartesianPP_PME)
 808                 {
 809                     ivec coord = { x, y, z };
 810                     ivec coord_pme;
 811                     dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
 812                     if (cr->dd->ci[XX] == coord_pme[XX] && cr->dd->ci[YY] == coord_pme[YY]
 813                         && cr->dd->ci[ZZ] == coord_pme[ZZ])
 814                     {
 815                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 816                     }
 817                 }
 818                 else
 819                 {
 820                     /* The slab corresponds to the nodeid in the PME group */
 821                     if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
 822                     {
 823                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 824                     }
 825                 }
 826             }
 827         }
 828     }
 829     return ddranks;
 830 }
 831
 832 static gmx_bool receive_vir_ener(const gmx_domdec_t* dd, gmx::ArrayRef<const int> pmeRanks, const t_commrec* cr)
 833 {
 834     bool bReceive = true;
 835
 836     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
 837     if (ddRankSetup.usePmeOnlyRanks)
 838     {
 839         const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
 840         if (cartSetup.bCartesianPP_PME)
 841         {
 842 #if GMX_MPI
 843             int  pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
 844             ivec coords;
 845             MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
 846             coords[cartSetup.cartpmedim]++;
 847             if (coords[cartSetup.cartpmedim] < dd->numCells[cartSetup.cartpmedim])
 848             {
 849                 int rank = 0;
 850                 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
 851                 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
 852                 {
 853                     /* This is not the last PP node for pmenode */
 854                     bReceive = FALSE;
 855                 }
 856             }
 857 #else
 858             GMX_RELEASE_ASSERT(
 859                     false,
 860                     "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
 861 #endif
 862         }
 863         else
 864         {
 865             int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
 866             if (cr->sim_nodeid + 1 < cr->nnodes
 867                 && dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
 868             {
 869                 /* This is not the last PP node for pmenode */
 870                 bReceive = FALSE;
 871             }
 872         }
 873     }
 874
 875     return bReceive;
 876 }
 877
 878 static void set_slb_pme_dim_f(gmx_domdec_t* dd, int dim, real** dim_f)
 879 {
 880     gmx_domdec_comm_t* comm = dd->comm;
 881
 882     snew(*dim_f, dd->numCells[dim] + 1);
 883     (*dim_f)[0] = 0;
 884     for (int i = 1; i < dd->numCells[dim]; i++)
 885     {
 886         if (comm->slb_frac[dim])
 887         {
 888             (*dim_f)[i] = (*dim_f)[i - 1] + comm->slb_frac[dim][i - 1];
 889         }
 890         else
 891         {
 892             (*dim_f)[i] = static_cast<real>(i) / static_cast<real>(dd->numCells[dim]);
 893         }
 894     }
 895     (*dim_f)[dd->numCells[dim]] = 1;
 896 }
 897
 898 static void init_ddpme(gmx_domdec_t* dd, gmx_ddpme_t* ddpme, int dimind)
 899 {
 900     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
 901
 902     if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
 903     {
 904         ddpme->dim = YY;
 905     }
 906     else
 907     {
 908         ddpme->dim = dimind;
 909     }
 910     ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
 911
 912     ddpme->nslab = (ddpme->dim == 0 ? ddRankSetup.npmenodes_x : ddRankSetup.npmenodes_y);
 913
 914     if (ddpme->nslab <= 1)
 915     {
 916         return;
 917     }
 918
 919     const int nso = ddRankSetup.numRanksDoingPme / ddpme->nslab;
 920     /* Determine for each PME slab the PP location range for dimension dim */
 921     snew(ddpme->pp_min, ddpme->nslab);
 922     snew(ddpme->pp_max, ddpme->nslab);
 923     for (int slab = 0; slab < ddpme->nslab; slab++)
 924     {
 925         ddpme->pp_min[slab] = dd->numCells[dd->dim[dimind]] - 1;
 926         ddpme->pp_max[slab] = 0;
 927     }
 928     for (int i = 0; i < dd->nnodes; i++)
 929     {
 930         ivec xyz;
 931         ddindex2xyz(dd->numCells, i, xyz);
 932         /* For y only use our y/z slab.
 933          * This assumes that the PME x grid size matches the DD grid size.
 934          */
 935         if (dimind == 0 || xyz[XX] == dd->ci[XX])
 936         {
 937             const int pmeindex  = ddindex2pmeindex(ddRankSetup, i);
 938             const int slab      = (dimind == 0) ? (pmeindex / nso) : (pmeindex % ddpme->nslab);
 939             ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
 940             ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
 941         }
 942     }
 943
 944     set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
 945 }
 946
 947 int dd_pme_maxshift_x(const gmx_domdec_t* dd)
 948 {
 949     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
 950
 951     if (ddRankSetup.ddpme[0].dim == XX)
 952     {
 953         return ddRankSetup.ddpme[0].maxshift;
 954     }
 955     else
 956     {
 957         return 0;
 958     }
 959 }
 960
 961 int dd_pme_maxshift_y(const gmx_domdec_t* dd)
 962 {
 963     const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
 964
 965     if (ddRankSetup.ddpme[0].dim == YY)
 966     {
 967         return ddRankSetup.ddpme[0].maxshift;
 968     }
 969     else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
 970     {
 971         return ddRankSetup.ddpme[1].maxshift;
 972     }
 973     else
 974     {
 975         return 0;
 976     }
 977 }
 978
 979 bool ddHaveSplitConstraints(const gmx_domdec_t& dd)
 980 {
 981     return dd.comm->systemInfo.haveSplitConstraints;
 982 }
 983
 984 bool ddUsesUpdateGroups(const gmx_domdec_t& dd)
 985 {
 986     return dd.comm->systemInfo.useUpdateGroups;
 987 }
 988
 989 void dd_cycles_add(const gmx_domdec_t* dd, float cycles, int ddCycl)
 990 {
 991     /* Note that the cycles value can be incorrect, either 0 or some
 992      * extremely large value, when our thread migrated to another core
 993      * with an unsynchronized cycle counter. If this happens less often
 994      * that once per nstlist steps, this will not cause issues, since
 995      * we later subtract the maximum value from the sum over nstlist steps.
 996      * A zero count will slightly lower the total, but that's a small effect.
 997      * Note that the main purpose of the subtraction of the maximum value
 998      * is to avoid throwing off the load balancing when stalls occur due
 999      * e.g. system activity or network congestion.
1000      */
1001     dd->comm->cycl[ddCycl] += cycles;
1002     dd->comm->cycl_n[ddCycl]++;
1003     if (cycles > dd->comm->cycl_max[ddCycl])
1004     {
1005         dd->comm->cycl_max[ddCycl] = cycles;
1006     }
1007 }
1008
1009 #if GMX_MPI
1010 static void make_load_communicator(gmx_domdec_t* dd, int dim_ind, ivec loc)
1011 {
1012     MPI_Comm c_row = MPI_COMM_NULL;
1013     ivec     loc_c;
1014     bool     bPartOfGroup = false;
1015
1016     const int dim = dd->dim[dim_ind];
1017     copy_ivec(loc, loc_c);
1018     for (int i = 0; i < dd->numCells[dim]; i++)
1019     {
1020         loc_c[dim]     = i;
1021         const int rank = dd_index(dd->numCells, loc_c);
1022         if (rank == dd->rank)
1023         {
1024             /* This process is part of the group */
1025             bPartOfGroup = TRUE;
1026         }
1027     }
1028     MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank, &c_row);
1029     if (bPartOfGroup)
1030     {
1031         dd->comm->mpi_comm_load[dim_ind] = c_row;
1032         if (!isDlbDisabled(dd->comm))
1033         {
1034             DDCellsizesWithDlb& cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1035
1036             if (dd->ci[dim] == dd->master_ci[dim])
1037             {
1038                 /* This is the root process of this row */
1039                 cellsizes.rowMaster = std::make_unique<RowMaster>();
1040
1041                 RowMaster& rowMaster = *cellsizes.rowMaster;
1042                 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1043                 rowMaster.oldCellFrac.resize(dd->numCells[dim] + 1);
1044                 rowMaster.isCellMin.resize(dd->numCells[dim]);
1045                 if (dim_ind > 0)
1046                 {
1047                     rowMaster.bounds.resize(dd->numCells[dim]);
1048                 }
1049                 rowMaster.buf_ncd.resize(dd->numCells[dim]);
1050             }
1051             else
1052             {
1053                 /* This is not a root process, we only need to receive cell_f */
1054                 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1055             }
1056         }
1057         if (dd->ci[dim] == dd->master_ci[dim])
1058         {
1059             snew(dd->comm->load[dim_ind].load, dd->numCells[dim] * DD_NLOAD_MAX);
1060         }
1061     }
1062 }
1063 #endif
1064
1065 void dd_setup_dlb_resource_sharing(const t_commrec* cr, int gpu_id)
1066 {
1067 #if GMX_MPI
1068     MPI_Comm mpi_comm_pp_physicalnode = MPI_COMM_NULL;
1069
1070     if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1071     {
1072         /* Only ranks with short-ranged tasks (currently) use GPUs.
1073          * If we don't have GPUs assigned, there are no resources to share.
1074          */
1075         return;
1076     }
1077
1078     const int physicalnode_id_hash = gmx_physicalnode_id_hash();
1079
1080     gmx_domdec_t* dd = cr->dd;
1081
1082     if (debug)
1083     {
1084         fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1085         fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n", dd->rank, physicalnode_id_hash, gpu_id);
1086     }
1087     /* Split the PP communicator over the physical nodes */
1088     /* TODO: See if we should store this (before), as it's also used for
1089      * for the nodecomm summation.
1090      */
1091     // TODO PhysicalNodeCommunicator could be extended/used to handle
1092     // the need for per-node per-group communicators.
1093     MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank, &mpi_comm_pp_physicalnode);
1094     MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank, &dd->comm->mpi_comm_gpu_shared);
1095     MPI_Comm_free(&mpi_comm_pp_physicalnode);
1096     MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1097
1098     if (debug)
1099     {
1100         fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1101     }
1102
1103     /* Note that some ranks could share a GPU, while others don't */
1104
1105     if (dd->comm->nrank_gpu_shared == 1)
1106     {
1107         MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1108     }
1109 #else
1110     GMX_UNUSED_VALUE(cr);
1111     GMX_UNUSED_VALUE(gpu_id);
1112 #endif
1113 }
1114
1115 static void make_load_communicators(gmx_domdec_t gmx_unused* dd)
1116 {
1117 #if GMX_MPI
1118     ivec loc;
1119
1120     if (debug)
1121     {
1122         fprintf(debug, "Making load communicators\n");
1123     }
1124
1125     dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1126     snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1127
1128     if (dd->ndim == 0)
1129     {
1130         return;
1131     }
1132
1133     clear_ivec(loc);
1134     make_load_communicator(dd, 0, loc);
1135     if (dd->ndim > 1)
1136     {
1137         const int dim0 = dd->dim[0];
1138         for (int i = 0; i < dd->numCells[dim0]; i++)
1139         {
1140             loc[dim0] = i;
1141             make_load_communicator(dd, 1, loc);
1142         }
1143     }
1144     if (dd->ndim > 2)
1145     {
1146         const int dim0 = dd->dim[0];
1147         for (int i = 0; i < dd->numCells[dim0]; i++)
1148         {
1149             loc[dim0]      = i;
1150             const int dim1 = dd->dim[1];
1151             for (int j = 0; j < dd->numCells[dim1]; j++)
1152             {
1153                 loc[dim1] = j;
1154                 make_load_communicator(dd, 2, loc);
1155             }
1156         }
1157     }
1158
1159     if (debug)
1160     {
1161         fprintf(debug, "Finished making load communicators\n");
1162     }
1163 #endif
1164 }
1165
1166 /*! \brief Sets up the relation between neighboring domains and zones */
1167 static void setup_neighbor_relations(gmx_domdec_t* dd)
1168 {
1169     ivec tmp, s;
1170     GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
1171
1172     for (int d = 0; d < dd->ndim; d++)
1173     {
1174         const int dim = dd->dim[d];
1175         copy_ivec(dd->ci, tmp);
1176         tmp[dim]           = (tmp[dim] + 1) % dd->numCells[dim];
1177         dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1178         copy_ivec(dd->ci, tmp);
1179         tmp[dim]           = (tmp[dim] - 1 + dd->numCells[dim]) % dd->numCells[dim];
1180         dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1181         if (debug)
1182         {
1183             fprintf(debug,
1184                     "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
1185                     dd->rank,
1186                     dim,
1187                     dd->neighbor[d][0],
1188                     dd->neighbor[d][1]);
1189         }
1190     }
1191
1192     int nzone  = (1 << dd->ndim);
1193     int nizone = (1 << std::max(dd->ndim - 1, 0));
1194     assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1195
1196     gmx_domdec_zones_t* zones = &dd->comm->zones;
1197
1198     for (int i = 0; i < nzone; i++)
1199     {
1200         int m = 0;
1201         clear_ivec(zones->shift[i]);
1202         for (int d = 0; d < dd->ndim; d++)
1203         {
1204             zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1205         }
1206     }
1207
1208     zones->n = nzone;
1209     for (int i = 0; i < nzone; i++)
1210     {
1211         for (int d = 0; d < DIM; d++)
1212         {
1213             s[d] = dd->ci[d] - zones->shift[i][d];
1214             if (s[d] < 0)
1215             {
1216                 s[d] += dd->numCells[d];
1217             }
1218             else if (s[d] >= dd->numCells[d])
1219             {
1220                 s[d] -= dd->numCells[d];
1221             }
1222         }
1223     }
1224     for (int iZoneIndex = 0; iZoneIndex < nizone; iZoneIndex++)
1225     {
1226         GMX_RELEASE_ASSERT(
1227                 ddNonbondedZonePairRanges[iZoneIndex][0] == iZoneIndex,
1228                 "The first element for each ddNonbondedZonePairRanges should match its index");
1229
1230         DDPairInteractionRanges iZone;
1231         iZone.iZoneIndex = iZoneIndex;
1232         /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
1233          * j-zones up to nzone.
1234          */
1235         iZone.jZoneRange = gmx::Range<int>(std::min(ddNonbondedZonePairRanges[iZoneIndex][1], nzone),
1236                                            std::min(ddNonbondedZonePairRanges[iZoneIndex][2], nzone));
1237         for (int dim = 0; dim < DIM; dim++)
1238         {
1239             if (dd->numCells[dim] == 1)
1240             {
1241                 /* All shifts should be allowed */
1242                 iZone.shift0[dim] = -1;
1243                 iZone.shift1[dim] = 1;
1244             }
1245             else
1246             {
1247                 /* Determine the min/max j-zone shift wrt the i-zone */
1248                 iZone.shift0[dim] = 1;
1249                 iZone.shift1[dim] = -1;
1250                 for (int jZone : iZone.jZoneRange)
1251                 {
1252                     int shift_diff = zones->shift[jZone][dim] - zones->shift[iZoneIndex][dim];
1253                     if (shift_diff < iZone.shift0[dim])
1254                     {
1255                         iZone.shift0[dim] = shift_diff;
1256                     }
1257                     if (shift_diff > iZone.shift1[dim])
1258                     {
1259                         iZone.shift1[dim] = shift_diff;
1260                     }
1261                 }
1262             }
1263         }
1264
1265         zones->iZones.push_back(iZone);
1266     }
1267
1268     if (!isDlbDisabled(dd->comm))
1269     {
1270         dd->comm->cellsizesWithDlb.resize(dd->ndim);
1271     }
1272
1273     if (dd->comm->ddSettings.recordLoad)
1274     {
1275         make_load_communicators(dd);
1276     }
1277 }
1278
1279 static void make_pp_communicator(const gmx::MDLogger& mdlog,
1280                                  gmx_domdec_t*        dd,
1281                                  t_commrec gmx_unused* cr,
1282                                  bool gmx_unused reorder)
1283 {
1284 #if GMX_MPI
1285     gmx_domdec_comm_t*  comm      = dd->comm;
1286     CartesianRankSetup& cartSetup = comm->cartesianRankSetup;
1287
1288     if (cartSetup.bCartesianPP)
1289     {
1290         /* Set up cartesian communication for the particle-particle part */
1291         GMX_LOG(mdlog.info)
1292                 .appendTextFormatted("Will use a Cartesian communicator: %d x %d x %d",
1293                                      dd->numCells[XX],
1294                                      dd->numCells[YY],
1295                                      dd->numCells[ZZ]);
1296
1297         ivec periods;
1298         for (int i = 0; i < DIM; i++)
1299         {
1300             periods[i] = TRUE;
1301         }
1302         MPI_Comm comm_cart = MPI_COMM_NULL;
1303         MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->numCells, periods, static_cast<int>(reorder), &comm_cart);
1304         /* We overwrite the old communicator with the new cartesian one */
1305         cr->mpi_comm_mygroup = comm_cart;
1306     }
1307
1308     dd->mpi_comm_all = cr->mpi_comm_mygroup;
1309     MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1310
1311     if (cartSetup.bCartesianPP_PME)
1312     {
1313         /* Since we want to use the original cartesian setup for sim,
1314          * and not the one after split, we need to make an index.
1315          */
1316         cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1317         cartSetup.ddindex2ddnodeid[dd_index(dd->numCells, dd->ci)] = dd->rank;
1318         gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1319         /* Get the rank of the DD master,
1320          * above we made sure that the master node is a PP node.
1321          */
1322         int rank = MASTER(cr) ? dd->rank : 0;
1323         MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1324     }
1325     else if (cartSetup.bCartesianPP)
1326     {
1327         if (!comm->ddRankSetup.usePmeOnlyRanks)
1328         {
1329             /* The PP communicator is also
1330              * the communicator for this simulation
1331              */
1332             cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1333         }
1334         cr->nodeid = dd->rank;
1335
1336         MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1337
1338         /* We need to make an index to go from the coordinates
1339          * to the nodeid of this simulation.
1340          */
1341         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1342         std::vector<int> buf(dd->nnodes);
1343         if (thisRankHasDuty(cr, DUTY_PP))
1344         {
1345             buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
1346         }
1347         /* Communicate the ddindex to simulation nodeid index */
1348         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM, cr->mpi_comm_mysim);
1349
1350         /* Determine the master coordinates and rank.
1351          * The DD master should be the same node as the master of this sim.
1352          */
1353         for (int i = 0; i < dd->nnodes; i++)
1354         {
1355             if (cartSetup.ddindex2simnodeid[i] == 0)
1356             {
1357                 ddindex2xyz(dd->numCells, i, dd->master_ci);
1358                 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1359             }
1360         }
1361         if (debug)
1362         {
1363             fprintf(debug, "The master rank is %d\n", dd->masterrank);
1364         }
1365     }
1366     else
1367     {
1368         /* No Cartesian communicators */
1369         /* We use the rank in dd->comm->all as DD index */
1370         ddindex2xyz(dd->numCells, dd->rank, dd->ci);
1371         /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1372         dd->masterrank = 0;
1373         clear_ivec(dd->master_ci);
1374     }
1375 #endif
1376
1377     GMX_LOG(mdlog.info)
1378             .appendTextFormatted("Domain decomposition rank %d, coordinates %d %d %d\n",
1379                                  dd->rank,
1380                                  dd->ci[XX],
1381                                  dd->ci[YY],
1382                                  dd->ci[ZZ]);
1383     if (debug)
1384     {
1385         fprintf(debug,
1386                 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
1387                 dd->rank,
1388                 dd->ci[XX],
1389                 dd->ci[YY],
1390                 dd->ci[ZZ]);
1391     }
1392 }
1393
1394 static void receive_ddindex2simnodeid(gmx_domdec_t* dd, t_commrec* cr)
1395 {
1396 #if GMX_MPI
1397     CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
1398
1399     if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1400     {
1401         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1402         std::vector<int> buf(dd->nnodes);
1403         if (thisRankHasDuty(cr, DUTY_PP))
1404         {
1405             buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
1406         }
1407         /* Communicate the ddindex to simulation nodeid index */
1408         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM, cr->mpi_comm_mysim);
1409     }
1410 #else
1411     GMX_UNUSED_VALUE(dd);
1412     GMX_UNUSED_VALUE(cr);
1413 #endif
1414 }
1415
1416 static CartesianRankSetup split_communicator(const gmx::MDLogger& mdlog,
1417                                              t_commrec*           cr,
1418                                              const DdRankOrder    ddRankOrder,
1419                                              bool gmx_unused    reorder,
1420                                              const DDRankSetup& ddRankSetup,
1421                                              ivec               ddCellIndex,
1422                                              std::vector<int>*  pmeRanks)
1423 {
1424     CartesianRankSetup cartSetup;
1425
1426     cartSetup.bCartesianPP     = (ddRankOrder == DdRankOrder::cartesian);
1427     cartSetup.bCartesianPP_PME = false;
1428
1429     const ivec& numDDCells = ddRankSetup.numPPCells;
1430     /* Initially we set ntot to the number of PP cells */
1431     copy_ivec(numDDCells, cartSetup.ntot);
1432
1433     if (cartSetup.bCartesianPP)
1434     {
1435         const int numDDCellsTot = ddRankSetup.numPPRanks;
1436         bool      bDiv[DIM];
1437         for (int i = 1; i < DIM; i++)
1438         {
1439             bDiv[i] = ((ddRankSetup.numRanksDoingPme * numDDCells[i]) % numDDCellsTot == 0);
1440         }
1441         if (bDiv[YY] || bDiv[ZZ])
1442         {
1443             cartSetup.bCartesianPP_PME = TRUE;
1444             /* If we have 2D PME decomposition, which is always in x+y,
1445              * we stack the PME only nodes in z.
1446              * Otherwise we choose the direction that provides the thinnest slab
1447              * of PME only nodes as this will have the least effect
1448              * on the PP communication.
1449              * But for the PME communication the opposite might be better.
1450              */
1451             if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 || !bDiv[YY] || numDDCells[YY] > numDDCells[ZZ]))
1452             {
1453                 cartSetup.cartpmedim = ZZ;
1454             }
1455             else
1456             {
1457                 cartSetup.cartpmedim = YY;
1458             }
1459             cartSetup.ntot[cartSetup.cartpmedim] +=
1460                     (ddRankSetup.numRanksDoingPme * numDDCells[cartSetup.cartpmedim]) / numDDCellsTot;
1461         }
1462         else
1463         {
1464             GMX_LOG(mdlog.info)
1465                     .appendTextFormatted(
1466                             "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or "
1467                             "nx*nz (%d*%d)",
1468                             ddRankSetup.numRanksDoingPme,
1469                             numDDCells[XX],
1470                             numDDCells[YY],
1471                             numDDCells[XX],
1472                             numDDCells[ZZ]);
1473             GMX_LOG(mdlog.info)
1474                     .appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1475         }
1476     }
1477
1478     if (cartSetup.bCartesianPP_PME)
1479     {
1480 #if GMX_MPI
1481         int  rank = 0;
1482         ivec periods;
1483
1484         GMX_LOG(mdlog.info)
1485                 .appendTextFormatted(
1486                         "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1487                         cartSetup.ntot[XX],
1488                         cartSetup.ntot[YY],
1489                         cartSetup.ntot[ZZ]);
1490
1491         for (int i = 0; i < DIM; i++)
1492         {
1493             periods[i] = TRUE;
1494         }
1495         MPI_Comm comm_cart = MPI_COMM_NULL;
1496         MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder), &comm_cart);
1497         MPI_Comm_rank(comm_cart, &rank);
1498         if (MASTER(cr) && rank != 0)
1499         {
1500             gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1501         }
1502
1503         /* With this assigment we loose the link to the original communicator
1504          * which will usually be MPI_COMM_WORLD, unless have multisim.
1505          */
1506         cr->mpi_comm_mysim = comm_cart;
1507         cr->sim_nodeid     = rank;
1508
1509         MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1510
1511         GMX_LOG(mdlog.info)
1512                 .appendTextFormatted("Cartesian rank %d, coordinates %d %d %d\n",
1513                                      cr->sim_nodeid,
1514                                      ddCellIndex[XX],
1515                                      ddCellIndex[YY],
1516                                      ddCellIndex[ZZ]);
1517
1518         if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1519         {
1520             cr->duty = DUTY_PP;
1521         }
1522         if (!ddRankSetup.usePmeOnlyRanks
1523             || ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1524         {
1525             cr->duty = DUTY_PME;
1526         }
1527
1528         /* Split the sim communicator into PP and PME only nodes */
1529         MPI_Comm_split(cr->mpi_comm_mysim,
1530                        getThisRankDuties(cr),
1531                        dd_index(cartSetup.ntot, ddCellIndex),
1532                        &cr->mpi_comm_mygroup);
1533 #else
1534         GMX_UNUSED_VALUE(ddCellIndex);
1535 #endif
1536     }
1537     else
1538     {
1539         switch (ddRankOrder)
1540         {
1541             case DdRankOrder::pp_pme:
1542                 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1543                 break;
1544             case DdRankOrder::interleave:
1545                 /* Interleave the PP-only and PME-only ranks */
1546                 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1547                 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1548                 break;
1549             case DdRankOrder::cartesian: break;
1550             default: gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1551         }
1552
1553         if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1554         {
1555             cr->duty = DUTY_PME;
1556         }
1557         else
1558         {
1559             cr->duty = DUTY_PP;
1560         }
1561 #if GMX_MPI
1562         /* Split the sim communicator into PP and PME only nodes */
1563         MPI_Comm_split(cr->mpi_comm_mysim, getThisRankDuties(cr), cr->nodeid, &cr->mpi_comm_mygroup);
1564         MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1565 #endif
1566     }
1567
1568     GMX_LOG(mdlog.info)
1569             .appendTextFormatted("This rank does only %s work.\n",
1570                                  thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1571
1572     return cartSetup;
1573 }
1574
1575 /*! \brief Makes the PP communicator and the PME communicator, when needed
1576  *
1577  * Returns the Cartesian rank setup.
1578  * Sets \p cr->mpi_comm_mygroup
1579  * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1580  * With separate PME ranks in interleaved order, set the PME ranks in \p pmeRanks.
1581  */
1582 static CartesianRankSetup makeGroupCommunicators(const gmx::MDLogger& mdlog,
1583                                                  const DDSettings&    ddSettings,
1584                                                  const DdRankOrder    ddRankOrder,
1585                                                  const DDRankSetup&   ddRankSetup,
1586                                                  t_commrec*           cr,
1587                                                  ivec                 ddCellIndex,
1588                                                  std::vector<int>*    pmeRanks)
1589 {
1590     CartesianRankSetup cartSetup;
1591
1592     // As a default, both group and sim communicators are equal to the default communicator
1593     cr->mpi_comm_mygroup = cr->mpiDefaultCommunicator;
1594     cr->mpi_comm_mysim   = cr->mpiDefaultCommunicator;
1595     cr->nnodes           = cr->sizeOfDefaultCommunicator;
1596     cr->nodeid           = cr->rankInDefaultCommunicator;
1597     cr->sim_nodeid       = cr->rankInDefaultCommunicator;
1598
1599     if (ddRankSetup.usePmeOnlyRanks)
1600     {
1601         /* Split the communicator into a PP and PME part */
1602         cartSetup = split_communicator(
1603                 mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder, ddRankSetup, ddCellIndex, pmeRanks);
1604     }
1605     else
1606     {
1607         /* All nodes do PP and PME */
1608         /* We do not require separate communicators */
1609         cartSetup.bCartesianPP     = false;
1610         cartSetup.bCartesianPP_PME = false;
1611     }
1612
1613     return cartSetup;
1614 }
1615
1616 /*! \brief For PP ranks, sets or makes the communicator
1617  *
1618  * For PME ranks get the rank id.
1619  * For PP only ranks, sets the PME-only rank.
1620  */
1621 static void setupGroupCommunication(const gmx::MDLogger&     mdlog,
1622                                     const DDSettings&        ddSettings,
1623                                     gmx::ArrayRef<const int> pmeRanks,
1624                                     t_commrec*               cr,
1625                                     const int                numAtomsInSystem,
1626                                     gmx_domdec_t*            dd)
1627 {
1628     const DDRankSetup&        ddRankSetup = dd->comm->ddRankSetup;
1629     const CartesianRankSetup& cartSetup   = dd->comm->cartesianRankSetup;
1630
1631     if (thisRankHasDuty(cr, DUTY_PP))
1632     {
1633         /* Copy or make a new PP communicator */
1634
1635         /* We (possibly) reordered the nodes in split_communicator,
1636          * so it is no longer required in make_pp_communicator.
1637          */
1638         const bool useCartesianReorder = (ddSettings.useCartesianReorder && !cartSetup.bCartesianPP_PME);
1639
1640         make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1641     }
1642     else
1643     {
1644         receive_ddindex2simnodeid(dd, cr);
1645     }
1646
1647     if (!thisRankHasDuty(cr, DUTY_PME))
1648     {
1649         /* Set up the commnuication to our PME node */
1650         dd->pme_nodeid = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1651         dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1652         if (debug)
1653         {
1654             fprintf(debug,
1655                     "My pme_nodeid %d receive ener %s\n",
1656                     dd->pme_nodeid,
1657                     gmx::boolToString(dd->pme_receive_vir_ener));
1658         }
1659     }
1660     else
1661     {
1662         dd->pme_nodeid = -1;
1663     }
1664
1665     /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1666     if (MASTER(cr))
1667     {
1668         dd->ma = std::make_unique<AtomDistribution>(dd->numCells, numAtomsInSystem, numAtomsInSystem);
1669     }
1670 }
1671
1672 static real* get_slb_frac(const gmx::MDLogger& mdlog, const char* dir, int nc, const char* size_string)
1673 {
1674     real* slb_frac = nullptr;
1675     if (nc > 1 && size_string != nullptr)
1676     {
1677         GMX_LOG(mdlog.info).appendTextFormatted("Using static load balancing for the %s direction", dir);
1678         snew(slb_frac, nc);
1679         real tot = 0;
1680         for (int i = 0; i < nc; i++)
1681         {
1682             double dbl = 0;
1683             int    n   = 0;
1684             sscanf(size_string, "%20lf%n", &dbl, &n);
1685             if (dbl == 0)
1686             {
1687                 gmx_fatal(FARGS,
1688                           "Incorrect or not enough DD cell size entries for direction %s: '%s'",
1689                           dir,
1690                           size_string);
1691             }
1692             slb_frac[i] = dbl;
1693             size_string += n;
1694             tot += slb_frac[i];
1695         }
1696         /* Normalize */
1697         std::string relativeCellSizes = "Relative cell sizes:";
1698         for (int i = 0; i < nc; i++)
1699         {
1700             slb_frac[i] /= tot;
1701             relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1702         }
1703         GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1704     }
1705
1706     return slb_frac;
1707 }
1708
1709 static int multi_body_bondeds_count(const gmx_mtop_t* mtop)
1710 {
1711     int                  n     = 0;
1712     gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1713     int                  nmol  = 0;
1714     while (const InteractionLists* ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1715     {
1716         for (auto& ilist : extractILists(*ilists, IF_BOND))
1717         {
1718             if (NRAL(ilist.functionType) > 2)
1719             {
1720                 n += nmol * (ilist.iatoms.size() / ilistStride(ilist));
1721             }
1722         }
1723     }
1724
1725     return n;
1726 }
1727
1728 static int dd_getenv(const gmx::MDLogger& mdlog, const char* env_var, int def)
1729 {
1730     int   nst = def;
1731     char* val = getenv(env_var);
1732     if (val)
1733     {
1734         if (sscanf(val, "%20d", &nst) <= 0)
1735         {
1736             nst = 1;
1737         }
1738         GMX_LOG(mdlog.info).appendTextFormatted("Found env.var. %s = %s, using value %d", env_var, val, nst);
1739     }
1740
1741     return nst;
1742 }
1743
1744 static void check_dd_restrictions(const gmx_domdec_t* dd, const t_inputrec* ir, const gmx::MDLogger& mdlog)
1745 {
1746     if (ir->pbcType == PbcType::Screw
1747         && (dd->numCells[XX] == 1 || dd->numCells[YY] > 1 || dd->numCells[ZZ] > 1))
1748     {
1749         gmx_fatal(FARGS,
1750                   "With pbc=%s can only do domain decomposition in the x-direction",
1751                   c_pbcTypeNames[ir->pbcType].c_str());
1752     }
1753
1754     if (ir->nstlist == 0)
1755     {
1756         gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1757     }
1758
1759     if (ir->comm_mode == ecmANGULAR && ir->pbcType != PbcType::No)
1760     {
1761         GMX_LOG(mdlog.warning)
1762                 .appendText(
1763                         "comm-mode angular will give incorrect results when the comm group "
1764                         "partially crosses a periodic boundary");
1765     }
1766 }
1767
1768 static real average_cellsize_min(const gmx_ddbox_t& ddbox, const ivec numDomains)
1769 {
1770     real r = ddbox.box_size[XX];
1771     for (int d = 0; d < DIM; d++)
1772     {
1773         if (numDomains[d] > 1)
1774         {
1775             /* Check using the initial average cell size */
1776             r = std::min(r, ddbox.box_size[d] * ddbox.skew_fac[d] / numDomains[d]);
1777         }
1778     }
1779
1780     return r;
1781 }
1782
1783 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
1784  */
1785 static DlbState forceDlbOffOrBail(DlbState             cmdlineDlbState,
1786                                   const std::string&   reasonStr,
1787                                   const gmx::MDLogger& mdlog)
1788 {
1789     std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
1790     std::string dlbDisableNote     = "NOTE: disabling dynamic load balancing as ";
1791
1792     if (cmdlineDlbState == DlbState::onUser)
1793     {
1794         gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1795     }
1796     else if (cmdlineDlbState == DlbState::offCanTurnOn)
1797     {
1798         GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1799     }
1800     return DlbState::offForever;
1801 }
1802
1803 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1804  *
1805  * This function parses the parameters of "-dlb" command line option setting
1806  * corresponding state values. Then it checks the consistency of the determined
1807  * state with other run parameters and settings. As a result, the initial state
1808  * may be altered or an error may be thrown if incompatibility of options is detected.
1809  *
1810  * \param [in] mdlog       Logger.
1811  * \param [in] dlbOption   Enum value for the DLB option.
1812  * \param [in] bRecordLoad True if the load balancer is recording load information.
1813  * \param [in] mdrunOptions  Options for mdrun.
1814  * \param [in] ir          Pointer mdrun to input parameters.
1815  * \returns                DLB initial/startup state.
1816  */
1817 static DlbState determineInitialDlbState(const gmx::MDLogger&     mdlog,
1818                                          DlbOption                dlbOption,
1819                                          gmx_bool                 bRecordLoad,
1820                                          const gmx::MdrunOptions& mdrunOptions,
1821                                          const t_inputrec*        ir)
1822 {
1823     DlbState dlbState = DlbState::offCanTurnOn;
1824
1825     switch (dlbOption)
1826     {
1827         case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1828         case DlbOption::no: dlbState = DlbState::offUser; break;
1829         case DlbOption::yes: dlbState = DlbState::onUser; break;
1830         default: gmx_incons("Invalid dlbOption enum value");
1831     }
1832
1833     /* Reruns don't support DLB: bail or override auto mode */
1834     if (mdrunOptions.rerun)
1835     {
1836         std::string reasonStr = "it is not supported in reruns.";
1837         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1838     }
1839
1840     /* Unsupported integrators */
1841     if (!EI_DYNAMICS(ir->eI))
1842     {
1843         auto reasonStr = gmx::formatString(
1844                 "it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1845         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1846     }
1847
1848     /* Without cycle counters we can't time work to balance on */
1849     if (!bRecordLoad)
1850     {
1851         std::string reasonStr =
1852                 "cycle counters unsupported or not enabled in the operating system kernel.";
1853         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1854     }
1855
1856     if (mdrunOptions.reproducible)
1857     {
1858         std::string reasonStr = "you started a reproducible run.";
1859         switch (dlbState)
1860         {
1861             case DlbState::offUser: break;
1862             case DlbState::offForever:
1863                 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1864                 break;
1865             case DlbState::offCanTurnOn: return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1866             case DlbState::onCanTurnOff:
1867                 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1868                 break;
1869             case DlbState::onUser:
1870                 return forceDlbOffOrBail(
1871                         dlbState,
1872                         reasonStr
1873                                 + " In load balanced runs binary reproducibility cannot be "
1874                                   "ensured.",
1875                         mdlog);
1876             default:
1877                 gmx_fatal(FARGS,
1878                           "Death horror: undefined case (%d) for load balancing choice",
1879                           static_cast<int>(dlbState));
1880         }
1881     }
1882
1883     return dlbState;
1884 }
1885
1886 static gmx_domdec_comm_t* init_dd_comm()
1887 {
1888     gmx_domdec_comm_t* comm = new gmx_domdec_comm_t;
1889
1890     comm->n_load_have    = 0;
1891     comm->n_load_collect = 0;
1892
1893     comm->haveTurnedOffDlb = false;
1894
1895     for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
1896     {
1897         comm->sum_nat[i] = 0;
1898     }
1899     comm->ndecomp   = 0;
1900     comm->nload     = 0;
1901     comm->load_step = 0;
1902     comm->load_sum  = 0;
1903     comm->load_max  = 0;
1904     clear_ivec(comm->load_lim);
1905     comm->load_mdf = 0;
1906     comm->load_pme = 0;
1907
1908     /* This should be replaced by a unique pointer */
1909     comm->balanceRegion = ddBalanceRegionAllocate();
1910
1911     return comm;
1912 }
1913
1914 /* Returns whether mtop contains constraints and/or vsites */
1915 static bool systemHasConstraintsOrVsites(const gmx_mtop_t& mtop)
1916 {
1917     auto* ilistLoop = gmx_mtop_ilistloop_init(mtop);
1918     int   nmol      = 0;
1919     while (const InteractionLists* ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
1920     {
1921         if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
1922         {
1923             return true;
1924         }
1925     }
1926
1927     return false;
1928 }
1929
1930 static void setupUpdateGroups(const gmx::MDLogger& mdlog,
1931                               const gmx_mtop_t&    mtop,
1932                               const t_inputrec&    inputrec,
1933                               const real           cutoffMargin,
1934                               DDSystemInfo*        systemInfo)
1935 {
1936     /* When we have constraints and/or vsites, it is beneficial to use
1937      * update groups (when possible) to allow independent update of groups.
1938      */
1939     if (!systemHasConstraintsOrVsites(mtop))
1940     {
1941         /* No constraints or vsites, atoms can be updated independently */
1942         return;
1943     }
1944
1945     systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
1946     systemInfo->useUpdateGroups               = (!systemInfo->updateGroupingPerMoleculetype.empty()
1947                                    && getenv("GMX_NO_UPDATEGROUPS") == nullptr);
1948
1949     if (systemInfo->useUpdateGroups)
1950     {
1951         int numUpdateGroups = 0;
1952         for (const auto& molblock : mtop.molblock)
1953         {
1954             numUpdateGroups += molblock.nmol
1955                                * systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
1956         }
1957
1958         systemInfo->maxUpdateGroupRadius = computeMaxUpdateGroupRadius(
1959                 mtop, systemInfo->updateGroupingPerMoleculetype, maxReferenceTemperature(inputrec));
1960
1961         /* To use update groups, the large domain-to-domain cutoff distance
1962          * should be compatible with the box size.
1963          */
1964         systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
1965
1966         if (systemInfo->useUpdateGroups)
1967         {
1968             GMX_LOG(mdlog.info)
1969                     .appendTextFormatted(
1970                             "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f "
1971                             "nm\n",
1972                             numUpdateGroups,
1973                             mtop.natoms / static_cast<double>(numUpdateGroups),
1974                             systemInfo->maxUpdateGroupRadius);
1975         }
1976         else
1977         {
1978             GMX_LOG(mdlog.info)
1979                     .appendTextFormatted(
1980                             "The combination of rlist and box size prohibits the use of update "
1981                             "groups\n");
1982             systemInfo->updateGroupingPerMoleculetype.clear();
1983         }
1984     }
1985 }
1986
1987 UnitCellInfo::UnitCellInfo(const t_inputrec& ir) :
1988     npbcdim(numPbcDimensions(ir.pbcType)),
1989     numBoundedDimensions(inputrec2nboundeddim(&ir)),
1990     ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
1991     haveScrewPBC(ir.pbcType == PbcType::Screw)
1992 {
1993 }
1994
1995 /* Returns whether molecules are always whole, i.e. not broken by PBC */
1996 static bool moleculesAreAlwaysWhole(const gmx_mtop_t&                           mtop,
1997                                     const bool                                  useUpdateGroups,
1998                                     gmx::ArrayRef<const gmx::RangePartitioning> updateGroupingPerMoleculetype)
1999 {
2000     if (useUpdateGroups)
2001     {
2002         GMX_RELEASE_ASSERT(updateGroupingPerMoleculetype.size() == mtop.moltype.size(),
2003                            "Need one grouping per moltype");
2004         for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
2005         {
2006             if (updateGroupingPerMoleculetype[mol].numBlocks() > 1)
2007             {
2008                 return false;
2009             }
2010         }
2011     }
2012     else
2013     {
2014         for (const auto& moltype : mtop.moltype)
2015         {
2016             if (moltype.atoms.nr > 1)
2017             {
2018                 return false;
2019             }
2020         }
2021     }
2022
2023     return true;
2024 }
2025
2026 /*! \brief Generate the simulation system information */
2027 static DDSystemInfo getSystemInfo(const gmx::MDLogger&           mdlog,
2028                                   DDRole                         ddRole,
2029                                   MPI_Comm                       communicator,
2030                                   const DomdecOptions&           options,
2031                                   const gmx_mtop_t&              mtop,
2032                                   const t_inputrec&              ir,
2033                                   const matrix                   box,
2034                                   gmx::ArrayRef<const gmx::RVec> xGlobal)
2035 {
2036     const real tenPercentMargin = 1.1;
2037
2038     DDSystemInfo systemInfo;
2039
2040     /* We need to decide on update groups early, as this affects communication distances */
2041     systemInfo.useUpdateGroups = false;
2042     if (ir.cutoff_scheme == ecutsVERLET)
2043     {
2044         real cutoffMargin = std::sqrt(max_cutoff2(ir.pbcType, box)) - ir.rlist;
2045         setupUpdateGroups(mdlog, mtop, ir, cutoffMargin, &systemInfo);
2046     }
2047
2048     systemInfo.moleculesAreAlwaysWhole = moleculesAreAlwaysWhole(
2049             mtop, systemInfo.useUpdateGroups, systemInfo.updateGroupingPerMoleculetype);
2050     systemInfo.haveInterDomainBondeds =
2051             (!systemInfo.moleculesAreAlwaysWhole || mtop.bIntermolecularInteractions);
2052     systemInfo.haveInterDomainMultiBodyBondeds =
2053             (systemInfo.haveInterDomainBondeds && multi_body_bondeds_count(&mtop) > 0);
2054
2055     if (systemInfo.useUpdateGroups)
2056     {
2057         systemInfo.haveSplitConstraints = false;
2058         systemInfo.haveSplitSettles     = false;
2059     }
2060     else
2061     {
2062         systemInfo.haveSplitConstraints = (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0
2063                                            || gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0);
2064         systemInfo.haveSplitSettles     = (gmx_mtop_ftype_count(mtop, F_SETTLE) > 0);
2065     }
2066
2067     if (ir.rlist == 0)
2068     {
2069         /* Set the cut-off to some very large value,
2070          * so we don't need if statements everywhere in the code.
2071          * We use sqrt, since the cut-off is squared in some places.
2072          */
2073         systemInfo.cutoff = GMX_CUTOFF_INF;
2074     }
2075     else
2076     {
2077         systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir.rlist);
2078     }
2079     systemInfo.minCutoffForMultiBody = 0;
2080
2081     /* Determine the minimum cell size limit, affected by many factors */
2082     systemInfo.cellsizeLimit             = 0;
2083     systemInfo.filterBondedCommunication = false;
2084
2085     /* We do not allow home atoms to move beyond the neighboring domain
2086      * between domain decomposition steps, which limits the cell size.
2087      * Get an estimate of cell size limit due to atom displacement.
2088      * In most cases this is a large overestimate, because it assumes
2089      * non-interaction atoms.
2090      * We set the chance to 1 in a trillion steps.
2091      */
2092     constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2093     const real     limitForAtomDisplacement          = minCellSizeForAtomDisplacement(
2094             mtop, ir, systemInfo.updateGroupingPerMoleculetype, c_chanceThatAtomMovesBeyondDomain);
2095     GMX_LOG(mdlog.info).appendTextFormatted("Minimum cell size due to atom displacement: %.3f nm", limitForAtomDisplacement);
2096
2097     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, limitForAtomDisplacement);
2098
2099     /* TODO: PME decomposition currently requires atoms not to be more than
2100      *       2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2101      *       In nearly all cases, limitForAtomDisplacement will be smaller
2102      *       than 2/3*rlist, so the PME requirement is satisfied.
2103      *       But it would be better for both correctness and performance
2104      *       to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2105      *       Note that we would need to improve the pairlist buffer case.
2106      */
2107
2108     if (systemInfo.haveInterDomainBondeds)
2109     {
2110         if (options.minimumCommunicationRange > 0)
2111         {
2112             systemInfo.minCutoffForMultiBody =
2113                     atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2114             if (options.useBondedCommunication)
2115             {
2116                 systemInfo.filterBondedCommunication =
2117                         (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2118             }
2119             else
2120             {
2121                 systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
2122             }
2123         }
2124         else if (ir.bPeriodicMols)
2125         {
2126             /* Can not easily determine the required cut-off */
2127             GMX_LOG(mdlog.warning)
2128                     .appendText(
2129                             "NOTE: Periodic molecules are present in this system. Because of this, "
2130                             "the domain decomposition algorithm cannot easily determine the "
2131                             "minimum cell size that it requires for treating bonded interactions. "
2132                             "Instead, domain decomposition will assume that half the non-bonded "
2133                             "cut-off will be a suitable lower bound.");
2134             systemInfo.minCutoffForMultiBody = systemInfo.cutoff / 2;
2135         }
2136         else
2137         {
2138             real r_2b = 0;
2139             real r_mb = 0;
2140
2141             if (ddRole == DDRole::Master)
2142             {
2143                 dd_bonded_cg_distance(
2144                         mdlog, &mtop, &ir, xGlobal, box, options.checkBondedInteractions, &r_2b, &r_mb);
2145             }
2146             gmx_bcast(sizeof(r_2b), &r_2b, communicator);
2147             gmx_bcast(sizeof(r_mb), &r_mb, communicator);
2148
2149             /* We use an initial margin of 10% for the minimum cell size,
2150              * except when we are just below the non-bonded cut-off.
2151              */
2152             if (options.useBondedCommunication)
2153             {
2154                 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2155                 {
2156                     const real r_bonded              = std::max(r_2b, r_mb);
2157                     systemInfo.minCutoffForMultiBody = tenPercentMargin * r_bonded;
2158                     /* This is the (only) place where we turn on the filtering */
2159                     systemInfo.filterBondedCommunication = true;
2160                 }
2161                 else
2162                 {
2163                     const real r_bonded = r_mb;
2164                     systemInfo.minCutoffForMultiBody =
2165                             std::min(tenPercentMargin * r_bonded, systemInfo.cutoff);
2166                 }
2167                 /* We determine cutoff_mbody later */
2168                 systemInfo.increaseMultiBodyCutoff = true;
2169             }
2170             else
2171             {
2172                 /* No special bonded communication,
2173                  * simply increase the DD cut-off.
2174                  */
2175                 systemInfo.minCutoffForMultiBody = tenPercentMargin * std::max(r_2b, r_mb);
2176                 systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
2177             }
2178         }
2179         GMX_LOG(mdlog.info)
2180                 .appendTextFormatted("Minimum cell size due to bonded interactions: %.3f nm",
2181                                      systemInfo.minCutoffForMultiBody);
2182
2183         systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.minCutoffForMultiBody);
2184     }
2185
2186     systemInfo.constraintCommunicationRange = 0;
2187     if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
2188     {
2189         /* There is a cell size limit due to the constraints (P-LINCS) */
2190         systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, &mtop, &ir);
2191         GMX_LOG(mdlog.info)
2192                 .appendTextFormatted("Estimated maximum distance required for P-LINCS: %.3f nm",
2193                                      systemInfo.constraintCommunicationRange);
2194         if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2195         {
2196             GMX_LOG(mdlog.info)
2197                     .appendText(
2198                             "This distance will limit the DD cell size, you can override this with "
2199                             "-rcon");
2200         }
2201     }
2202     else if (options.constraintCommunicationRange > 0)
2203     {
2204         /* Here we do not check for dd->splitConstraints.
2205          * because one can also set a cell size limit for virtual sites only
2206          * and at this point we don't know yet if there are intercg v-sites.
2207          */
2208         GMX_LOG(mdlog.info)
2209                 .appendTextFormatted("User supplied maximum distance required for P-LINCS: %.3f nm",
2210                                      options.constraintCommunicationRange);
2211         systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2212     }
2213     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.constraintCommunicationRange);
2214
2215     return systemInfo;
2216 }
2217
2218 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
2219  * implemented. */
2220 static void checkDDGridSetup(const DDGridSetup&   ddGridSetup,
2221                              DDRole               ddRole,
2222                              MPI_Comm             communicator,
2223                              int                  numNodes,
2224                              const DomdecOptions& options,
2225                              const DDSettings&    ddSettings,
2226                              const DDSystemInfo&  systemInfo,
2227                              const real           cellsizeLimit,
2228                              const gmx_ddbox_t&   ddbox)
2229 {
2230     if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
2231     {
2232         const bool  bC = (systemInfo.haveSplitConstraints
2233                          && systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2234         std::string message =
2235                 gmx::formatString("Change the number of ranks or mdrun option %s%s%s",
2236                                   !bC ? "-rdd" : "-rcon",
2237                                   ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2238                                   bC ? " or your LINCS settings" : "");
2239
2240         gmx_fatal_collective(FARGS,
2241                              communicator,
2242                              ddRole == DDRole::Master,
2243                              "There is no domain decomposition for %d ranks that is compatible "
2244                              "with the given box and a minimum cell size of %g nm\n"
2245                              "%s\n"
2246                              "Look in the log file for details on the domain decomposition",
2247                              numNodes - ddGridSetup.numPmeOnlyRanks,
2248                              cellsizeLimit,
2249                              message.c_str());
2250     }
2251
2252     const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
2253     if (acs < cellsizeLimit)
2254     {
2255         if (options.numCells[XX] <= 0)
2256         {
2257             GMX_RELEASE_ASSERT(
2258                     false,
2259                     "dd_choose_grid() should return a grid that satisfies the cell size limits");
2260         }
2261         else
2262         {
2263             gmx_fatal_collective(
2264                     FARGS,
2265                     communicator,
2266                     ddRole == DDRole::Master,
2267                     "The initial cell size (%f) is smaller than the cell size limit (%f), change "
2268                     "options -dd, -rdd or -rcon, see the log file for details",
2269                     acs,
2270                     cellsizeLimit);
2271         }
2272     }
2273
2274     const int numPPRanks =
2275             ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2276     if (numNodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2277     {
2278         gmx_fatal_collective(FARGS,
2279                              communicator,
2280                              ddRole == DDRole::Master,
2281                              "The size of the domain decomposition grid (%d) does not match the "
2282                              "number of PP ranks (%d). The total number of ranks is %d",
2283                              numPPRanks,
2284                              numNodes - ddGridSetup.numPmeOnlyRanks,
2285                              numNodes);
2286     }
2287     if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2288     {
2289         gmx_fatal_collective(FARGS,
2290                              communicator,
2291                              ddRole == DDRole::Master,
2292                              "The number of separate PME ranks (%d) is larger than the number of "
2293                              "PP ranks (%d), this is not supported.",
2294                              ddGridSetup.numPmeOnlyRanks,
2295                              numPPRanks);
2296     }
2297 }
2298
2299 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2300 static DDRankSetup getDDRankSetup(const gmx::MDLogger& mdlog,
2301                                   int                  numNodes,
2302                                   const DDGridSetup&   ddGridSetup,
2303                                   const t_inputrec&    ir)
2304 {
2305     GMX_LOG(mdlog.info)
2306             .appendTextFormatted("Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2307                                  ddGridSetup.numDomains[XX],
2308                                  ddGridSetup.numDomains[YY],
2309                                  ddGridSetup.numDomains[ZZ],
2310                                  ddGridSetup.numPmeOnlyRanks);
2311
2312     DDRankSetup ddRankSetup;
2313
2314     ddRankSetup.numPPRanks = numNodes - ddGridSetup.numPmeOnlyRanks;
2315     copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2316
2317     ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2318     if (ddRankSetup.usePmeOnlyRanks)
2319     {
2320         ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2321     }
2322     else
2323     {
2324         ddRankSetup.numRanksDoingPme =
2325                 ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2326     }
2327
2328     if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2329     {
2330         /* The following choices should match those
2331          * in comm_cost_est in domdec_setup.c.
2332          * Note that here the checks have to take into account
2333          * that the decomposition might occur in a different order than xyz
2334          * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2335          * in which case they will not match those in comm_cost_est,
2336          * but since that is mainly for testing purposes that's fine.
2337          */
2338         if (ddGridSetup.numDDDimensions >= 2 && ddGridSetup.ddDimensions[0] == XX
2339             && ddGridSetup.ddDimensions[1] == YY
2340             && ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX]
2341             && ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0
2342             && getenv("GMX_PMEONEDD") == nullptr)
2343         {
2344             ddRankSetup.npmedecompdim = 2;
2345             ddRankSetup.npmenodes_x   = ddGridSetup.numDomains[XX];
2346             ddRankSetup.npmenodes_y   = ddRankSetup.numRanksDoingPme / ddRankSetup.npmenodes_x;
2347         }
2348         else
2349         {
2350             /* In case nc is 1 in both x and y we could still choose to
2351              * decompose pme in y instead of x, but we use x for simplicity.
2352              */
2353             ddRankSetup.npmedecompdim = 1;
2354             if (ddGridSetup.ddDimensions[0] == YY)
2355             {
2356                 ddRankSetup.npmenodes_x = 1;
2357                 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2358             }
2359             else
2360             {
2361                 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2362                 ddRankSetup.npmenodes_y = 1;
2363             }
2364         }
2365         GMX_LOG(mdlog.info)
2366                 .appendTextFormatted("PME domain decomposition: %d x %d x %d",
2367                                      ddRankSetup.npmenodes_x,
2368                                      ddRankSetup.npmenodes_y,
2369                                      1);
2370     }
2371     else
2372     {
2373         ddRankSetup.npmedecompdim = 0;
2374         ddRankSetup.npmenodes_x   = 0;
2375         ddRankSetup.npmenodes_y   = 0;
2376     }
2377
2378     return ddRankSetup;
2379 }
2380
2381 /*! \brief Set the cell size and interaction limits */
2382 static void set_dd_limits(const gmx::MDLogger& mdlog,
2383                           DDRole               ddRole,
2384                           gmx_domdec_t*        dd,
2385                           const DomdecOptions& options,
2386                           const DDSettings&    ddSettings,
2387                           const DDSystemInfo&  systemInfo,
2388                           const DDGridSetup&   ddGridSetup,
2389                           const int            numPPRanks,
2390                           const gmx_mtop_t*    mtop,
2391                           const t_inputrec*    ir,
2392                           const gmx_ddbox_t&   ddbox)
2393 {
2394     gmx_domdec_comm_t* comm = dd->comm;
2395     comm->ddSettings        = ddSettings;
2396
2397     /* Initialize to GPU share count to 0, might change later */
2398     comm->nrank_gpu_shared = 0;
2399
2400     comm->dlbState = comm->ddSettings.initialDlbState;
2401     dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2402     /* To consider turning DLB on after 2*nstlist steps we need to check
2403      * at partitioning count 3. Thus we need to increase the first count by 2.
2404      */
2405     comm->ddPartioningCountFirstDlbOff += 2;
2406
2407     comm->bPMELoadBalDLBLimits = FALSE;
2408
2409     /* Allocate the charge group/atom sorting struct */
2410     comm->sort = std::make_unique<gmx_domdec_sort_t>();
2411
2412     comm->systemInfo = systemInfo;
2413
2414     if (systemInfo.useUpdateGroups)
2415     {
2416         /* Note: We would like to use dd->nnodes for the atom count estimate,
2417          *       but that is not yet available here. But this anyhow only
2418          *       affect performance up to the second dd_partition_system call.
2419          */
2420         const int homeAtomCountEstimate = mtop->natoms / numPPRanks;
2421         comm->updateGroupsCog           = std::make_unique<gmx::UpdateGroupsCog>(
2422                 *mtop, systemInfo.updateGroupingPerMoleculetype, maxReferenceTemperature(*ir), homeAtomCountEstimate);
2423     }
2424
2425     /* Set the DD setup given by ddGridSetup */
2426     copy_ivec(ddGridSetup.numDomains, dd->numCells);
2427     dd->ndim = ddGridSetup.numDDDimensions;
2428     copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2429
2430     dd->nnodes = dd->numCells[XX] * dd->numCells[YY] * dd->numCells[ZZ];
2431
2432     snew(comm->slb_frac, DIM);
2433     if (isDlbDisabled(comm))
2434     {
2435         comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->numCells[XX], options.cellSizeX);
2436         comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->numCells[YY], options.cellSizeY);
2437         comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->numCells[ZZ], options.cellSizeZ);
2438     }
2439
2440     /* Set the multi-body cut-off and cellsize limit for DLB */
2441     comm->cutoff_mbody   = systemInfo.minCutoffForMultiBody;
2442     comm->cellsize_limit = systemInfo.cellsizeLimit;
2443     if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2444     {
2445         if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2446         {
2447             /* Set the bonded communication distance to halfway
2448              * the minimum and the maximum,
2449              * since the extra communication cost is nearly zero.
2450              */
2451             real acs           = average_cellsize_min(ddbox, dd->numCells);
2452             comm->cutoff_mbody = 0.5 * (systemInfo.minCutoffForMultiBody + acs);
2453             if (!isDlbDisabled(comm))
2454             {
2455                 /* Check if this does not limit the scaling */
2456                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, options.dlbScaling * acs);
2457             }
2458             if (!systemInfo.filterBondedCommunication)
2459             {
2460                 /* Without bBondComm do not go beyond the n.b. cut-off */
2461                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2462                 if (comm->cellsize_limit >= systemInfo.cutoff)
2463                 {
2464                     /* We don't loose a lot of efficieny
2465                      * when increasing it to the n.b. cut-off.
2466                      * It can even be slightly faster, because we need
2467                      * less checks for the communication setup.
2468                      */
2469                     comm->cutoff_mbody = systemInfo.cutoff;
2470                 }
2471             }
2472             /* Check if we did not end up below our original limit */
2473             comm->cutoff_mbody = std::max(comm->cutoff_mbody, systemInfo.minCutoffForMultiBody);
2474
2475             if (comm->cutoff_mbody > comm->cellsize_limit)
2476             {
2477                 comm->cellsize_limit = comm->cutoff_mbody;
2478             }
2479         }
2480         /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2481     }
2482
2483     if (debug)
2484     {
2485         fprintf(debug,
2486                 "Bonded atom communication beyond the cut-off: %s\n"
2487                 "cellsize limit %f\n",
2488                 gmx::boolToString(systemInfo.filterBondedCommunication),
2489                 comm->cellsize_limit);
2490     }
2491
2492     if (ddRole == DDRole::Master)
2493     {
2494         check_dd_restrictions(dd, ir, mdlog);
2495     }
2496 }
2497
2498 void dd_init_bondeds(FILE*                           fplog,
2499                      gmx_domdec_t*                   dd,
2500                      const gmx_mtop_t&               mtop,
2501                      const gmx::VirtualSitesHandler* vsite,
2502                      const t_inputrec*               ir,
2503                      gmx_bool                        bBCheck,
2504                      gmx::ArrayRef<cginfo_mb_t>      cginfo_mb)
2505 {
2506     dd_make_reverse_top(fplog, dd, &mtop, vsite, ir, bBCheck);
2507
2508     gmx_domdec_comm_t* comm = dd->comm;
2509
2510     if (comm->systemInfo.filterBondedCommunication)
2511     {
2512         /* Communicate atoms beyond the cut-off for bonded interactions */
2513         comm->bondedLinks = makeBondedLinks(mtop, cginfo_mb);
2514     }
2515     else
2516     {
2517         /* Only communicate atoms based on cut-off */
2518         comm->bondedLinks = nullptr;
2519     }
2520 }
2521
2522 static void writeSettings(gmx::TextWriter*   log,
2523                           gmx_domdec_t*      dd,
2524                           const gmx_mtop_t*  mtop,
2525                           const t_inputrec*  ir,
2526                           gmx_bool           bDynLoadBal,
2527                           real               dlb_scale,
2528                           const gmx_ddbox_t* ddbox)
2529 {
2530     gmx_domdec_comm_t* comm = dd->comm;
2531
2532     if (bDynLoadBal)
2533     {
2534         log->writeString("The maximum number of communication pulses is:");
2535         for (int d = 0; d < dd->ndim; d++)
2536         {
2537             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2538         }
2539         log->ensureLineBreak();
2540         log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm",
2541                                 comm->cellsize_limit);
2542         log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2543         log->writeString("The allowed shrink of domain decomposition cells is:");
2544         for (int d = 0; d < DIM; d++)
2545         {
2546             if (dd->numCells[d] > 1)
2547             {
2548                 const real shrink =
2549                         (d >= ddbox->npbcdim && dd->numCells[d] == 2)
2550                                 ? 0
2551                                 : comm->cellsize_min_dlb[d]
2552                                           / (ddbox->box_size[d] * ddbox->skew_fac[d] / dd->numCells[d]);
2553                 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2554             }
2555         }
2556         log->ensureLineBreak();
2557     }
2558     else
2559     {
2560         ivec np;
2561         set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2562         log->writeString("The initial number of communication pulses is:");
2563         for (int d = 0; d < dd->ndim; d++)
2564         {
2565             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2566         }
2567         log->ensureLineBreak();
2568         log->writeString("The initial domain decomposition cell size is:");
2569         for (int d = 0; d < DIM; d++)
2570         {
2571             if (dd->numCells[d] > 1)
2572             {
2573                 log->writeStringFormatted(" %c %.2f nm", dim2char(d), dd->comm->cellsize_min[d]);
2574             }
2575         }
2576         log->ensureLineBreak();
2577         log->writeLine();
2578     }
2579
2580     const bool haveInterDomainVsites =
2581             (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
2582
2583     if (comm->systemInfo.haveInterDomainBondeds || haveInterDomainVsites
2584         || comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2585     {
2586         std::string decompUnits;
2587         if (comm->systemInfo.useUpdateGroups)
2588         {
2589             decompUnits = "atom groups";
2590         }
2591         else
2592         {
2593             decompUnits = "atoms";
2594         }
2595
2596         log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:",
2597                                 decompUnits.c_str());
2598         log->writeLineFormatted(
2599                 "%40s  %-7s %6.3f nm", "non-bonded interactions", "", comm->systemInfo.cutoff);
2600
2601         real limit = 0;
2602         if (bDynLoadBal)
2603         {
2604             limit = dd->comm->cellsize_limit;
2605         }
2606         else
2607         {
2608             if (dd->unitCellInfo.ddBoxIsDynamic)
2609             {
2610                 log->writeLine(
2611                         "(the following are initial values, they could change due to box "
2612                         "deformation)");
2613             }
2614             limit = dd->comm->cellsize_min[XX];
2615             for (int d = 1; d < DIM; d++)
2616             {
2617                 limit = std::min(limit, dd->comm->cellsize_min[d]);
2618             }
2619         }
2620
2621         if (comm->systemInfo.haveInterDomainBondeds)
2622         {
2623             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2624                                     "two-body bonded interactions",
2625                                     "(-rdd)",
2626                                     std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2627             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2628                                     "multi-body bonded interactions",
2629                                     "(-rdd)",
2630                                     (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm))
2631                                             ? comm->cutoff_mbody
2632                                             : std::min(comm->systemInfo.cutoff, limit));
2633         }
2634         if (haveInterDomainVsites)
2635         {
2636             log->writeLineFormatted("%40s  %-7s %6.3f nm", "virtual site constructions", "(-rcon)", limit);
2637         }
2638         if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2639         {
2640             std::string separation =
2641                     gmx::formatString("atoms separated by up to %d constraints", 1 + ir->nProjOrder);
2642             log->writeLineFormatted("%40s  %-7s %6.3f nm\n", separation.c_str(), "(-rcon)", limit);
2643         }
2644         log->ensureLineBreak();
2645     }
2646 }
2647
2648 static void logSettings(const gmx::MDLogger& mdlog,
2649                         gmx_domdec_t*        dd,
2650                         const gmx_mtop_t*    mtop,
2651                         const t_inputrec*    ir,
2652                         real                 dlb_scale,
2653                         const gmx_ddbox_t*   ddbox)
2654 {
2655     gmx::StringOutputStream stream;
2656     gmx::TextWriter         log(&stream);
2657     writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2658     if (dd->comm->dlbState == DlbState::offCanTurnOn)
2659     {
2660         {
2661             log.ensureEmptyLine();
2662             log.writeLine(
2663                     "When dynamic load balancing gets turned on, these settings will change to:");
2664         }
2665         writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2666     }
2667     GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2668 }
2669
2670 static void set_cell_limits_dlb(const gmx::MDLogger& mdlog,
2671                                 gmx_domdec_t*        dd,
2672                                 real                 dlb_scale,
2673                                 const t_inputrec*    ir,
2674                                 const gmx_ddbox_t*   ddbox)
2675 {
2676     int npulse       = 0;
2677     int npulse_d_max = 0;
2678     int npulse_d     = 0;
2679
2680     gmx_domdec_comm_t* comm = dd->comm;
2681
2682     bool bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2683
2684     /* Determine the maximum number of comm. pulses in one dimension */
2685
2686     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2687
2688     /* Determine the maximum required number of grid pulses */
2689     if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2690     {
2691         /* Only a single pulse is required */
2692         npulse = 1;
2693     }
2694     else if (!bNoCutOff && comm->cellsize_limit > 0)
2695     {
2696         /* We round down slightly here to avoid overhead due to the latency
2697          * of extra communication calls when the cut-off
2698          * would be only slightly longer than the cell size.
2699          * Later cellsize_limit is redetermined,
2700          * so we can not miss interactions due to this rounding.
2701          */
2702         npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff / comm->cellsize_limit);
2703     }
2704     else
2705     {
2706         /* There is no cell size limit */
2707         npulse = std::max(dd->numCells[XX] - 1, std::max(dd->numCells[YY] - 1, dd->numCells[ZZ] - 1));
2708     }
2709
2710     if (!bNoCutOff && npulse > 1)
2711     {
2712         /* See if we can do with less pulses, based on dlb_scale */
2713         npulse_d_max = 0;
2714         for (int d = 0; d < dd->ndim; d++)
2715         {
2716             int dim  = dd->dim[d];
2717             npulse_d = static_cast<int>(
2718                     1
2719                     + dd->numCells[dim] * comm->systemInfo.cutoff
2720                               / (ddbox->box_size[dim] * ddbox->skew_fac[dim] * dlb_scale));
2721             npulse_d_max = std::max(npulse_d_max, npulse_d);
2722         }
2723         npulse = std::min(npulse, npulse_d_max);
2724     }
2725
2726     /* This env var can override npulse */
2727     const int ddPulseEnv = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2728     if (ddPulseEnv > 0)
2729     {
2730         npulse = ddPulseEnv;
2731     }
2732
2733     comm->maxpulse       = 1;
2734     comm->bVacDLBNoLimit = (ir->pbcType == PbcType::No);
2735     for (int d = 0; d < dd->ndim; d++)
2736     {
2737         comm->cd[d].np_dlb = std::min(npulse, dd->numCells[dd->dim[d]] - 1);
2738         comm->maxpulse     = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2739         if (comm->cd[d].np_dlb < dd->numCells[dd->dim[d]] - 1)
2740         {
2741             comm->bVacDLBNoLimit = FALSE;
2742         }
2743     }
2744
2745     /* cellsize_limit is set for LINCS in init_domain_decomposition */
2746     if (!comm->bVacDLBNoLimit)
2747     {
2748         comm->cellsize_limit = std::max(comm->cellsize_limit, comm->systemInfo.cutoff / comm->maxpulse);
2749     }
2750     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2751     /* Set the minimum cell size for each DD dimension */
2752     for (int d = 0; d < dd->ndim; d++)
2753     {
2754         if (comm->bVacDLBNoLimit || comm->cd[d].np_dlb * comm->cellsize_limit >= comm->systemInfo.cutoff)
2755         {
2756             comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2757         }
2758         else
2759         {
2760             comm->cellsize_min_dlb[dd->dim[d]] = comm->systemInfo.cutoff / comm->cd[d].np_dlb;
2761         }
2762     }
2763     if (comm->cutoff_mbody <= 0)
2764     {
2765         comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2766     }
2767     if (isDlbOn(comm))
2768     {
2769         set_dlb_limits(dd);
2770     }
2771 }
2772
2773 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t& dd)
2774 {
2775     return dd.comm->systemInfo.moleculesAreAlwaysWhole;
2776 }
2777
2778 gmx_bool dd_bonded_molpbc(const gmx_domdec_t* dd, PbcType pbcType)
2779 {
2780     /* If each molecule is a single charge group
2781      * or we use domain decomposition for each periodic dimension,
2782      * we do not need to take pbc into account for the bonded interactions.
2783      */
2784     return (pbcType != PbcType::No && dd->comm->systemInfo.haveInterDomainBondeds
2785             && !(dd->numCells[XX] > 1 && dd->numCells[YY] > 1
2786                  && (dd->numCells[ZZ] > 1 || pbcType == PbcType::XY)));
2787 }
2788
2789 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2790 static void set_ddgrid_parameters(const gmx::MDLogger& mdlog,
2791                                   gmx_domdec_t*        dd,
2792                                   real                 dlb_scale,
2793                                   const gmx_mtop_t*    mtop,
2794                                   const t_inputrec*    ir,
2795                                   const gmx_ddbox_t*   ddbox)
2796 {
2797     gmx_domdec_comm_t* comm        = dd->comm;
2798     DDRankSetup&       ddRankSetup = comm->ddRankSetup;
2799
2800     if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2801     {
2802         init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2803         if (ddRankSetup.npmedecompdim >= 2)
2804         {
2805             init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2806         }
2807     }
2808     else
2809     {
2810         ddRankSetup.numRanksDoingPme = 0;
2811         if (dd->pme_nodeid >= 0)
2812         {
2813             gmx_fatal_collective(FARGS,
2814                                  dd->mpi_comm_all,
2815                                  DDMASTER(dd),
2816                                  "Can not have separate PME ranks without PME electrostatics");
2817         }
2818     }
2819
2820     if (debug)
2821     {
2822         fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2823     }
2824     if (!isDlbDisabled(comm))
2825     {
2826         set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2827     }
2828
2829     logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
2830
2831     const real vol_frac = (ir->pbcType == PbcType::No)
2832                                   ? (1 - 1 / static_cast<double>(dd->nnodes))
2833                                   : ((1 + comm_box_frac(dd->numCells, comm->systemInfo.cutoff, *ddbox))
2834                                      / static_cast<double>(dd->nnodes));
2835     if (debug)
2836     {
2837         fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2838     }
2839     int natoms_tot = mtop->natoms;
2840
2841     dd->ga2la = new gmx_ga2la_t(natoms_tot, static_cast<int>(vol_frac * natoms_tot));
2842 }
2843
2844 /*! \brief Get some important DD parameters which can be modified by env.vars */
2845 static DDSettings getDDSettings(const gmx::MDLogger&     mdlog,
2846                                 const DomdecOptions&     options,
2847                                 const gmx::MdrunOptions& mdrunOptions,
2848                                 const t_inputrec&        ir)
2849 {
2850     DDSettings ddSettings;
2851
2852     ddSettings.useSendRecv2        = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2853     ddSettings.dlb_scale_lim       = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2854     ddSettings.useDDOrderZYX       = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
2855     ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
2856     ddSettings.eFlop               = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2857     const int recload              = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2858     ddSettings.nstDDDump           = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2859     ddSettings.nstDDDumpGrid       = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2860     ddSettings.DD_debug            = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
2861
2862     if (ddSettings.useSendRecv2)
2863     {
2864         GMX_LOG(mdlog.info)
2865                 .appendText(
2866                         "Will use two sequential MPI_Sendrecv calls instead of two simultaneous "
2867                         "non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite "
2868                         "communication");
2869     }
2870
2871     if (ddSettings.eFlop)
2872     {
2873         GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2874         ddSettings.recordLoad = true;
2875     }
2876     else
2877     {
2878         ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
2879     }
2880
2881     ddSettings.initialDlbState = determineInitialDlbState(
2882             mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, &ir);
2883     GMX_LOG(mdlog.info)
2884             .appendTextFormatted("Dynamic load balancing: %s",
2885                                  edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);
2886
2887     return ddSettings;
2888 }
2889
2890 gmx_domdec_t::gmx_domdec_t(const t_inputrec& ir) : unitCellInfo(ir) {}
2891
2892 namespace gmx
2893 {
2894
2895 // TODO once the functionality stablizes, move this class and
2896 // supporting functionality into builder.cpp
2897 /*! \brief Impl class for DD builder */
2898 class DomainDecompositionBuilder::Impl
2899 {
2900 public:
2901     //! Constructor
2902     Impl(const MDLogger&      mdlog,
2903          t_commrec*           cr,
2904          const DomdecOptions& options,
2905          const MdrunOptions&  mdrunOptions,
2906          const gmx_mtop_t&    mtop,
2907          const t_inputrec&    ir,
2908          const matrix         box,
2909          ArrayRef<const RVec> xGlobal);
2910
2911     //! Build the resulting DD manager
2912     gmx_domdec_t* build(LocalAtomSetManager* atomSets);
2913
2914     //! Objects used in constructing and configuring DD
2915     //! {
2916     //! Logging object
2917     const MDLogger& mdlog_;
2918     //! Communication object
2919     t_commrec* cr_;
2920     //! User-supplied options configuring DD behavior
2921     const DomdecOptions options_;
2922     //! Global system topology
2923     const gmx_mtop_t& mtop_;
2924     //! User input values from the tpr file
2925     const t_inputrec& ir_;
2926     //! }
2927
2928     //! Internal objects used in constructing DD
2929     //! {
2930     //! Settings combined from the user input
2931     DDSettings ddSettings_;
2932     //! Information derived from the simulation system
2933     DDSystemInfo systemInfo_;
2934     //! Box structure
2935     gmx_ddbox_t ddbox_ = { 0 };
2936     //! Organization of the DD grids
2937     DDGridSetup ddGridSetup_;
2938     //! Organzation of the DD ranks
2939     DDRankSetup ddRankSetup_;
2940     //! Number of DD cells in each dimension
2941     ivec ddCellIndex_ = { 0, 0, 0 };
2942     //! IDs of PME-only ranks
2943     std::vector<int> pmeRanks_;
2944     //! Contains a valid Cartesian-communicator-based setup, or defaults.
2945     CartesianRankSetup cartSetup_;
2946     //! }
2947 };
2948
2949 DomainDecompositionBuilder::Impl::Impl(const MDLogger&      mdlog,
2950                                        t_commrec*           cr,
2951                                        const DomdecOptions& options,
2952                                        const MdrunOptions&  mdrunOptions,
2953                                        const gmx_mtop_t&    mtop,
2954                                        const t_inputrec&    ir,
2955                                        const matrix         box,
2956                                        ArrayRef<const RVec> xGlobal) :
2957     mdlog_(mdlog),
2958     cr_(cr),
2959     options_(options),
2960     mtop_(mtop),
2961     ir_(ir)
2962 {
2963     GMX_LOG(mdlog_.info).appendTextFormatted("\nInitializing Domain Decomposition on %d ranks", cr_->sizeOfDefaultCommunicator);
2964
2965     ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_);
2966
2967     if (ddSettings_.eFlop > 1)
2968     {
2969         /* Ensure that we have different random flop counts on different ranks */
2970         srand(1 + cr_->rankInDefaultCommunicator);
2971     }
2972
2973     systemInfo_ = getSystemInfo(mdlog_,
2974                                 MASTER(cr_) ? DDRole::Master : DDRole::Agent,
2975                                 cr->mpiDefaultCommunicator,
2976                                 options_,
2977                                 mtop_,
2978                                 ir_,
2979                                 box,
2980                                 xGlobal);
2981
2982     const int  numRanksRequested         = cr_->sizeOfDefaultCommunicator;
2983     const bool checkForLargePrimeFactors = (options_.numCells[0] <= 0);
2984     checkForValidRankCountRequests(
2985             numRanksRequested, EEL_PME(ir_.coulombtype), options_.numPmeRanks, checkForLargePrimeFactors);
2986
2987     // DD grid setup uses a more different cell size limit for
2988     // automated setup than the one in systemInfo_. The latter is used
2989     // in set_dd_limits() to configure DLB, for example.
2990     const real gridSetupCellsizeLimit =
2991             getDDGridSetupCellSizeLimit(mdlog_,
2992                                         !isDlbDisabled(ddSettings_.initialDlbState),
2993                                         options_.dlbScaling,
2994                                         ir_,
2995                                         systemInfo_.cellsizeLimit);
2996     ddGridSetup_ = getDDGridSetup(mdlog_,
2997                                   MASTER(cr_) ? DDRole::Master : DDRole::Agent,
2998                                   cr->mpiDefaultCommunicator,
2999                                   numRanksRequested,
3000                                   options_,
3001                                   ddSettings_,
3002                                   systemInfo_,
3003                                   gridSetupCellsizeLimit,
3004                                   mtop_,
3005                                   ir_,
3006                                   box,
3007                                   xGlobal,
3008                                   &ddbox_);
3009     checkDDGridSetup(ddGridSetup_,
3010                      MASTER(cr_) ? DDRole::Master : DDRole::Agent,
3011                      cr->mpiDefaultCommunicator,
3012                      cr->sizeOfDefaultCommunicator,
3013                      options_,
3014                      ddSettings_,
3015                      systemInfo_,
3016                      gridSetupCellsizeLimit,
3017                      ddbox_);
3018
3019     cr_->npmenodes = ddGridSetup_.numPmeOnlyRanks;
3020
3021     ddRankSetup_ = getDDRankSetup(mdlog_, cr_->sizeOfDefaultCommunicator, ddGridSetup_, ir_);
3022
3023     /* Generate the group communicator, also decides the duty of each rank */
3024     cartSetup_ = makeGroupCommunicators(
3025             mdlog_, ddSettings_, options_.rankOrder, ddRankSetup_, cr_, ddCellIndex_, &pmeRanks_);
3026 }
3027
3028 gmx_domdec_t* DomainDecompositionBuilder::Impl::build(LocalAtomSetManager* atomSets)
3029 {
3030     gmx_domdec_t* dd = new gmx_domdec_t(ir_);
3031
3032     copy_ivec(ddCellIndex_, dd->ci);
3033
3034     dd->comm = init_dd_comm();
3035
3036     dd->comm->ddRankSetup        = ddRankSetup_;
3037     dd->comm->cartesianRankSetup = cartSetup_;
3038
3039     set_dd_limits(mdlog_,
3040                   MASTER(cr_) ? DDRole::Master : DDRole::Agent,
3041                   dd,
3042                   options_,
3043                   ddSettings_,
3044                   systemInfo_,
3045                   ddGridSetup_,
3046                   ddRankSetup_.numPPRanks,
3047                   &mtop_,
3048                   &ir_,
3049                   ddbox_);
3050
3051     setupGroupCommunication(mdlog_, ddSettings_, pmeRanks_, cr_, mtop_.natoms, dd);
3052
3053     if (thisRankHasDuty(cr_, DUTY_PP))
3054     {
3055         set_ddgrid_parameters(mdlog_, dd, options_.dlbScaling, &mtop_, &ir_, &ddbox_);
3056
3057         setup_neighbor_relations(dd);
3058     }
3059
3060     /* Set overallocation to avoid frequent reallocation of arrays */
3061     set_over_alloc_dd(true);
3062
3063     dd->atomSets = atomSets;
3064
3065     return dd;
3066 }
3067
3068 DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger&      mdlog,
3069                                                        t_commrec*           cr,
3070                                                        const DomdecOptions& options,
3071                                                        const MdrunOptions&  mdrunOptions,
3072                                                        const gmx_mtop_t&    mtop,
3073                                                        const t_inputrec&    ir,
3074                                                        const matrix         box,
3075                                                        ArrayRef<const RVec> xGlobal) :
3076     impl_(new Impl(mdlog, cr, options, mdrunOptions, mtop, ir, box, xGlobal))
3077 {
3078 }
3079
3080 gmx_domdec_t* DomainDecompositionBuilder::build(LocalAtomSetManager* atomSets)
3081 {
3082     return impl_->build(atomSets);
3083 }
3084
3085 DomainDecompositionBuilder::~DomainDecompositionBuilder() = default;
3086
3087 } // namespace gmx
3088
3089 static gmx_bool test_dd_cutoff(const t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3090 {
3091     gmx_ddbox_t ddbox;
3092     int         LocallyLimited = 0;
3093
3094     const auto* dd = cr->dd;
3095
3096     set_ddbox(*dd, false, box, true, x, &ddbox);
3097
3098     LocallyLimited = 0;
3099
3100     for (int d = 0; d < dd->ndim; d++)
3101     {
3102         const int dim = dd->dim[d];
3103
3104         real inv_cell_size = DD_CELL_MARGIN * dd->numCells[dim] / ddbox.box_size[dim];
3105         if (dd->unitCellInfo.ddBoxIsDynamic)
3106         {
3107             inv_cell_size *= DD_PRES_SCALE_MARGIN;
3108         }
3109
3110         const int np = 1 + static_cast<int>(cutoffRequested * inv_cell_size * ddbox.skew_fac[dim]);
3111
3112         if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3113         {
3114             if (np > dd->comm->cd[d].np_dlb)
3115             {
3116                 return FALSE;
3117             }
3118
3119             /* If a current local cell size is smaller than the requested
3120              * cut-off, we could still fix it, but this gets very complicated.
3121              * Without fixing here, we might actually need more checks.
3122              */
3123             real cellSizeAlongDim =
3124                     (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim]) * ddbox.skew_fac[dim];
3125             if (cellSizeAlongDim * dd->comm->cd[d].np_dlb < cutoffRequested)
3126             {
3127                 LocallyLimited = 1;
3128             }
3129         }
3130     }
3131
3132     if (!isDlbDisabled(dd->comm))
3133     {
3134         /* If DLB is not active yet, we don't need to check the grid jumps.
3135          * Actually we shouldn't, because then the grid jump data is not set.
3136          */
3137         if (isDlbOn(dd->comm) && check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3138         {
3139             LocallyLimited = 1;
3140         }
3141
3142         gmx_sumi(1, &LocallyLimited, cr);
3143
3144         if (LocallyLimited > 0)
3145         {
3146             return FALSE;
3147         }
3148     }
3149
3150     return TRUE;
3151 }
3152
3153 gmx_bool change_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3154 {
3155     bool bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3156
3157     if (bCutoffAllowed)
3158     {
3159         cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3160     }
3161
3162     return bCutoffAllowed;
3163 }
3164
3165 void constructGpuHaloExchange(const gmx::MDLogger&            mdlog,
3166                               const t_commrec&                cr,
3167                               const gmx::DeviceStreamManager& deviceStreamManager,
3168                               gmx_wallcycle*                  wcycle)
3169 {
3170     GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedLocal),
3171                        "Local non-bonded stream should be valid when using"
3172                        "GPU halo exchange.");
3173     GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedNonLocal),
3174                        "Non-local non-bonded stream should be valid when using "
3175                        "GPU halo exchange.");
3176
3177     if (cr.dd->gpuHaloExchange[0].empty())
3178     {
3179         GMX_LOG(mdlog.warning)
3180                 .asParagraph()
3181                 .appendTextFormatted(
3182                         "NOTE: Activating the 'GPU halo exchange' feature, enabled "
3183                         "by the "
3184                         "GMX_GPU_DD_COMMS environment variable.");
3185     }
3186
3187     for (int d = 0; d < cr.dd->ndim; d++)
3188     {
3189         for (int pulse = cr.dd->gpuHaloExchange[d].size(); pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
3190         {
3191             cr.dd->gpuHaloExchange[d].push_back(std::make_unique<gmx::GpuHaloExchange>(
3192                     cr.dd,
3193                     d,
3194                     cr.mpi_comm_mysim,
3195                     deviceStreamManager.context(),
3196                     deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedLocal),
3197                     deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedNonLocal),
3198                     pulse,
3199                     wcycle));
3200         }
3201     }
3202 }
3203
3204 void reinitGpuHaloExchange(const t_commrec&              cr,
3205                            const DeviceBuffer<gmx::RVec> d_coordinatesBuffer,
3206                            const DeviceBuffer<gmx::RVec> d_forcesBuffer)
3207 {
3208     for (int d = 0; d < cr.dd->ndim; d++)
3209     {
3210         for (int pulse = 0; pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
3211         {
3212             cr.dd->gpuHaloExchange[d][pulse]->reinitHalo(d_coordinatesBuffer, d_forcesBuffer);
3213         }
3214     }
3215 }
3216
3217 void communicateGpuHaloCoordinates(const t_commrec&      cr,
3218                                    const matrix          box,
3219                                    GpuEventSynchronizer* coordinatesReadyOnDeviceEvent)
3220 {
3221     for (int d = 0; d < cr.dd->ndim; d++)
3222     {
3223         for (int pulse = 0; pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
3224         {
3225             cr.dd->gpuHaloExchange[d][pulse]->communicateHaloCoordinates(box, coordinatesReadyOnDeviceEvent);
3226         }
3227     }
3228 }
3229
3230 void communicateGpuHaloForces(const t_commrec& cr, bool accumulateForces)
3231 {
3232     for (int d = cr.dd->ndim - 1; d >= 0; d--)
3233     {
3234         for (int pulse = cr.dd->comm->cd[d].numPulses() - 1; pulse >= 0; pulse--)
3235         {
3236             cr.dd->gpuHaloExchange[d][pulse]->communicateHaloForces(accumulateForces);
3237         }
3238     }
3239 }