src/gromacs/domdec/domdec.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #include "gmxpre.h"
  37
  38 #include "domdec.h"
  39
  40 #include "config.h"
  41
  42 #include <cassert>
  43 #include <cinttypes>
  44 #include <climits>
  45 #include <cmath>
  46 #include <cstdio>
  47 #include <cstdlib>
  48 #include <cstring>
  49
  50 #include <algorithm>
  51 #include <memory>
  52
  53 #include "gromacs/domdec/collect.h"
  54 #include "gromacs/domdec/dlb.h"
  55 #include "gromacs/domdec/dlbtiming.h"
  56 #include "gromacs/domdec/domdec_network.h"
  57 #include "gromacs/domdec/ga2la.h"
  58 #include "gromacs/domdec/gpuhaloexchange.h"
  59 #include "gromacs/domdec/options.h"
  60 #include "gromacs/domdec/partition.h"
  61 #include "gromacs/gmxlib/network.h"
  62 #include "gromacs/gmxlib/nrnb.h"
  63 #include "gromacs/gpu_utils/gpu_utils.h"
  64 #include "gromacs/hardware/hw_info.h"
  65 #include "gromacs/listed_forces/manage_threading.h"
  66 #include "gromacs/math/vec.h"
  67 #include "gromacs/math/vectypes.h"
  68 #include "gromacs/mdlib/calc_verletbuf.h"
  69 #include "gromacs/mdlib/constr.h"
  70 #include "gromacs/mdlib/constraintrange.h"
  71 #include "gromacs/mdlib/updategroups.h"
  72 #include "gromacs/mdlib/vsite.h"
  73 #include "gromacs/mdtypes/commrec.h"
  74 #include "gromacs/mdtypes/forceoutput.h"
  75 #include "gromacs/mdtypes/inputrec.h"
  76 #include "gromacs/mdtypes/mdrunoptions.h"
  77 #include "gromacs/mdtypes/state.h"
  78 #include "gromacs/pbcutil/ishift.h"
  79 #include "gromacs/pbcutil/pbc.h"
  80 #include "gromacs/pulling/pull.h"
  81 #include "gromacs/timing/wallcycle.h"
  82 #include "gromacs/topology/block.h"
  83 #include "gromacs/topology/idef.h"
  84 #include "gromacs/topology/ifunc.h"
  85 #include "gromacs/topology/mtop_lookup.h"
  86 #include "gromacs/topology/mtop_util.h"
  87 #include "gromacs/topology/topology.h"
  88 #include "gromacs/utility/basedefinitions.h"
  89 #include "gromacs/utility/basenetwork.h"
  90 #include "gromacs/utility/cstringutil.h"
  91 #include "gromacs/utility/exceptions.h"
  92 #include "gromacs/utility/fatalerror.h"
  93 #include "gromacs/utility/gmxmpi.h"
  94 #include "gromacs/utility/logger.h"
  95 #include "gromacs/utility/real.h"
  96 #include "gromacs/utility/smalloc.h"
  97 #include "gromacs/utility/strconvert.h"
  98 #include "gromacs/utility/stringstream.h"
  99 #include "gromacs/utility/stringutil.h"
 100 #include "gromacs/utility/textwriter.h"
 101
 102 #include "atomdistribution.h"
 103 #include "box.h"
 104 #include "cellsizes.h"
 105 #include "distribute.h"
 106 #include "domdec_constraints.h"
 107 #include "domdec_internal.h"
 108 #include "domdec_setup.h"
 109 #include "domdec_vsite.h"
 110 #include "redistribute.h"
 111 #include "utility.h"
 112
 113 // TODO remove this when moving domdec into gmx namespace
 114 using gmx::DdRankOrder;
 115 using gmx::DlbOption;
 116 using gmx::DomdecOptions;
 117
 118 static const char *edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
 119
 120 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
 121 #define DD_CGIBS 2
 122
 123 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
 124 #define DD_FLAG_NRCG  65535
 125 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
 126 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
 127
 128 /* The DD zone order */
 129 static const ivec dd_zo[DD_MAXZONE] =
 130 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
 131
 132 /* The non-bonded zone-pair setup for domain decomposition
 133  * The first number is the i-zone, the second number the first j-zone seen by
 134  * this i-zone, the third number the last+1 j-zone seen by this i-zone.
 135  * As is, this is for 3D decomposition, where there are 4 i-zones.
 136  * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
 137  * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
 138  */
 139 static const int
 140     ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
 141                                                  {1, 3, 6},
 142                                                  {2, 5, 6},
 143                                                  {3, 5, 7}};
 144
 145
 146 /*
 147    #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
 148
 149    static void index2xyz(ivec nc,int ind,ivec xyz)
 150    {
 151    xyz[XX] = ind % nc[XX];
 152    xyz[YY] = (ind / nc[XX]) % nc[YY];
 153    xyz[ZZ] = ind / (nc[YY]*nc[XX]);
 154    }
 155  */
 156
 157 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
 158 {
 159     xyz[XX] = ind / (nc[YY]*nc[ZZ]);
 160     xyz[YY] = (ind / nc[ZZ]) % nc[YY];
 161     xyz[ZZ] = ind % nc[ZZ];
 162 }
 163
 164 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
 165 {
 166     int                       ddnodeid = -1;
 167
 168     const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
 169     const int                 ddindex   = dd_index(dd->nc, c);
 170     if (cartSetup.bCartesianPP_PME)
 171     {
 172         ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
 173     }
 174     else if (cartSetup.bCartesianPP)
 175     {
 176 #if GMX_MPI
 177         MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
 178 #endif
 179     }
 180     else
 181     {
 182         ddnodeid = ddindex;
 183     }
 184
 185     return ddnodeid;
 186 }
 187
 188 int ddglatnr(const gmx_domdec_t *dd, int i)
 189 {
 190     int atnr;
 191
 192     if (dd == nullptr)
 193     {
 194         atnr = i + 1;
 195     }
 196     else
 197     {
 198         if (i >= dd->comm->atomRanges.numAtomsTotal())
 199         {
 200             gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
 201         }
 202         atnr = dd->globalAtomIndices[i] + 1;
 203     }
 204
 205     return atnr;
 206 }
 207
 208 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t &dd)
 209 {
 210     GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
 211     return dd.comm->systemInfo.updateGroupingPerMoleculetype;
 212 }
 213
 214 void dd_store_state(gmx_domdec_t *dd, t_state *state)
 215 {
 216     int i;
 217
 218     if (state->ddp_count != dd->ddp_count)
 219     {
 220         gmx_incons("The MD state does not match the domain decomposition state");
 221     }
 222
 223     state->cg_gl.resize(dd->ncg_home);
 224     for (i = 0; i < dd->ncg_home; i++)
 225     {
 226         state->cg_gl[i] = dd->globalAtomGroupIndices[i];
 227     }
 228
 229     state->ddp_count_cg_gl = dd->ddp_count;
 230 }
 231
 232 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
 233 {
 234     return &dd->comm->zones;
 235 }
 236
 237 int dd_numHomeAtoms(const gmx_domdec_t &dd)
 238 {
 239     return dd.comm->atomRanges.numHomeAtoms();
 240 }
 241
 242 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
 243 {
 244     /* We currently set mdatoms entries for all atoms:
 245      * local + non-local + communicated for vsite + constraints
 246      */
 247
 248     return dd->comm->atomRanges.numAtomsTotal();
 249 }
 250
 251 int dd_natoms_vsite(const gmx_domdec_t *dd)
 252 {
 253     return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
 254 }
 255
 256 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
 257 {
 258     *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
 259     *at_end   = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
 260 }
 261
 262 void dd_move_x(gmx_domdec_t             *dd,
 263                const matrix              box,
 264                gmx::ArrayRef<gmx::RVec>  x,
 265                gmx_wallcycle            *wcycle)
 266 {
 267     wallcycle_start(wcycle, ewcMOVEX);
 268
 269     int                    nzone, nat_tot;
 270     gmx_domdec_comm_t     *comm;
 271     gmx_domdec_comm_dim_t *cd;
 272     rvec                   shift = {0, 0, 0};
 273     gmx_bool               bPBC, bScrew;
 274
 275     comm = dd->comm;
 276
 277     nzone   = 1;
 278     nat_tot = comm->atomRanges.numHomeAtoms();
 279     for (int d = 0; d < dd->ndim; d++)
 280     {
 281         bPBC   = (dd->ci[dd->dim[d]] == 0);
 282         bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
 283         if (bPBC)
 284         {
 285             copy_rvec(box[dd->dim[d]], shift);
 286         }
 287         cd = &comm->cd[d];
 288         for (const gmx_domdec_ind_t &ind : cd->ind)
 289         {
 290             DDBufferAccess<gmx::RVec>  sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 291             gmx::ArrayRef<gmx::RVec>  &sendBuffer = sendBufferAccess.buffer;
 292             int                        n          = 0;
 293             if (!bPBC)
 294             {
 295                 for (int j : ind.index)
 296                 {
 297                     sendBuffer[n] = x[j];
 298                     n++;
 299                 }
 300             }
 301             else if (!bScrew)
 302             {
 303                 for (int j : ind.index)
 304                 {
 305                     /* We need to shift the coordinates */
 306                     for (int d = 0; d < DIM; d++)
 307                     {
 308                         sendBuffer[n][d] = x[j][d] + shift[d];
 309                     }
 310                     n++;
 311                 }
 312             }
 313             else
 314             {
 315                 for (int j : ind.index)
 316                 {
 317                     /* Shift x */
 318                     sendBuffer[n][XX] = x[j][XX] + shift[XX];
 319                     /* Rotate y and z.
 320                      * This operation requires a special shift force
 321                      * treatment, which is performed in calc_vir.
 322                      */
 323                     sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
 324                     sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
 325                     n++;
 326                 }
 327             }
 328
 329             DDBufferAccess<gmx::RVec>  receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 330
 331             gmx::ArrayRef<gmx::RVec>   receiveBuffer;
 332             if (cd->receiveInPlace)
 333             {
 334                 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
 335             }
 336             else
 337             {
 338                 receiveBuffer = receiveBufferAccess.buffer;
 339             }
 340             /* Send and receive the coordinates */
 341             ddSendrecv(dd, d, dddirBackward,
 342                        sendBuffer, receiveBuffer);
 343
 344             if (!cd->receiveInPlace)
 345             {
 346                 int j = 0;
 347                 for (int zone = 0; zone < nzone; zone++)
 348                 {
 349                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 350                     {
 351                         x[i] = receiveBuffer[j++];
 352                     }
 353                 }
 354             }
 355             nat_tot += ind.nrecv[nzone+1];
 356         }
 357         nzone += nzone;
 358     }
 359
 360     wallcycle_stop(wcycle, ewcMOVEX);
 361 }
 362
 363 void dd_move_f(gmx_domdec_t              *dd,
 364                gmx::ForceWithShiftForces *forceWithShiftForces,
 365                gmx_wallcycle             *wcycle)
 366 {
 367     wallcycle_start(wcycle, ewcMOVEF);
 368
 369     gmx::ArrayRef<gmx::RVec> f       = forceWithShiftForces->force();
 370     gmx::ArrayRef<gmx::RVec> fshift  = forceWithShiftForces->shiftForces();
 371
 372     gmx_domdec_comm_t       &comm    = *dd->comm;
 373     int                      nzone   = comm.zones.n/2;
 374     int                      nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
 375     for (int d = dd->ndim-1; d >= 0; d--)
 376     {
 377         /* Only forces in domains near the PBC boundaries need to
 378            consider PBC in the treatment of fshift */
 379         const bool shiftForcesNeedPbc = (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
 380         const bool applyScrewPbc      = (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
 381         /* Determine which shift vector we need */
 382         ivec       vis   = { 0, 0, 0 };
 383         vis[dd->dim[d]]  = 1;
 384         const int  is    = IVEC2IS(vis);
 385
 386         /* Loop over the pulses */
 387         const gmx_domdec_comm_dim_t &cd = comm.cd[d];
 388         for (int p = cd.numPulses() - 1; p >= 0; p--)
 389         {
 390             const gmx_domdec_ind_t    &ind  = cd.ind[p];
 391             DDBufferAccess<gmx::RVec>  receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
 392             gmx::ArrayRef<gmx::RVec>  &receiveBuffer = receiveBufferAccess.buffer;
 393
 394             nat_tot                                 -= ind.nrecv[nzone+1];
 395
 396             DDBufferAccess<gmx::RVec>  sendBufferAccess(comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 397
 398             gmx::ArrayRef<gmx::RVec>   sendBuffer;
 399             if (cd.receiveInPlace)
 400             {
 401                 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
 402             }
 403             else
 404             {
 405                 sendBuffer = sendBufferAccess.buffer;
 406                 int j = 0;
 407                 for (int zone = 0; zone < nzone; zone++)
 408                 {
 409                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 410                     {
 411                         sendBuffer[j++] = f[i];
 412                     }
 413                 }
 414             }
 415             /* Communicate the forces */
 416             ddSendrecv(dd, d, dddirForward,
 417                        sendBuffer, receiveBuffer);
 418             /* Add the received forces */
 419             int n = 0;
 420             if (!shiftForcesNeedPbc)
 421             {
 422                 for (int j : ind.index)
 423                 {
 424                     for (int d = 0; d < DIM; d++)
 425                     {
 426                         f[j][d] += receiveBuffer[n][d];
 427                     }
 428                     n++;
 429                 }
 430             }
 431             else if (!applyScrewPbc)
 432             {
 433                 for (int j : ind.index)
 434                 {
 435                     for (int d = 0; d < DIM; d++)
 436                     {
 437                         f[j][d] += receiveBuffer[n][d];
 438                     }
 439                     /* Add this force to the shift force */
 440                     for (int d = 0; d < DIM; d++)
 441                     {
 442                         fshift[is][d] += receiveBuffer[n][d];
 443                     }
 444                     n++;
 445                 }
 446             }
 447             else
 448             {
 449                 for (int j : ind.index)
 450                 {
 451                     /* Rotate the force */
 452                     f[j][XX] += receiveBuffer[n][XX];
 453                     f[j][YY] -= receiveBuffer[n][YY];
 454                     f[j][ZZ] -= receiveBuffer[n][ZZ];
 455                     if (shiftForcesNeedPbc)
 456                     {
 457                         /* Add this force to the shift force */
 458                         for (int d = 0; d < DIM; d++)
 459                         {
 460                             fshift[is][d] += receiveBuffer[n][d];
 461                         }
 462                     }
 463                     n++;
 464                 }
 465             }
 466         }
 467         nzone /= 2;
 468     }
 469     wallcycle_stop(wcycle, ewcMOVEF);
 470 }
 471
 472 /* Convenience function for extracting a real buffer from an rvec buffer
 473  *
 474  * To reduce the number of temporary communication buffers and avoid
 475  * cache polution, we reuse gmx::RVec buffers for storing reals.
 476  * This functions return a real buffer reference with the same number
 477  * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
 478  */
 479 static gmx::ArrayRef<real>
 480 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
 481 {
 482     return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
 483                                   arrayRef.size());
 484 }
 485
 486 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
 487 {
 488     int                    nzone, nat_tot;
 489     gmx_domdec_comm_t     *comm;
 490     gmx_domdec_comm_dim_t *cd;
 491
 492     comm = dd->comm;
 493
 494     nzone   = 1;
 495     nat_tot = comm->atomRanges.numHomeAtoms();
 496     for (int d = 0; d < dd->ndim; d++)
 497     {
 498         cd = &comm->cd[d];
 499         for (const gmx_domdec_ind_t &ind : cd->ind)
 500         {
 501             /* Note: We provision for RVec instead of real, so a factor of 3
 502              * more than needed. The buffer actually already has this size
 503              * and we pass a plain pointer below, so this does not matter.
 504              */
 505             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 506             gmx::ArrayRef<real>       sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 507             int                       n          = 0;
 508             for (int j : ind.index)
 509             {
 510                 sendBuffer[n++] = v[j];
 511             }
 512
 513             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 514
 515             gmx::ArrayRef<real>       receiveBuffer;
 516             if (cd->receiveInPlace)
 517             {
 518                 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 519             }
 520             else
 521             {
 522                 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 523             }
 524             /* Send and receive the data */
 525             ddSendrecv(dd, d, dddirBackward,
 526                        sendBuffer, receiveBuffer);
 527             if (!cd->receiveInPlace)
 528             {
 529                 int j = 0;
 530                 for (int zone = 0; zone < nzone; zone++)
 531                 {
 532                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 533                     {
 534                         v[i] = receiveBuffer[j++];
 535                     }
 536                 }
 537             }
 538             nat_tot += ind.nrecv[nzone+1];
 539         }
 540         nzone += nzone;
 541     }
 542 }
 543
 544 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
 545 {
 546     int                    nzone, nat_tot;
 547     gmx_domdec_comm_t     *comm;
 548     gmx_domdec_comm_dim_t *cd;
 549
 550     comm = dd->comm;
 551
 552     nzone   = comm->zones.n/2;
 553     nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
 554     for (int d = dd->ndim-1; d >= 0; d--)
 555     {
 556         cd = &comm->cd[d];
 557         for (int p = cd->numPulses() - 1; p >= 0; p--)
 558         {
 559             const gmx_domdec_ind_t &ind = cd->ind[p];
 560
 561             /* Note: We provision for RVec instead of real, so a factor of 3
 562              * more than needed. The buffer actually already has this size
 563              * and we typecast, so this works as intended.
 564              */
 565             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 566             gmx::ArrayRef<real>       receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 567             nat_tot -= ind.nrecv[nzone + 1];
 568
 569             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 570
 571             gmx::ArrayRef<real>       sendBuffer;
 572             if (cd->receiveInPlace)
 573             {
 574                 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 575             }
 576             else
 577             {
 578                 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 579                 int j = 0;
 580                 for (int zone = 0; zone < nzone; zone++)
 581                 {
 582                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 583                     {
 584                         sendBuffer[j++] = v[i];
 585                     }
 586                 }
 587             }
 588             /* Communicate the forces */
 589             ddSendrecv(dd, d, dddirForward,
 590                        sendBuffer, receiveBuffer);
 591             /* Add the received forces */
 592             int n = 0;
 593             for (int j : ind.index)
 594             {
 595                 v[j] += receiveBuffer[n];
 596                 n++;
 597             }
 598         }
 599         nzone /= 2;
 600     }
 601 }
 602
 603 real dd_cutoff_multibody(const gmx_domdec_t *dd)
 604 {
 605     const gmx_domdec_comm_t &comm       = *dd->comm;
 606     const DDSystemInfo      &systemInfo = comm.systemInfo;
 607
 608     real                     r = -1;
 609     if (systemInfo.haveInterDomainMultiBodyBondeds)
 610     {
 611         if (comm.cutoff_mbody > 0)
 612         {
 613             r = comm.cutoff_mbody;
 614         }
 615         else
 616         {
 617             /* cutoff_mbody=0 means we do not have DLB */
 618             r = comm.cellsize_min[dd->dim[0]];
 619             for (int di = 1; di < dd->ndim; di++)
 620             {
 621                 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
 622             }
 623             if (comm.systemInfo.filterBondedCommunication)
 624             {
 625                 r = std::max(r, comm.cutoff_mbody);
 626             }
 627             else
 628             {
 629                 r = std::min(r, systemInfo.cutoff);
 630             }
 631         }
 632     }
 633
 634     return r;
 635 }
 636
 637 real dd_cutoff_twobody(const gmx_domdec_t *dd)
 638 {
 639     real r_mb;
 640
 641     r_mb = dd_cutoff_multibody(dd);
 642
 643     return std::max(dd->comm->systemInfo.cutoff, r_mb);
 644 }
 645
 646
 647 static void dd_cart_coord2pmecoord(const DDRankSetup        &ddRankSetup,
 648                                    const CartesianRankSetup &cartSetup,
 649                                    const ivec                coord,
 650                                    ivec                      coord_pme)
 651 {
 652     const int nc   = ddRankSetup.numPPCells[cartSetup.cartpmedim];
 653     const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
 654     copy_ivec(coord, coord_pme);
 655     coord_pme[cartSetup.cartpmedim] =
 656         nc + (coord[cartSetup.cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
 657 }
 658
 659 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
 660 static int ddindex2pmeindex(const DDRankSetup &ddRankSetup,
 661                             const int          ddCellIndex)
 662 {
 663     const int npp  = ddRankSetup.numPPRanks;
 664     const int npme = ddRankSetup.numRanksDoingPme;
 665
 666     /* Here we assign a PME node to communicate with this DD node
 667      * by assuming that the major index of both is x.
 668      * We add npme/2 to obtain an even distribution.
 669      */
 670     return (ddCellIndex*npme + npme/2)/npp;
 671 }
 672
 673 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup &ddRankSetup)
 674 {
 675     std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
 676
 677     int              n = 0;
 678     for (int i = 0; i < ddRankSetup.numPPRanks; i++)
 679     {
 680         const int p0 = ddindex2pmeindex(ddRankSetup, i);
 681         const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
 682         if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
 683         {
 684             if (debug)
 685             {
 686                 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
 687             }
 688             pmeRanks[n] = i + 1 + n;
 689             n++;
 690         }
 691     }
 692
 693     return pmeRanks;
 694 }
 695
 696 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
 697 {
 698     gmx_domdec_t *dd;
 699     ivec          coords;
 700     int           slab;
 701
 702     dd = cr->dd;
 703     /*
 704        if (dd->comm->bCartesian) {
 705        gmx_ddindex2xyz(dd->nc,ddindex,coords);
 706        dd_coords2pmecoords(dd,coords,coords_pme);
 707        copy_ivec(dd->ntot,nc);
 708        nc[dd->cartpmedim]         -= dd->nc[dd->cartpmedim];
 709        coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
 710
 711        slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
 712        } else {
 713        slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
 714        }
 715      */
 716     coords[XX] = x;
 717     coords[YY] = y;
 718     coords[ZZ] = z;
 719     slab       = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->nc, coords));
 720
 721     return slab;
 722 }
 723
 724 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
 725 {
 726     const CartesianRankSetup &cartSetup = cr->dd->comm->cartesianRankSetup;
 727     ivec                      coords    = { x, y, z };
 728     int                       nodeid    = -1;
 729
 730     if (cartSetup.bCartesianPP_PME)
 731     {
 732 #if GMX_MPI
 733         MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
 734 #endif
 735     }
 736     else
 737     {
 738         int ddindex = dd_index(cr->dd->nc, coords);
 739         if (cartSetup.bCartesianPP)
 740         {
 741             nodeid = cartSetup.ddindex2simnodeid[ddindex];
 742         }
 743         else
 744         {
 745             if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
 746             {
 747                 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
 748             }
 749             else
 750             {
 751                 nodeid = ddindex;
 752             }
 753         }
 754     }
 755
 756     return nodeid;
 757 }
 758
 759 static int dd_simnode2pmenode(const DDRankSetup          &ddRankSetup,
 760                               const CartesianRankSetup   &cartSetup,
 761                               gmx::ArrayRef<const int>    pmeRanks,
 762                               const t_commrec gmx_unused *cr,
 763                               const int                   sim_nodeid)
 764 {
 765     int                pmenode = -1;
 766
 767     /* This assumes a uniform x domain decomposition grid cell size */
 768     if (cartSetup.bCartesianPP_PME)
 769     {
 770 #if GMX_MPI
 771         ivec coord, coord_pme;
 772         MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
 773         if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
 774         {
 775             /* This is a PP rank */
 776             dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
 777             MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
 778         }
 779 #endif
 780     }
 781     else if (cartSetup.bCartesianPP)
 782     {
 783         if (sim_nodeid < ddRankSetup.numPPRanks)
 784         {
 785             pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
 786         }
 787     }
 788     else
 789     {
 790         /* This assumes DD cells with identical x coordinates
 791          * are numbered sequentially.
 792          */
 793         if (pmeRanks.empty())
 794         {
 795             if (sim_nodeid < ddRankSetup.numPPRanks)
 796             {
 797                 /* The DD index equals the nodeid */
 798                 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
 799             }
 800         }
 801         else
 802         {
 803             int i = 0;
 804             while (sim_nodeid > pmeRanks[i])
 805             {
 806                 i++;
 807             }
 808             if (sim_nodeid < pmeRanks[i])
 809             {
 810                 pmenode = pmeRanks[i];
 811             }
 812         }
 813     }
 814
 815     return pmenode;
 816 }
 817
 818 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
 819 {
 820     if (dd != nullptr)
 821     {
 822         return {
 823                    dd->comm->ddRankSetup.npmenodes_x,
 824                    dd->comm->ddRankSetup.npmenodes_y
 825         };
 826     }
 827     else
 828     {
 829         return {
 830                    1, 1
 831         };
 832     }
 833 }
 834
 835 std::vector<int> get_pme_ddranks(const t_commrec *cr,
 836                                  const int        pmenodeid)
 837 {
 838     const DDRankSetup        &ddRankSetup = cr->dd->comm->ddRankSetup;
 839     const CartesianRankSetup &cartSetup   = cr->dd->comm->cartesianRankSetup;
 840     GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks, "This function should only be called when PME-only ranks are in use");
 841     std::vector<int>          ddranks;
 842     ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1)/ddRankSetup.numRanksDoingPme);
 843
 844     for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
 845     {
 846         for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
 847         {
 848             for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
 849             {
 850                 if (cartSetup.bCartesianPP_PME)
 851                 {
 852                     ivec coord = { x, y, z };
 853                     ivec coord_pme;
 854                     dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
 855                     if (cr->dd->ci[XX] == coord_pme[XX] &&
 856                         cr->dd->ci[YY] == coord_pme[YY] &&
 857                         cr->dd->ci[ZZ] == coord_pme[ZZ])
 858                     {
 859                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 860                     }
 861                 }
 862                 else
 863                 {
 864                     /* The slab corresponds to the nodeid in the PME group */
 865                     if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
 866                     {
 867                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 868                     }
 869                 }
 870             }
 871         }
 872     }
 873     return ddranks;
 874 }
 875
 876 static gmx_bool receive_vir_ener(const gmx_domdec_t       *dd,
 877                                  gmx::ArrayRef<const int>  pmeRanks,
 878                                  const t_commrec          *cr)
 879 {
 880     gmx_bool           bReceive    = TRUE;
 881
 882     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
 883     if (ddRankSetup.usePmeOnlyRanks)
 884     {
 885         const CartesianRankSetup &cartSetup   = dd->comm->cartesianRankSetup;
 886         if (cartSetup.bCartesianPP_PME)
 887         {
 888 #if GMX_MPI
 889             int  pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
 890             ivec coords;
 891             MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
 892             coords[cartSetup.cartpmedim]++;
 893             if (coords[cartSetup.cartpmedim] < dd->nc[cartSetup.cartpmedim])
 894             {
 895                 int rank;
 896                 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
 897                 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
 898                 {
 899                     /* This is not the last PP node for pmenode */
 900                     bReceive = FALSE;
 901                 }
 902             }
 903 #else
 904             GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
 905 #endif
 906         }
 907         else
 908         {
 909             int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
 910             if (cr->sim_nodeid+1 < cr->nnodes &&
 911                 dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
 912             {
 913                 /* This is not the last PP node for pmenode */
 914                 bReceive = FALSE;
 915             }
 916         }
 917     }
 918
 919     return bReceive;
 920 }
 921
 922 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
 923 {
 924     gmx_domdec_comm_t *comm;
 925     int                i;
 926
 927     comm = dd->comm;
 928
 929     snew(*dim_f, dd->nc[dim]+1);
 930     (*dim_f)[0] = 0;
 931     for (i = 1; i < dd->nc[dim]; i++)
 932     {
 933         if (comm->slb_frac[dim])
 934         {
 935             (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
 936         }
 937         else
 938         {
 939             (*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
 940         }
 941     }
 942     (*dim_f)[dd->nc[dim]] = 1;
 943 }
 944
 945 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
 946 {
 947     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
 948
 949     if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
 950     {
 951         ddpme->dim = YY;
 952     }
 953     else
 954     {
 955         ddpme->dim = dimind;
 956     }
 957     ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
 958
 959     ddpme->nslab = (ddpme->dim == 0 ?
 960                     ddRankSetup.npmenodes_x :
 961                     ddRankSetup.npmenodes_y);
 962
 963     if (ddpme->nslab <= 1)
 964     {
 965         return;
 966     }
 967
 968     const int nso = ddRankSetup.numRanksDoingPme/ddpme->nslab;
 969     /* Determine for each PME slab the PP location range for dimension dim */
 970     snew(ddpme->pp_min, ddpme->nslab);
 971     snew(ddpme->pp_max, ddpme->nslab);
 972     for (int slab = 0; slab < ddpme->nslab; slab++)
 973     {
 974         ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
 975         ddpme->pp_max[slab] = 0;
 976     }
 977     for (int i = 0; i < dd->nnodes; i++)
 978     {
 979         ivec xyz;
 980         ddindex2xyz(dd->nc, i, xyz);
 981         /* For y only use our y/z slab.
 982          * This assumes that the PME x grid size matches the DD grid size.
 983          */
 984         if (dimind == 0 || xyz[XX] == dd->ci[XX])
 985         {
 986             const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
 987             int       slab;
 988             if (dimind == 0)
 989             {
 990                 slab = pmeindex/nso;
 991             }
 992             else
 993             {
 994                 slab = pmeindex % ddpme->nslab;
 995             }
 996             ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
 997             ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
 998         }
 999     }
1000
1001     set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1002 }
1003
1004 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1005 {
1006     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1007
1008     if (ddRankSetup.ddpme[0].dim == XX)
1009     {
1010         return ddRankSetup.ddpme[0].maxshift;
1011     }
1012     else
1013     {
1014         return 0;
1015     }
1016 }
1017
1018 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1019 {
1020     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1021
1022     if (ddRankSetup.ddpme[0].dim == YY)
1023     {
1024         return ddRankSetup.ddpme[0].maxshift;
1025     }
1026     else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
1027     {
1028         return ddRankSetup.ddpme[1].maxshift;
1029     }
1030     else
1031     {
1032         return 0;
1033     }
1034 }
1035
1036 bool ddHaveSplitConstraints(const gmx_domdec_t &dd)
1037 {
1038     return dd.comm->systemInfo.haveSplitConstraints;
1039 }
1040
1041 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
1042 {
1043     /* Note that the cycles value can be incorrect, either 0 or some
1044      * extremely large value, when our thread migrated to another core
1045      * with an unsynchronized cycle counter. If this happens less often
1046      * that once per nstlist steps, this will not cause issues, since
1047      * we later subtract the maximum value from the sum over nstlist steps.
1048      * A zero count will slightly lower the total, but that's a small effect.
1049      * Note that the main purpose of the subtraction of the maximum value
1050      * is to avoid throwing off the load balancing when stalls occur due
1051      * e.g. system activity or network congestion.
1052      */
1053     dd->comm->cycl[ddCycl] += cycles;
1054     dd->comm->cycl_n[ddCycl]++;
1055     if (cycles > dd->comm->cycl_max[ddCycl])
1056     {
1057         dd->comm->cycl_max[ddCycl] = cycles;
1058     }
1059 }
1060
1061 #if GMX_MPI
1062 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
1063 {
1064     MPI_Comm           c_row;
1065     int                dim, i, rank;
1066     ivec               loc_c;
1067     gmx_bool           bPartOfGroup = FALSE;
1068
1069     dim = dd->dim[dim_ind];
1070     copy_ivec(loc, loc_c);
1071     for (i = 0; i < dd->nc[dim]; i++)
1072     {
1073         loc_c[dim] = i;
1074         rank       = dd_index(dd->nc, loc_c);
1075         if (rank == dd->rank)
1076         {
1077             /* This process is part of the group */
1078             bPartOfGroup = TRUE;
1079         }
1080     }
1081     MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
1082                    &c_row);
1083     if (bPartOfGroup)
1084     {
1085         dd->comm->mpi_comm_load[dim_ind] = c_row;
1086         if (!isDlbDisabled(dd->comm))
1087         {
1088             DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1089
1090             if (dd->ci[dim] == dd->master_ci[dim])
1091             {
1092                 /* This is the root process of this row */
1093                 cellsizes.rowMaster  = std::make_unique<RowMaster>();
1094
1095                 RowMaster &rowMaster = *cellsizes.rowMaster;
1096                 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1097                 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
1098                 rowMaster.isCellMin.resize(dd->nc[dim]);
1099                 if (dim_ind > 0)
1100                 {
1101                     rowMaster.bounds.resize(dd->nc[dim]);
1102                 }
1103                 rowMaster.buf_ncd.resize(dd->nc[dim]);
1104             }
1105             else
1106             {
1107                 /* This is not a root process, we only need to receive cell_f */
1108                 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1109             }
1110         }
1111         if (dd->ci[dim] == dd->master_ci[dim])
1112         {
1113             snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
1114         }
1115     }
1116 }
1117 #endif
1118
1119 void dd_setup_dlb_resource_sharing(const t_commrec      *cr,
1120                                    int                   gpu_id)
1121 {
1122 #if GMX_MPI
1123     int           physicalnode_id_hash;
1124     gmx_domdec_t *dd;
1125     MPI_Comm      mpi_comm_pp_physicalnode;
1126
1127     if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1128     {
1129         /* Only ranks with short-ranged tasks (currently) use GPUs.
1130          * If we don't have GPUs assigned, there are no resources to share.
1131          */
1132         return;
1133     }
1134
1135     physicalnode_id_hash = gmx_physicalnode_id_hash();
1136
1137     dd = cr->dd;
1138
1139     if (debug)
1140     {
1141         fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1142         fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
1143                 dd->rank, physicalnode_id_hash, gpu_id);
1144     }
1145     /* Split the PP communicator over the physical nodes */
1146     /* TODO: See if we should store this (before), as it's also used for
1147      * for the nodecomm summation.
1148      */
1149     // TODO PhysicalNodeCommunicator could be extended/used to handle
1150     // the need for per-node per-group communicators.
1151     MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
1152                    &mpi_comm_pp_physicalnode);
1153     MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
1154                    &dd->comm->mpi_comm_gpu_shared);
1155     MPI_Comm_free(&mpi_comm_pp_physicalnode);
1156     MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1157
1158     if (debug)
1159     {
1160         fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1161     }
1162
1163     /* Note that some ranks could share a GPU, while others don't */
1164
1165     if (dd->comm->nrank_gpu_shared == 1)
1166     {
1167         MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1168     }
1169 #else
1170     GMX_UNUSED_VALUE(cr);
1171     GMX_UNUSED_VALUE(gpu_id);
1172 #endif
1173 }
1174
1175 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
1176 {
1177 #if GMX_MPI
1178     int  dim0, dim1, i, j;
1179     ivec loc;
1180
1181     if (debug)
1182     {
1183         fprintf(debug, "Making load communicators\n");
1184     }
1185
1186     dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1187     snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1188
1189     if (dd->ndim == 0)
1190     {
1191         return;
1192     }
1193
1194     clear_ivec(loc);
1195     make_load_communicator(dd, 0, loc);
1196     if (dd->ndim > 1)
1197     {
1198         dim0 = dd->dim[0];
1199         for (i = 0; i < dd->nc[dim0]; i++)
1200         {
1201             loc[dim0] = i;
1202             make_load_communicator(dd, 1, loc);
1203         }
1204     }
1205     if (dd->ndim > 2)
1206     {
1207         dim0 = dd->dim[0];
1208         for (i = 0; i < dd->nc[dim0]; i++)
1209         {
1210             loc[dim0] = i;
1211             dim1      = dd->dim[1];
1212             for (j = 0; j < dd->nc[dim1]; j++)
1213             {
1214                 loc[dim1] = j;
1215                 make_load_communicator(dd, 2, loc);
1216             }
1217         }
1218     }
1219
1220     if (debug)
1221     {
1222         fprintf(debug, "Finished making load communicators\n");
1223     }
1224 #endif
1225 }
1226
1227 /*! \brief Sets up the relation between neighboring domains and zones */
1228 static void setup_neighbor_relations(gmx_domdec_t *dd)
1229 {
1230     int                     d, dim, i, j, m;
1231     ivec                    tmp, s;
1232     gmx_domdec_zones_t     *zones;
1233     gmx_domdec_ns_ranges_t *izone;
1234     GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
1235
1236     for (d = 0; d < dd->ndim; d++)
1237     {
1238         dim = dd->dim[d];
1239         copy_ivec(dd->ci, tmp);
1240         tmp[dim]           = (tmp[dim] + 1) % dd->nc[dim];
1241         dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1242         copy_ivec(dd->ci, tmp);
1243         tmp[dim]           = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
1244         dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1245         if (debug)
1246         {
1247             fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
1248                     dd->rank, dim,
1249                     dd->neighbor[d][0],
1250                     dd->neighbor[d][1]);
1251         }
1252     }
1253
1254     int nzone  = (1 << dd->ndim);
1255     int nizone = (1 << std::max(dd->ndim - 1, 0));
1256     assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1257
1258     zones = &dd->comm->zones;
1259
1260     for (i = 0; i < nzone; i++)
1261     {
1262         m = 0;
1263         clear_ivec(zones->shift[i]);
1264         for (d = 0; d < dd->ndim; d++)
1265         {
1266             zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1267         }
1268     }
1269
1270     zones->n = nzone;
1271     for (i = 0; i < nzone; i++)
1272     {
1273         for (d = 0; d < DIM; d++)
1274         {
1275             s[d] = dd->ci[d] - zones->shift[i][d];
1276             if (s[d] < 0)
1277             {
1278                 s[d] += dd->nc[d];
1279             }
1280             else if (s[d] >= dd->nc[d])
1281             {
1282                 s[d] -= dd->nc[d];
1283             }
1284         }
1285     }
1286     zones->nizone = nizone;
1287     for (i = 0; i < zones->nizone; i++)
1288     {
1289         assert(ddNonbondedZonePairRanges[i][0] == i);
1290
1291         izone     = &zones->izone[i];
1292         /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
1293          * j-zones up to nzone.
1294          */
1295         izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
1296         izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
1297         for (dim = 0; dim < DIM; dim++)
1298         {
1299             if (dd->nc[dim] == 1)
1300             {
1301                 /* All shifts should be allowed */
1302                 izone->shift0[dim] = -1;
1303                 izone->shift1[dim] = 1;
1304             }
1305             else
1306             {
1307                 /* Determine the min/max j-zone shift wrt the i-zone */
1308                 izone->shift0[dim] = 1;
1309                 izone->shift1[dim] = -1;
1310                 for (j = izone->j0; j < izone->j1; j++)
1311                 {
1312                     int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
1313                     if (shift_diff < izone->shift0[dim])
1314                     {
1315                         izone->shift0[dim] = shift_diff;
1316                     }
1317                     if (shift_diff > izone->shift1[dim])
1318                     {
1319                         izone->shift1[dim] = shift_diff;
1320                     }
1321                 }
1322             }
1323         }
1324     }
1325
1326     if (!isDlbDisabled(dd->comm))
1327     {
1328         dd->comm->cellsizesWithDlb.resize(dd->ndim);
1329     }
1330
1331     if (dd->comm->ddSettings.recordLoad)
1332     {
1333         make_load_communicators(dd);
1334     }
1335 }
1336
1337 static void make_pp_communicator(const gmx::MDLogger  &mdlog,
1338                                  gmx_domdec_t         *dd,
1339                                  t_commrec gmx_unused *cr,
1340                                  bool gmx_unused       reorder)
1341 {
1342 #if GMX_MPI
1343     gmx_domdec_comm_t  *comm      = dd->comm;
1344     CartesianRankSetup &cartSetup = comm->cartesianRankSetup;
1345
1346     if (cartSetup.bCartesianPP)
1347     {
1348         /* Set up cartesian communication for the particle-particle part */
1349         GMX_LOG(mdlog.info).appendTextFormatted(
1350                 "Will use a Cartesian communicator: %d x %d x %d",
1351                 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
1352
1353         ivec periods;
1354         for (int i = 0; i < DIM; i++)
1355         {
1356             periods[i] = TRUE;
1357         }
1358         MPI_Comm comm_cart;
1359         MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
1360                         &comm_cart);
1361         /* We overwrite the old communicator with the new cartesian one */
1362         cr->mpi_comm_mygroup = comm_cart;
1363     }
1364
1365     dd->mpi_comm_all = cr->mpi_comm_mygroup;
1366     MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1367
1368     if (cartSetup.bCartesianPP_PME)
1369     {
1370         /* Since we want to use the original cartesian setup for sim,
1371          * and not the one after split, we need to make an index.
1372          */
1373         cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1374         cartSetup.ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
1375         gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1376         /* Get the rank of the DD master,
1377          * above we made sure that the master node is a PP node.
1378          */
1379         int rank;
1380         if (MASTER(cr))
1381         {
1382             rank = dd->rank;
1383         }
1384         else
1385         {
1386             rank = 0;
1387         }
1388         MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1389     }
1390     else if (cartSetup.bCartesianPP)
1391     {
1392         if (!comm->ddRankSetup.usePmeOnlyRanks)
1393         {
1394             /* The PP communicator is also
1395              * the communicator for this simulation
1396              */
1397             cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1398         }
1399         cr->nodeid = dd->rank;
1400
1401         MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1402
1403         /* We need to make an index to go from the coordinates
1404          * to the nodeid of this simulation.
1405          */
1406         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1407         std::vector<int> buf(dd->nnodes);
1408         if (thisRankHasDuty(cr, DUTY_PP))
1409         {
1410             buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1411         }
1412         /* Communicate the ddindex to simulation nodeid index */
1413         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1414                       cr->mpi_comm_mysim);
1415
1416         /* Determine the master coordinates and rank.
1417          * The DD master should be the same node as the master of this sim.
1418          */
1419         for (int i = 0; i < dd->nnodes; i++)
1420         {
1421             if (cartSetup.ddindex2simnodeid[i] == 0)
1422             {
1423                 ddindex2xyz(dd->nc, i, dd->master_ci);
1424                 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1425             }
1426         }
1427         if (debug)
1428         {
1429             fprintf(debug, "The master rank is %d\n", dd->masterrank);
1430         }
1431     }
1432     else
1433     {
1434         /* No Cartesian communicators */
1435         /* We use the rank in dd->comm->all as DD index */
1436         ddindex2xyz(dd->nc, dd->rank, dd->ci);
1437         /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1438         dd->masterrank = 0;
1439         clear_ivec(dd->master_ci);
1440     }
1441 #endif
1442
1443     GMX_LOG(mdlog.info).appendTextFormatted(
1444             "Domain decomposition rank %d, coordinates %d %d %d\n",
1445             dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1446     if (debug)
1447     {
1448         fprintf(debug,
1449                 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
1450                 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1451     }
1452 }
1453
1454 static void receive_ddindex2simnodeid(gmx_domdec_t         *dd,
1455                                       t_commrec            *cr)
1456 {
1457 #if GMX_MPI
1458     CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
1459
1460     if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1461     {
1462         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1463         std::vector<int> buf(dd->nnodes);
1464         if (thisRankHasDuty(cr, DUTY_PP))
1465         {
1466             buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1467         }
1468         /* Communicate the ddindex to simulation nodeid index */
1469         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1470                       cr->mpi_comm_mysim);
1471     }
1472 #else
1473     GMX_UNUSED_VALUE(dd);
1474     GMX_UNUSED_VALUE(cr);
1475 #endif
1476 }
1477
1478 static CartesianRankSetup
1479 split_communicator(const gmx::MDLogger    &mdlog,
1480                    t_commrec              *cr,
1481                    const DdRankOrder       ddRankOrder,
1482                    bool gmx_unused         reorder,
1483                    const DDRankSetup      &ddRankSetup,
1484                    ivec                    ddCellIndex,
1485                    std::vector<int>       *pmeRanks)
1486 {
1487     CartesianRankSetup cartSetup;
1488
1489     cartSetup.bCartesianPP     = (ddRankOrder == DdRankOrder::cartesian);
1490     cartSetup.bCartesianPP_PME = false;
1491
1492     const ivec &numDDCells     = ddRankSetup.numPPCells;
1493     /* Initially we set ntot to the number of PP cells */
1494     copy_ivec(numDDCells, cartSetup.ntot);
1495
1496     if (cartSetup.bCartesianPP)
1497     {
1498         const int numDDCellsTot = ddRankSetup.numPPRanks;
1499         bool      bDiv[DIM];
1500         for (int i = 1; i < DIM; i++)
1501         {
1502             bDiv[i] = ((ddRankSetup.numRanksDoingPme*numDDCells[i]) % numDDCellsTot == 0);
1503         }
1504         if (bDiv[YY] || bDiv[ZZ])
1505         {
1506             cartSetup.bCartesianPP_PME = TRUE;
1507             /* If we have 2D PME decomposition, which is always in x+y,
1508              * we stack the PME only nodes in z.
1509              * Otherwise we choose the direction that provides the thinnest slab
1510              * of PME only nodes as this will have the least effect
1511              * on the PP communication.
1512              * But for the PME communication the opposite might be better.
1513              */
1514             if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 ||
1515                              !bDiv[YY] ||
1516                              numDDCells[YY] > numDDCells[ZZ]))
1517             {
1518                 cartSetup.cartpmedim = ZZ;
1519             }
1520             else
1521             {
1522                 cartSetup.cartpmedim = YY;
1523             }
1524             cartSetup.ntot[cartSetup.cartpmedim]
1525                 += (ddRankSetup.numRanksDoingPme*numDDCells[cartSetup.cartpmedim])/numDDCellsTot;
1526         }
1527         else
1528         {
1529             GMX_LOG(mdlog.info).appendTextFormatted(
1530                     "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)",
1531                     ddRankSetup.numRanksDoingPme,
1532                     numDDCells[XX], numDDCells[YY],
1533                     numDDCells[XX], numDDCells[ZZ]);
1534             GMX_LOG(mdlog.info).appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1535         }
1536     }
1537
1538     if (cartSetup.bCartesianPP_PME)
1539     {
1540 #if GMX_MPI
1541         int  rank;
1542         ivec periods;
1543
1544         GMX_LOG(mdlog.info).appendTextFormatted(
1545                 "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1546                 cartSetup.ntot[XX], cartSetup.ntot[YY], cartSetup.ntot[ZZ]);
1547
1548         for (int i = 0; i < DIM; i++)
1549         {
1550             periods[i] = TRUE;
1551         }
1552         MPI_Comm comm_cart;
1553         MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder),
1554                         &comm_cart);
1555         MPI_Comm_rank(comm_cart, &rank);
1556         if (MASTER(cr) && rank != 0)
1557         {
1558             gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1559         }
1560
1561         /* With this assigment we loose the link to the original communicator
1562          * which will usually be MPI_COMM_WORLD, unless have multisim.
1563          */
1564         cr->mpi_comm_mysim = comm_cart;
1565         cr->sim_nodeid     = rank;
1566
1567         MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1568
1569         GMX_LOG(mdlog.info).appendTextFormatted(
1570                 "Cartesian rank %d, coordinates %d %d %d\n",
1571                 cr->sim_nodeid, ddCellIndex[XX], ddCellIndex[YY], ddCellIndex[ZZ]);
1572
1573         if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1574         {
1575             cr->duty = DUTY_PP;
1576         }
1577         if (!ddRankSetup.usePmeOnlyRanks ||
1578             ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1579         {
1580             cr->duty = DUTY_PME;
1581         }
1582
1583         /* Split the sim communicator into PP and PME only nodes */
1584         MPI_Comm_split(cr->mpi_comm_mysim,
1585                        getThisRankDuties(cr),
1586                        dd_index(cartSetup.ntot, ddCellIndex),
1587                        &cr->mpi_comm_mygroup);
1588 #else
1589         GMX_UNUSED_VALUE(ddCellIndex);
1590 #endif
1591     }
1592     else
1593     {
1594         switch (ddRankOrder)
1595         {
1596             case DdRankOrder::pp_pme:
1597                 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1598                 break;
1599             case DdRankOrder::interleave:
1600                 /* Interleave the PP-only and PME-only ranks */
1601                 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1602                 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1603                 break;
1604             case DdRankOrder::cartesian:
1605                 break;
1606             default:
1607                 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1608         }
1609
1610         if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1611         {
1612             cr->duty = DUTY_PME;
1613         }
1614         else
1615         {
1616             cr->duty = DUTY_PP;
1617         }
1618 #if GMX_MPI
1619         /* Split the sim communicator into PP and PME only nodes */
1620         MPI_Comm_split(cr->mpi_comm_mysim,
1621                        getThisRankDuties(cr),
1622                        cr->nodeid,
1623                        &cr->mpi_comm_mygroup);
1624         MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1625 #endif
1626     }
1627
1628     GMX_LOG(mdlog.info).appendTextFormatted(
1629             "This rank does only %s work.\n",
1630             thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1631
1632     return cartSetup;
1633 }
1634
1635 /*! \brief Makes the PP communicator and the PME communicator, when needed
1636  *
1637  * Returns the Cartesian rank setup.
1638  * Sets \p cr->mpi_comm_mygroup
1639  * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1640  * With separate PME ranks in interleaved order, set the PME ranks in \p pmeRanks.
1641  */
1642 static CartesianRankSetup
1643 makeGroupCommunicators(const gmx::MDLogger &mdlog,
1644                        const DDSettings    &ddSettings,
1645                        const DdRankOrder    ddRankOrder,
1646                        const DDRankSetup   &ddRankSetup,
1647                        t_commrec           *cr,
1648                        ivec                 ddCellIndex,
1649                        std::vector<int>    *pmeRanks)
1650 {
1651     CartesianRankSetup cartSetup;
1652
1653     if (ddRankSetup.usePmeOnlyRanks)
1654     {
1655         /* Split the communicator into a PP and PME part */
1656         cartSetup =
1657             split_communicator(mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder,
1658                                ddRankSetup, ddCellIndex, pmeRanks);
1659     }
1660     else
1661     {
1662         /* All nodes do PP and PME */
1663         /* We do not require separate communicators */
1664         cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
1665
1666         cartSetup.bCartesianPP     = false;
1667         cartSetup.bCartesianPP_PME = false;
1668     }
1669
1670     return cartSetup;
1671 }
1672
1673 /*! \brief For PP ranks, sets or makes the communicator
1674  *
1675  * For PME ranks get the rank id.
1676  * For PP only ranks, sets the PME-only rank.
1677  */
1678 static void setupGroupCommunication(const gmx::MDLogger      &mdlog,
1679                                     const DDSettings         &ddSettings,
1680                                     gmx::ArrayRef<const int>  pmeRanks,
1681                                     t_commrec                *cr,
1682                                     const int                 numAtomsInSystem,
1683                                     gmx_domdec_t             *dd)
1684 {
1685     const DDRankSetup        &ddRankSetup = dd->comm->ddRankSetup;
1686     const CartesianRankSetup &cartSetup   = dd->comm->cartesianRankSetup;
1687
1688     if (thisRankHasDuty(cr, DUTY_PP))
1689     {
1690         /* Copy or make a new PP communicator */
1691
1692         /* We (possibly) reordered the nodes in split_communicator,
1693          * so it is no longer required in make_pp_communicator.
1694          */
1695         const bool useCartesianReorder =
1696             (ddSettings.useCartesianReorder &&
1697              !cartSetup.bCartesianPP_PME);
1698
1699         make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1700     }
1701     else
1702     {
1703         receive_ddindex2simnodeid(dd, cr);
1704     }
1705
1706     if (!thisRankHasDuty(cr, DUTY_PME))
1707     {
1708         /* Set up the commnuication to our PME node */
1709         dd->pme_nodeid           = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1710         dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1711         if (debug)
1712         {
1713             fprintf(debug, "My pme_nodeid %d receive ener %s\n",
1714                     dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
1715         }
1716     }
1717     else
1718     {
1719         dd->pme_nodeid = -1;
1720     }
1721
1722     /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1723     if (MASTER(cr))
1724     {
1725         dd->ma = std::make_unique<AtomDistribution>(dd->nc,
1726                                                     numAtomsInSystem,
1727                                                     numAtomsInSystem);
1728     }
1729 }
1730
1731 static real *get_slb_frac(const gmx::MDLogger &mdlog,
1732                           const char *dir, int nc, const char *size_string)
1733 {
1734     real  *slb_frac, tot;
1735     int    i, n;
1736     double dbl;
1737
1738     slb_frac = nullptr;
1739     if (nc > 1 && size_string != nullptr)
1740     {
1741         GMX_LOG(mdlog.info).appendTextFormatted(
1742                 "Using static load balancing for the %s direction", dir);
1743         snew(slb_frac, nc);
1744         tot = 0;
1745         for (i = 0; i < nc; i++)
1746         {
1747             dbl = 0;
1748             sscanf(size_string, "%20lf%n", &dbl, &n);
1749             if (dbl == 0)
1750             {
1751                 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
1752             }
1753             slb_frac[i]  = dbl;
1754             size_string += n;
1755             tot         += slb_frac[i];
1756         }
1757         /* Normalize */
1758         std::string relativeCellSizes = "Relative cell sizes:";
1759         for (i = 0; i < nc; i++)
1760         {
1761             slb_frac[i]       /= tot;
1762             relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1763         }
1764         GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1765     }
1766
1767     return slb_frac;
1768 }
1769
1770 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
1771 {
1772     int                  n     = 0;
1773     gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1774     int                  nmol;
1775     while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1776     {
1777         for (auto &ilist : extractILists(*ilists, IF_BOND))
1778         {
1779             if (NRAL(ilist.functionType) >  2)
1780             {
1781                 n += nmol*(ilist.iatoms.size()/ilistStride(ilist));
1782             }
1783         }
1784     }
1785
1786     return n;
1787 }
1788
1789 static int dd_getenv(const gmx::MDLogger &mdlog,
1790                      const char *env_var, int def)
1791 {
1792     char *val;
1793     int   nst;
1794
1795     nst = def;
1796     val = getenv(env_var);
1797     if (val)
1798     {
1799         if (sscanf(val, "%20d", &nst) <= 0)
1800         {
1801             nst = 1;
1802         }
1803         GMX_LOG(mdlog.info).appendTextFormatted(
1804                 "Found env.var. %s = %s, using value %d",
1805                 env_var, val, nst);
1806     }
1807
1808     return nst;
1809 }
1810
1811 static void check_dd_restrictions(const gmx_domdec_t  *dd,
1812                                   const t_inputrec    *ir,
1813                                   const gmx::MDLogger &mdlog)
1814 {
1815     if (ir->ePBC == epbcSCREW &&
1816         (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
1817     {
1818         gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
1819     }
1820
1821     if (ir->nstlist == 0)
1822     {
1823         gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1824     }
1825
1826     if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
1827     {
1828         GMX_LOG(mdlog.warning).appendText("comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
1829     }
1830 }
1831
1832 static real average_cellsize_min(const gmx_ddbox_t &ddbox,
1833                                  const ivec         numDomains)
1834 {
1835     real r = ddbox.box_size[XX];
1836     for (int d = 0; d < DIM; d++)
1837     {
1838         if (numDomains[d] > 1)
1839         {
1840             /* Check using the initial average cell size */
1841             r = std::min(r, ddbox.box_size[d]*ddbox.skew_fac[d]/numDomains[d]);
1842         }
1843     }
1844
1845     return r;
1846 }
1847
1848 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
1849  */
1850 static DlbState forceDlbOffOrBail(DlbState             cmdlineDlbState,
1851                                   const std::string   &reasonStr,
1852                                   const gmx::MDLogger &mdlog)
1853 {
1854     std::string dlbNotSupportedErr  = "Dynamic load balancing requested, but ";
1855     std::string dlbDisableNote      = "NOTE: disabling dynamic load balancing as ";
1856
1857     if (cmdlineDlbState == DlbState::onUser)
1858     {
1859         gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1860     }
1861     else if (cmdlineDlbState == DlbState::offCanTurnOn)
1862     {
1863         GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1864     }
1865     return DlbState::offForever;
1866 }
1867
1868 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1869  *
1870  * This function parses the parameters of "-dlb" command line option setting
1871  * corresponding state values. Then it checks the consistency of the determined
1872  * state with other run parameters and settings. As a result, the initial state
1873  * may be altered or an error may be thrown if incompatibility of options is detected.
1874  *
1875  * \param [in] mdlog       Logger.
1876  * \param [in] dlbOption   Enum value for the DLB option.
1877  * \param [in] bRecordLoad True if the load balancer is recording load information.
1878  * \param [in] mdrunOptions  Options for mdrun.
1879  * \param [in] ir          Pointer mdrun to input parameters.
1880  * \returns                DLB initial/startup state.
1881  */
1882 static DlbState determineInitialDlbState(const gmx::MDLogger &mdlog,
1883                                          DlbOption dlbOption, gmx_bool bRecordLoad,
1884                                          const gmx::MdrunOptions &mdrunOptions,
1885                                          const t_inputrec *ir)
1886 {
1887     DlbState dlbState = DlbState::offCanTurnOn;
1888
1889     switch (dlbOption)
1890     {
1891         case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1892         case DlbOption::no:               dlbState = DlbState::offUser;      break;
1893         case DlbOption::yes:              dlbState = DlbState::onUser;       break;
1894         default: gmx_incons("Invalid dlbOption enum value");
1895     }
1896
1897     /* Reruns don't support DLB: bail or override auto mode */
1898     if (mdrunOptions.rerun)
1899     {
1900         std::string reasonStr = "it is not supported in reruns.";
1901         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1902     }
1903
1904     /* Unsupported integrators */
1905     if (!EI_DYNAMICS(ir->eI))
1906     {
1907         auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1908         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1909     }
1910
1911     /* Without cycle counters we can't time work to balance on */
1912     if (!bRecordLoad)
1913     {
1914         std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
1915         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1916     }
1917
1918     if (mdrunOptions.reproducible)
1919     {
1920         std::string reasonStr = "you started a reproducible run.";
1921         switch (dlbState)
1922         {
1923             case DlbState::offUser:
1924                 break;
1925             case DlbState::offForever:
1926                 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1927                 break;
1928             case DlbState::offCanTurnOn:
1929                 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1930             case DlbState::onCanTurnOff:
1931                 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1932                 break;
1933             case DlbState::onUser:
1934                 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", mdlog);
1935             default:
1936                 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", static_cast<int>(dlbState));
1937         }
1938     }
1939
1940     return dlbState;
1941 }
1942
1943 static gmx_domdec_comm_t *init_dd_comm()
1944 {
1945     gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
1946
1947     comm->n_load_have      = 0;
1948     comm->n_load_collect   = 0;
1949
1950     comm->haveTurnedOffDlb = false;
1951
1952     for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
1953     {
1954         comm->sum_nat[i] = 0;
1955     }
1956     comm->ndecomp   = 0;
1957     comm->nload     = 0;
1958     comm->load_step = 0;
1959     comm->load_sum  = 0;
1960     comm->load_max  = 0;
1961     clear_ivec(comm->load_lim);
1962     comm->load_mdf  = 0;
1963     comm->load_pme  = 0;
1964
1965     /* This should be replaced by a unique pointer */
1966     comm->balanceRegion = ddBalanceRegionAllocate();
1967
1968     return comm;
1969 }
1970
1971 /* Returns whether mtop contains constraints and/or vsites */
1972 static bool systemHasConstraintsOrVsites(const gmx_mtop_t &mtop)
1973 {
1974     auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
1975     int  nmol;
1976     while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
1977     {
1978         if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
1979         {
1980             return true;
1981         }
1982     }
1983
1984     return false;
1985 }
1986
1987 static void setupUpdateGroups(const gmx::MDLogger &mdlog,
1988                               const gmx_mtop_t    &mtop,
1989                               const t_inputrec    &inputrec,
1990                               const real           cutoffMargin,
1991                               DDSystemInfo        *systemInfo)
1992 {
1993     /* When we have constraints and/or vsites, it is beneficial to use
1994      * update groups (when possible) to allow independent update of groups.
1995      */
1996     if (!systemHasConstraintsOrVsites(mtop))
1997     {
1998         /* No constraints or vsites, atoms can be updated independently */
1999         return;
2000     }
2001
2002     systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
2003     systemInfo->useUpdateGroups               =
2004         (!systemInfo->updateGroupingPerMoleculetype.empty() &&
2005          getenv("GMX_NO_UPDATEGROUPS") == nullptr);
2006
2007     if (systemInfo->useUpdateGroups)
2008     {
2009         int numUpdateGroups = 0;
2010         for (const auto &molblock : mtop.molblock)
2011         {
2012             numUpdateGroups += molblock.nmol*systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
2013         }
2014
2015         systemInfo->maxUpdateGroupRadius =
2016             computeMaxUpdateGroupRadius(mtop,
2017                                         systemInfo->updateGroupingPerMoleculetype,
2018                                         maxReferenceTemperature(inputrec));
2019
2020         /* To use update groups, the large domain-to-domain cutoff distance
2021          * should be compatible with the box size.
2022          */
2023         systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
2024
2025         if (systemInfo->useUpdateGroups)
2026         {
2027             GMX_LOG(mdlog.info).appendTextFormatted(
2028                     "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f nm\n",
2029                     numUpdateGroups,
2030                     mtop.natoms/static_cast<double>(numUpdateGroups),
2031                     systemInfo->maxUpdateGroupRadius);
2032         }
2033         else
2034         {
2035             GMX_LOG(mdlog.info).appendTextFormatted("The combination of rlist and box size prohibits the use of update groups\n");
2036             systemInfo->updateGroupingPerMoleculetype.clear();
2037         }
2038     }
2039 }
2040
2041 UnitCellInfo::UnitCellInfo(const t_inputrec &ir) :
2042     npbcdim(ePBC2npbcdim(ir.ePBC)),
2043     numBoundedDimensions(inputrec2nboundeddim(&ir)),
2044     ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
2045     haveScrewPBC(ir.ePBC == epbcSCREW)
2046 {
2047 }
2048
2049 /* Returns whether molecules are always whole, i.e. not broken by PBC */
2050 static bool
2051 moleculesAreAlwaysWhole(const gmx_mtop_t                            &mtop,
2052                         const bool                                   useUpdateGroups,
2053                         gmx::ArrayRef<const gmx::RangePartitioning>  updateGroupingPerMoleculetype)
2054 {
2055     if (useUpdateGroups)
2056     {
2057         GMX_RELEASE_ASSERT(updateGroupingPerMoleculetype.size() == mtop.moltype.size(),
2058                            "Need one grouping per moltype");
2059         for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
2060         {
2061             if (updateGroupingPerMoleculetype[mol].numBlocks() > 1)
2062             {
2063                 return false;
2064             }
2065         }
2066     }
2067     else
2068     {
2069         for (const auto &moltype : mtop.moltype)
2070         {
2071             if (moltype.atoms.nr > 1)
2072             {
2073                 return false;
2074             }
2075         }
2076     }
2077
2078     return true;
2079 }
2080
2081 /*! \brief Generate the simulation system information */
2082 static DDSystemInfo
2083 getSystemInfo(const gmx::MDLogger           &mdlog,
2084               const t_commrec               *cr,
2085               const DomdecOptions           &options,
2086               const gmx_mtop_t              *mtop,
2087               const t_inputrec              *ir,
2088               const matrix                   box,
2089               gmx::ArrayRef<const gmx::RVec> xGlobal)
2090 {
2091     const real         tenPercentMargin = 1.1;
2092
2093     DDSystemInfo       systemInfo;
2094
2095     /* We need to decide on update groups early, as this affects communication distances */
2096     systemInfo.useUpdateGroups = false;
2097     if (ir->cutoff_scheme == ecutsVERLET)
2098     {
2099         real cutoffMargin = std::sqrt(max_cutoff2(ir->ePBC, box)) - ir->rlist;
2100         setupUpdateGroups(mdlog, *mtop, *ir, cutoffMargin, &systemInfo);
2101     }
2102
2103     systemInfo.moleculesAreAlwaysWhole         =
2104         moleculesAreAlwaysWhole(*mtop,
2105                                 systemInfo.useUpdateGroups,
2106                                 systemInfo.updateGroupingPerMoleculetype);
2107     systemInfo.haveInterDomainBondeds          = (!systemInfo.moleculesAreAlwaysWhole ||
2108                                                   mtop->bIntermolecularInteractions);
2109     systemInfo.haveInterDomainMultiBodyBondeds = (systemInfo.haveInterDomainBondeds &&
2110                                                   multi_body_bondeds_count(mtop) > 0);
2111
2112     if (systemInfo.useUpdateGroups)
2113     {
2114         systemInfo.haveSplitConstraints = false;
2115         systemInfo.haveSplitSettles     = false;
2116     }
2117     else
2118     {
2119         systemInfo.haveSplitConstraints = gmx::inter_charge_group_constraints(*mtop);
2120         systemInfo.haveSplitSettles     = gmx::inter_charge_group_settles(*mtop);
2121     }
2122
2123     if (ir->rlist == 0)
2124     {
2125         /* Set the cut-off to some very large value,
2126          * so we don't need if statements everywhere in the code.
2127          * We use sqrt, since the cut-off is squared in some places.
2128          */
2129         systemInfo.cutoff = GMX_CUTOFF_INF;
2130     }
2131     else
2132     {
2133         systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir->rlist);
2134     }
2135     systemInfo.minCutoffForMultiBody = 0;
2136
2137     /* Determine the minimum cell size limit, affected by many factors */
2138     systemInfo.cellsizeLimit             = 0;
2139     systemInfo.filterBondedCommunication = false;
2140
2141     /* We do not allow home atoms to move beyond the neighboring domain
2142      * between domain decomposition steps, which limits the cell size.
2143      * Get an estimate of cell size limit due to atom displacement.
2144      * In most cases this is a large overestimate, because it assumes
2145      * non-interaction atoms.
2146      * We set the chance to 1 in a trillion steps.
2147      */
2148     constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2149     const real     limitForAtomDisplacement          =
2150         minCellSizeForAtomDisplacement(*mtop, *ir,
2151                                        systemInfo.updateGroupingPerMoleculetype,
2152                                        c_chanceThatAtomMovesBeyondDomain);
2153     GMX_LOG(mdlog.info).appendTextFormatted(
2154             "Minimum cell size due to atom displacement: %.3f nm",
2155             limitForAtomDisplacement);
2156
2157     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2158                                         limitForAtomDisplacement);
2159
2160     /* TODO: PME decomposition currently requires atoms not to be more than
2161      *       2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2162      *       In nearly all cases, limitForAtomDisplacement will be smaller
2163      *       than 2/3*rlist, so the PME requirement is satisfied.
2164      *       But it would be better for both correctness and performance
2165      *       to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2166      *       Note that we would need to improve the pairlist buffer case.
2167      */
2168
2169     if (systemInfo.haveInterDomainBondeds)
2170     {
2171         if (options.minimumCommunicationRange > 0)
2172         {
2173             systemInfo.minCutoffForMultiBody =
2174                 atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2175             if (options.useBondedCommunication)
2176             {
2177                 systemInfo.filterBondedCommunication = (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2178             }
2179             else
2180             {
2181                 systemInfo.cutoff = std::max(systemInfo.cutoff,
2182                                              systemInfo.minCutoffForMultiBody);
2183             }
2184         }
2185         else if (ir->bPeriodicMols)
2186         {
2187             /* Can not easily determine the required cut-off */
2188             GMX_LOG(mdlog.warning).appendText("NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.");
2189             systemInfo.minCutoffForMultiBody = systemInfo.cutoff/2;
2190         }
2191         else
2192         {
2193             real r_2b, r_mb;
2194
2195             if (MASTER(cr))
2196             {
2197                 dd_bonded_cg_distance(mdlog, mtop, ir, as_rvec_array(xGlobal.data()), box,
2198                                       options.checkBondedInteractions,
2199                                       &r_2b, &r_mb);
2200             }
2201             gmx_bcast(sizeof(r_2b), &r_2b, cr);
2202             gmx_bcast(sizeof(r_mb), &r_mb, cr);
2203
2204             /* We use an initial margin of 10% for the minimum cell size,
2205              * except when we are just below the non-bonded cut-off.
2206              */
2207             if (options.useBondedCommunication)
2208             {
2209                 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2210                 {
2211                     const real r_bonded              = std::max(r_2b, r_mb);
2212                     systemInfo.minCutoffForMultiBody = tenPercentMargin*r_bonded;
2213                     /* This is the (only) place where we turn on the filtering */
2214                     systemInfo.filterBondedCommunication = true;
2215                 }
2216                 else
2217                 {
2218                     const real r_bonded              = r_mb;
2219                     systemInfo.minCutoffForMultiBody = std::min(tenPercentMargin*r_bonded,
2220                                                                 systemInfo.cutoff);
2221                 }
2222                 /* We determine cutoff_mbody later */
2223                 systemInfo.increaseMultiBodyCutoff = true;
2224             }
2225             else
2226             {
2227                 /* No special bonded communication,
2228                  * simply increase the DD cut-off.
2229                  */
2230                 systemInfo.minCutoffForMultiBody = tenPercentMargin*std::max(r_2b, r_mb);
2231                 systemInfo.cutoff                = std::max(systemInfo.cutoff,
2232                                                             systemInfo.minCutoffForMultiBody);
2233             }
2234         }
2235         GMX_LOG(mdlog.info).appendTextFormatted(
2236                 "Minimum cell size due to bonded interactions: %.3f nm",
2237                 systemInfo.minCutoffForMultiBody);
2238
2239         systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2240                                             systemInfo.minCutoffForMultiBody);
2241     }
2242
2243     systemInfo.constraintCommunicationRange = 0;
2244     if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
2245     {
2246         /* There is a cell size limit due to the constraints (P-LINCS) */
2247         systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, mtop, ir);
2248         GMX_LOG(mdlog.info).appendTextFormatted(
2249                 "Estimated maximum distance required for P-LINCS: %.3f nm",
2250                 systemInfo.constraintCommunicationRange);
2251         if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2252         {
2253             GMX_LOG(mdlog.info).appendText("This distance will limit the DD cell size, you can override this with -rcon");
2254         }
2255     }
2256     else if (options.constraintCommunicationRange > 0)
2257     {
2258         /* Here we do not check for dd->splitConstraints.
2259          * because one can also set a cell size limit for virtual sites only
2260          * and at this point we don't know yet if there are intercg v-sites.
2261          */
2262         GMX_LOG(mdlog.info).appendTextFormatted(
2263                 "User supplied maximum distance required for P-LINCS: %.3f nm",
2264                 options.constraintCommunicationRange);
2265         systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2266     }
2267     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2268                                         systemInfo.constraintCommunicationRange);
2269
2270     return systemInfo;
2271 }
2272
2273 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
2274  * implemented. */
2275 static void
2276 checkDDGridSetup(const DDGridSetup   &ddGridSetup,
2277                  const t_commrec     *cr,
2278                  const DomdecOptions &options,
2279                  const DDSettings    &ddSettings,
2280                  const DDSystemInfo  &systemInfo,
2281                  const real           cellsizeLimit,
2282                  const gmx_ddbox_t   &ddbox)
2283 {
2284     if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
2285     {
2286         char     buf[STRLEN];
2287         gmx_bool bC = (systemInfo.haveSplitConstraints &&
2288                        systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2289         sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
2290                 !bC ? "-rdd" : "-rcon",
2291                 ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2292                 bC ? " or your LINCS settings" : "");
2293
2294         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2295                              "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
2296                              "%s\n"
2297                              "Look in the log file for details on the domain decomposition",
2298                              cr->nnodes - ddGridSetup.numPmeOnlyRanks,
2299                              cellsizeLimit,
2300                              buf);
2301     }
2302
2303     const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
2304     if (acs < cellsizeLimit)
2305     {
2306         if (options.numCells[XX] <= 0)
2307         {
2308             GMX_RELEASE_ASSERT(false, "dd_choose_grid() should return a grid that satisfies the cell size limits");
2309         }
2310         else
2311         {
2312             gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2313                                  "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
2314                                  acs, cellsizeLimit);
2315         }
2316     }
2317
2318     const int numPPRanks = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2319     if (cr->nnodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2320     {
2321         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2322                              "The size of the domain decomposition grid (%d) does not match the number of PP ranks (%d). The total number of ranks is %d",
2323                              numPPRanks, cr->nnodes - ddGridSetup.numPmeOnlyRanks, cr->nnodes);
2324     }
2325     if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2326     {
2327         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2328                              "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", ddGridSetup.numPmeOnlyRanks, numPPRanks);
2329     }
2330 }
2331
2332 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2333 static DDRankSetup
2334 getDDRankSetup(const gmx::MDLogger &mdlog,
2335                t_commrec           *cr,
2336                const DDGridSetup   &ddGridSetup,
2337                const t_inputrec    &ir)
2338 {
2339     GMX_LOG(mdlog.info).appendTextFormatted(
2340             "Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2341             ddGridSetup.numDomains[XX], ddGridSetup.numDomains[YY], ddGridSetup.numDomains[ZZ],
2342             ddGridSetup.numPmeOnlyRanks);
2343
2344     DDRankSetup ddRankSetup;
2345
2346     ddRankSetup.numPPRanks = cr->nnodes - ddGridSetup.numPmeOnlyRanks;
2347     copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2348
2349     ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2350     if (ddRankSetup.usePmeOnlyRanks)
2351     {
2352         ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2353     }
2354     else
2355     {
2356         ddRankSetup.numRanksDoingPme = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2357     }
2358
2359     if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2360     {
2361         /* The following choices should match those
2362          * in comm_cost_est in domdec_setup.c.
2363          * Note that here the checks have to take into account
2364          * that the decomposition might occur in a different order than xyz
2365          * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2366          * in which case they will not match those in comm_cost_est,
2367          * but since that is mainly for testing purposes that's fine.
2368          */
2369         if (ddGridSetup.numDDDimensions >= 2 &&
2370             ddGridSetup.ddDimensions[0] == XX &&
2371             ddGridSetup.ddDimensions[1] == YY &&
2372             ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX] &&
2373             ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0 &&
2374             getenv("GMX_PMEONEDD") == nullptr)
2375         {
2376             ddRankSetup.npmedecompdim = 2;
2377             ddRankSetup.npmenodes_x   = ddGridSetup.numDomains[XX];
2378             ddRankSetup.npmenodes_y   = ddRankSetup.numRanksDoingPme/ddRankSetup.npmenodes_x;
2379         }
2380         else
2381         {
2382             /* In case nc is 1 in both x and y we could still choose to
2383              * decompose pme in y instead of x, but we use x for simplicity.
2384              */
2385             ddRankSetup.npmedecompdim = 1;
2386             if (ddGridSetup.ddDimensions[0] == YY)
2387             {
2388                 ddRankSetup.npmenodes_x = 1;
2389                 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2390             }
2391             else
2392             {
2393                 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2394                 ddRankSetup.npmenodes_y = 1;
2395             }
2396         }
2397         GMX_LOG(mdlog.info).appendTextFormatted(
2398                 "PME domain decomposition: %d x %d x %d",
2399                 ddRankSetup.npmenodes_x, ddRankSetup.npmenodes_y, 1);
2400     }
2401     else
2402     {
2403         ddRankSetup.npmedecompdim = 0;
2404         ddRankSetup.npmenodes_x   = 0;
2405         ddRankSetup.npmenodes_y   = 0;
2406     }
2407
2408     return ddRankSetup;
2409 }
2410
2411 /*! \brief Set the cell size and interaction limits */
2412 static void set_dd_limits(const gmx::MDLogger &mdlog,
2413                           t_commrec *cr, gmx_domdec_t *dd,
2414                           const DomdecOptions &options,
2415                           const DDSettings &ddSettings,
2416                           const DDSystemInfo &systemInfo,
2417                           const DDGridSetup &ddGridSetup,
2418                           const int numPPRanks,
2419                           const gmx_mtop_t *mtop,
2420                           const t_inputrec *ir,
2421                           const gmx_ddbox_t &ddbox)
2422 {
2423     gmx_domdec_comm_t *comm = dd->comm;
2424     comm->ddSettings        = ddSettings;
2425
2426     /* Initialize to GPU share count to 0, might change later */
2427     comm->nrank_gpu_shared = 0;
2428
2429     comm->dlbState         = comm->ddSettings.initialDlbState;
2430     dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2431     /* To consider turning DLB on after 2*nstlist steps we need to check
2432      * at partitioning count 3. Thus we need to increase the first count by 2.
2433      */
2434     comm->ddPartioningCountFirstDlbOff += 2;
2435
2436     comm->bPMELoadBalDLBLimits          = FALSE;
2437
2438     /* Allocate the charge group/atom sorting struct */
2439     comm->sort = std::make_unique<gmx_domdec_sort_t>();
2440
2441     comm->systemInfo = systemInfo;
2442
2443     if (systemInfo.useUpdateGroups)
2444     {
2445         /* Note: We would like to use dd->nnodes for the atom count estimate,
2446          *       but that is not yet available here. But this anyhow only
2447          *       affect performance up to the second dd_partition_system call.
2448          */
2449         const int homeAtomCountEstimate =  mtop->natoms/numPPRanks;
2450         comm->updateGroupsCog =
2451             std::make_unique<gmx::UpdateGroupsCog>(*mtop,
2452                                                    systemInfo.updateGroupingPerMoleculetype,
2453                                                    maxReferenceTemperature(*ir),
2454                                                    homeAtomCountEstimate);
2455     }
2456
2457     /* Set the DD setup given by ddGridSetup */
2458     copy_ivec(ddGridSetup.numDomains, dd->nc);
2459     dd->ndim = ddGridSetup.numDDDimensions;
2460     copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2461
2462     dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
2463
2464     snew(comm->slb_frac, DIM);
2465     if (isDlbDisabled(comm))
2466     {
2467         comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
2468         comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
2469         comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
2470     }
2471
2472     /* Set the multi-body cut-off and cellsize limit for DLB */
2473     comm->cutoff_mbody   = systemInfo.minCutoffForMultiBody;
2474     comm->cellsize_limit = systemInfo.cellsizeLimit;
2475     if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2476     {
2477         if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2478         {
2479             /* Set the bonded communication distance to halfway
2480              * the minimum and the maximum,
2481              * since the extra communication cost is nearly zero.
2482              */
2483             real acs           = average_cellsize_min(ddbox, dd->nc);
2484             comm->cutoff_mbody = 0.5*(systemInfo.minCutoffForMultiBody + acs);
2485             if (!isDlbDisabled(comm))
2486             {
2487                 /* Check if this does not limit the scaling */
2488                 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
2489                                               options.dlbScaling*acs);
2490             }
2491             if (!systemInfo.filterBondedCommunication)
2492             {
2493                 /* Without bBondComm do not go beyond the n.b. cut-off */
2494                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2495                 if (comm->cellsize_limit >= systemInfo.cutoff)
2496                 {
2497                     /* We don't loose a lot of efficieny
2498                      * when increasing it to the n.b. cut-off.
2499                      * It can even be slightly faster, because we need
2500                      * less checks for the communication setup.
2501                      */
2502                     comm->cutoff_mbody = systemInfo.cutoff;
2503                 }
2504             }
2505             /* Check if we did not end up below our original limit */
2506             comm->cutoff_mbody = std::max(comm->cutoff_mbody,
2507                                           systemInfo.minCutoffForMultiBody);
2508
2509             if (comm->cutoff_mbody > comm->cellsize_limit)
2510             {
2511                 comm->cellsize_limit = comm->cutoff_mbody;
2512             }
2513         }
2514         /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2515     }
2516
2517     if (debug)
2518     {
2519         fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
2520                 "cellsize limit %f\n",
2521                 gmx::boolToString(systemInfo.filterBondedCommunication),
2522                 comm->cellsize_limit);
2523     }
2524
2525     if (MASTER(cr))
2526     {
2527         check_dd_restrictions(dd, ir, mdlog);
2528     }
2529 }
2530
2531 void dd_init_bondeds(FILE                       *fplog,
2532                      gmx_domdec_t               *dd,
2533                      const gmx_mtop_t           *mtop,
2534                      const gmx_vsite_t          *vsite,
2535                      const t_inputrec           *ir,
2536                      gmx_bool                    bBCheck,
2537                      cginfo_mb_t                *cginfo_mb)
2538 {
2539     gmx_domdec_comm_t *comm;
2540
2541     dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
2542
2543     comm = dd->comm;
2544
2545     if (comm->systemInfo.filterBondedCommunication)
2546     {
2547         /* Communicate atoms beyond the cut-off for bonded interactions */
2548         comm->bondedLinks = makeBondedLinks(mtop, cginfo_mb);
2549     }
2550     else
2551     {
2552         /* Only communicate atoms based on cut-off */
2553         comm->bondedLinks = nullptr;
2554     }
2555 }
2556
2557 static void writeSettings(gmx::TextWriter       *log,
2558                           gmx_domdec_t          *dd,
2559                           const gmx_mtop_t      *mtop,
2560                           const t_inputrec      *ir,
2561                           gmx_bool               bDynLoadBal,
2562                           real                   dlb_scale,
2563                           const gmx_ddbox_t     *ddbox)
2564 {
2565     gmx_domdec_comm_t *comm;
2566     int                d;
2567     ivec               np;
2568     real               limit, shrink;
2569
2570     comm = dd->comm;
2571
2572     if (bDynLoadBal)
2573     {
2574         log->writeString("The maximum number of communication pulses is:");
2575         for (d = 0; d < dd->ndim; d++)
2576         {
2577             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2578         }
2579         log->ensureLineBreak();
2580         log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm", comm->cellsize_limit);
2581         log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2582         log->writeString("The allowed shrink of domain decomposition cells is:");
2583         for (d = 0; d < DIM; d++)
2584         {
2585             if (dd->nc[d] > 1)
2586             {
2587                 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
2588                 {
2589                     shrink = 0;
2590                 }
2591                 else
2592                 {
2593                     shrink =
2594                         comm->cellsize_min_dlb[d]/
2595                         (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
2596                 }
2597                 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2598             }
2599         }
2600         log->ensureLineBreak();
2601     }
2602     else
2603     {
2604         set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2605         log->writeString("The initial number of communication pulses is:");
2606         for (d = 0; d < dd->ndim; d++)
2607         {
2608             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2609         }
2610         log->ensureLineBreak();
2611         log->writeString("The initial domain decomposition cell size is:");
2612         for (d = 0; d < DIM; d++)
2613         {
2614             if (dd->nc[d] > 1)
2615             {
2616                 log->writeStringFormatted(" %c %.2f nm",
2617                                           dim2char(d), dd->comm->cellsize_min[d]);
2618             }
2619         }
2620         log->ensureLineBreak();
2621         log->writeLine();
2622     }
2623
2624     const bool haveInterDomainVsites =
2625         (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
2626
2627     if (comm->systemInfo.haveInterDomainBondeds ||
2628         haveInterDomainVsites ||
2629         comm->systemInfo.haveSplitConstraints ||
2630         comm->systemInfo.haveSplitSettles)
2631     {
2632         std::string decompUnits;
2633         if (comm->systemInfo.useUpdateGroups)
2634         {
2635             decompUnits = "atom groups";
2636         }
2637         else
2638         {
2639             decompUnits = "atoms";
2640         }
2641
2642         log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:", decompUnits.c_str());
2643         log->writeLineFormatted("%40s  %-7s %6.3f nm", "non-bonded interactions", "", comm->systemInfo.cutoff);
2644
2645         if (bDynLoadBal)
2646         {
2647             limit = dd->comm->cellsize_limit;
2648         }
2649         else
2650         {
2651             if (dd->unitCellInfo.ddBoxIsDynamic)
2652             {
2653                 log->writeLine("(the following are initial values, they could change due to box deformation)");
2654             }
2655             limit = dd->comm->cellsize_min[XX];
2656             for (d = 1; d < DIM; d++)
2657             {
2658                 limit = std::min(limit, dd->comm->cellsize_min[d]);
2659             }
2660         }
2661
2662         if (comm->systemInfo.haveInterDomainBondeds)
2663         {
2664             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2665                                     "two-body bonded interactions", "(-rdd)",
2666                                     std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2667             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2668                                     "multi-body bonded interactions", "(-rdd)",
2669                                     (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->systemInfo.cutoff, limit));
2670         }
2671         if (haveInterDomainVsites)
2672         {
2673             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2674                                     "virtual site constructions", "(-rcon)", limit);
2675         }
2676         if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2677         {
2678             std::string separation = gmx::formatString("atoms separated by up to %d constraints",
2679                                                        1+ir->nProjOrder);
2680             log->writeLineFormatted("%40s  %-7s %6.3f nm\n",
2681                                     separation.c_str(), "(-rcon)", limit);
2682         }
2683         log->ensureLineBreak();
2684     }
2685 }
2686
2687 static void logSettings(const gmx::MDLogger &mdlog,
2688                         gmx_domdec_t        *dd,
2689                         const gmx_mtop_t    *mtop,
2690                         const t_inputrec    *ir,
2691                         real                 dlb_scale,
2692                         const gmx_ddbox_t   *ddbox)
2693 {
2694     gmx::StringOutputStream stream;
2695     gmx::TextWriter         log(&stream);
2696     writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2697     if (dd->comm->dlbState == DlbState::offCanTurnOn)
2698     {
2699         {
2700             log.ensureEmptyLine();
2701             log.writeLine("When dynamic load balancing gets turned on, these settings will change to:");
2702         }
2703         writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2704     }
2705     GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2706 }
2707
2708 static void set_cell_limits_dlb(const gmx::MDLogger &mdlog,
2709                                 gmx_domdec_t        *dd,
2710                                 real                 dlb_scale,
2711                                 const t_inputrec    *ir,
2712                                 const gmx_ddbox_t   *ddbox)
2713 {
2714     gmx_domdec_comm_t *comm;
2715     int                d, dim, npulse, npulse_d_max, npulse_d;
2716     gmx_bool           bNoCutOff;
2717
2718     comm = dd->comm;
2719
2720     bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2721
2722     /* Determine the maximum number of comm. pulses in one dimension */
2723
2724     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2725
2726     /* Determine the maximum required number of grid pulses */
2727     if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2728     {
2729         /* Only a single pulse is required */
2730         npulse = 1;
2731     }
2732     else if (!bNoCutOff && comm->cellsize_limit > 0)
2733     {
2734         /* We round down slightly here to avoid overhead due to the latency
2735          * of extra communication calls when the cut-off
2736          * would be only slightly longer than the cell size.
2737          * Later cellsize_limit is redetermined,
2738          * so we can not miss interactions due to this rounding.
2739          */
2740         npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff/comm->cellsize_limit);
2741     }
2742     else
2743     {
2744         /* There is no cell size limit */
2745         npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
2746     }
2747
2748     if (!bNoCutOff && npulse > 1)
2749     {
2750         /* See if we can do with less pulses, based on dlb_scale */
2751         npulse_d_max = 0;
2752         for (d = 0; d < dd->ndim; d++)
2753         {
2754             dim      = dd->dim[d];
2755             npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->systemInfo.cutoff
2756                                         /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
2757             npulse_d_max = std::max(npulse_d_max, npulse_d);
2758         }
2759         npulse = std::min(npulse, npulse_d_max);
2760     }
2761
2762     /* This env var can override npulse */
2763     d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2764     if (d > 0)
2765     {
2766         npulse = d;
2767     }
2768
2769     comm->maxpulse       = 1;
2770     comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
2771     for (d = 0; d < dd->ndim; d++)
2772     {
2773         if (comm->ddSettings.request1DAnd1Pulse)
2774         {
2775             comm->cd[d].np_dlb = 1;
2776         }
2777         else
2778         {
2779             comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
2780             comm->maxpulse     = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2781         }
2782         if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
2783         {
2784             comm->bVacDLBNoLimit = FALSE;
2785         }
2786     }
2787
2788     /* cellsize_limit is set for LINCS in init_domain_decomposition */
2789     if (!comm->bVacDLBNoLimit)
2790     {
2791         comm->cellsize_limit = std::max(comm->cellsize_limit,
2792                                         comm->systemInfo.cutoff/comm->maxpulse);
2793     }
2794     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2795     /* Set the minimum cell size for each DD dimension */
2796     for (d = 0; d < dd->ndim; d++)
2797     {
2798         if (comm->bVacDLBNoLimit ||
2799             comm->cd[d].np_dlb*comm->cellsize_limit >= comm->systemInfo.cutoff)
2800         {
2801             comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2802         }
2803         else
2804         {
2805             comm->cellsize_min_dlb[dd->dim[d]] =
2806                 comm->systemInfo.cutoff/comm->cd[d].np_dlb;
2807         }
2808     }
2809     if (comm->cutoff_mbody <= 0)
2810     {
2811         comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2812     }
2813     if (isDlbOn(comm))
2814     {
2815         set_dlb_limits(dd);
2816     }
2817 }
2818
2819 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t &dd)
2820 {
2821     return dd.comm->systemInfo.moleculesAreAlwaysWhole;
2822 }
2823
2824 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
2825 {
2826     /* If each molecule is a single charge group
2827      * or we use domain decomposition for each periodic dimension,
2828      * we do not need to take pbc into account for the bonded interactions.
2829      */
2830     return (ePBC != epbcNONE && dd->comm->systemInfo.haveInterDomainBondeds &&
2831             !(dd->nc[XX] > 1 &&
2832               dd->nc[YY] > 1 &&
2833               (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
2834 }
2835
2836 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2837 static void set_ddgrid_parameters(const gmx::MDLogger &mdlog,
2838                                   gmx_domdec_t *dd, real dlb_scale,
2839                                   const gmx_mtop_t *mtop, const t_inputrec *ir,
2840                                   const gmx_ddbox_t *ddbox)
2841 {
2842     gmx_domdec_comm_t *comm        = dd->comm;
2843     DDRankSetup       &ddRankSetup = comm->ddRankSetup;
2844
2845     if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2846     {
2847         init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2848         if (ddRankSetup.npmedecompdim >= 2)
2849         {
2850             init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2851         }
2852     }
2853     else
2854     {
2855         ddRankSetup.numRanksDoingPme = 0;
2856         if (dd->pme_nodeid >= 0)
2857         {
2858             gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2859                                  "Can not have separate PME ranks without PME electrostatics");
2860         }
2861     }
2862
2863     if (debug)
2864     {
2865         fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2866     }
2867     if (!isDlbDisabled(comm))
2868     {
2869         set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2870     }
2871
2872     logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
2873
2874     real vol_frac;
2875     if (ir->ePBC == epbcNONE)
2876     {
2877         vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
2878     }
2879     else
2880     {
2881         vol_frac =
2882             (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, *ddbox))/static_cast<double>(dd->nnodes);
2883     }
2884     if (debug)
2885     {
2886         fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2887     }
2888     int natoms_tot = mtop->natoms;
2889
2890     dd->ga2la      = new gmx_ga2la_t(natoms_tot,
2891                                      static_cast<int>(vol_frac*natoms_tot));
2892 }
2893
2894 /*! \brief Get some important DD parameters which can be modified by env.vars */
2895 static DDSettings
2896 getDDSettings(const gmx::MDLogger     &mdlog,
2897               const DomdecOptions     &options,
2898               const gmx::MdrunOptions &mdrunOptions,
2899               const t_inputrec        &ir)
2900 {
2901     DDSettings ddSettings;
2902
2903     ddSettings.useSendRecv2        = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2904     ddSettings.dlb_scale_lim       = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2905     ddSettings.request1DAnd1Pulse  = bool(dd_getenv(mdlog, "GMX_DD_1D_1PULSE", 0));
2906     ddSettings.useDDOrderZYX       = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
2907     ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
2908     ddSettings.eFlop               = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2909     const int recload              = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2910     ddSettings.nstDDDump           = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2911     ddSettings.nstDDDumpGrid       = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2912     ddSettings.DD_debug            = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
2913
2914     if (ddSettings.useSendRecv2)
2915     {
2916         GMX_LOG(mdlog.info).appendText("Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication");
2917     }
2918
2919     if (ddSettings.eFlop)
2920     {
2921         GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2922         ddSettings.recordLoad = true;
2923     }
2924     else
2925     {
2926         ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
2927     }
2928
2929     ddSettings.initialDlbState =
2930         determineInitialDlbState(mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, &ir);
2931     GMX_LOG(mdlog.info).appendTextFormatted("Dynamic load balancing: %s",
2932                                             edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);
2933
2934     return ddSettings;
2935 }
2936
2937 gmx_domdec_t::gmx_domdec_t(const t_inputrec &ir) :
2938     unitCellInfo(ir)
2939 {
2940 }
2941
2942 /*! \brief Return whether the simulation described can run a 1D single-pulse DD.
2943  *
2944  * The GPU halo exchange code requires a 1D single-pulse DD. Such a DD
2945  * generally requires a larger box than other possible decompositions
2946  * with the same rank count, so the calling code might need to decide
2947  * what is the most appropriate way to run the simulation based on
2948  * whether such a DD is possible.
2949  *
2950  * This function works like init_domain_decomposition(), but will not
2951  * give a fatal error, and only uses \c cr for communicating between
2952  * ranks.
2953  *
2954  * It is safe to call before thread-MPI spawns ranks, so that
2955  * thread-MPI can decide whether and how to trigger the GPU halo
2956  * exchange code path. The number of PME ranks, if any, should be set
2957  * in \c options.numPmeRanks.
2958  */
2959 static bool
2960 canMake1DAnd1PulseDomainDecomposition(const DDSettings              &ddSettingsOriginal,
2961                                       const t_commrec               *cr,
2962                                       const int                      numRanksRequested,
2963                                       const DomdecOptions           &options,
2964                                       const gmx_mtop_t              &mtop,
2965                                       const t_inputrec              &ir,
2966                                       const matrix                   box,
2967                                       gmx::ArrayRef<const gmx::RVec> xGlobal)
2968 {
2969     // Ensure we don't write any output from this checking routine
2970     gmx::MDLogger dummyLogger;
2971
2972     DDSystemInfo  systemInfo = getSystemInfo(dummyLogger, cr, options, &mtop, &ir, box, xGlobal);
2973
2974     DDSettings    ddSettings = ddSettingsOriginal;
2975     ddSettings.request1DAnd1Pulse = true;
2976     const real    gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(dummyLogger, ddSettings.request1DAnd1Pulse,
2977                                                                        !isDlbDisabled(ddSettings.initialDlbState),
2978                                                                        options.dlbScaling, ir,
2979                                                                        systemInfo.cellsizeLimit);
2980     gmx_ddbox_t ddbox       = {0};
2981     DDGridSetup ddGridSetup = getDDGridSetup(dummyLogger, cr, numRanksRequested, options,
2982                                              ddSettings, systemInfo, gridSetupCellsizeLimit,
2983                                              mtop, ir, box, xGlobal, &ddbox);
2984
2985     const bool canMakeDDWith1DAnd1Pulse = (ddGridSetup.numDomains[XX] != 0);
2986
2987     return canMakeDDWith1DAnd1Pulse;
2988 }
2989
2990 bool is1DAnd1PulseDD(const gmx_domdec_t &dd)
2991 {
2992     const int  maxDimensionSize             = std::max(dd.nc[XX], std::max(dd.nc[YY], dd.nc[ZZ]));
2993     const int  productOfDimensionSizes      = dd.nc[XX]*dd.nc[YY]*dd.nc[ZZ];
2994     const bool decompositionHasOneDimension = (maxDimensionSize == productOfDimensionSizes);
2995
2996     const bool hasMax1Pulse =
2997         ((isDlbDisabled(dd.comm) && dd.comm->cellsize_limit >= dd.comm->systemInfo.cutoff) ||
2998          (!isDlbDisabled(dd.comm) && dd.comm->maxpulse == 1));
2999
3000     return decompositionHasOneDimension && hasMax1Pulse;
3001
3002 }
3003
3004 gmx_domdec_t *init_domain_decomposition(const gmx::MDLogger           &mdlog,
3005                                         t_commrec                     *cr,
3006                                         const DomdecOptions           &options,
3007                                         const gmx::MdrunOptions       &mdrunOptions,
3008                                         const bool                     prefer1DAnd1Pulse,
3009                                         const gmx_mtop_t              *mtop,
3010                                         const t_inputrec              *ir,
3011                                         const matrix                   box,
3012                                         gmx::ArrayRef<const gmx::RVec> xGlobal,
3013                                         gmx::LocalAtomSetManager      *atomSets)
3014 {
3015     GMX_LOG(mdlog.info).appendTextFormatted(
3016             "\nInitializing Domain Decomposition on %d ranks", cr->nnodes);
3017
3018     DDSettings  ddSettings = getDDSettings(mdlog, options, mdrunOptions, *ir);
3019
3020     if (prefer1DAnd1Pulse &&
3021         canMake1DAnd1PulseDomainDecomposition(ddSettings, cr, cr->nnodes, options,
3022                                               *mtop, *ir, box, xGlobal))
3023     {
3024         ddSettings.request1DAnd1Pulse = true;
3025     }
3026
3027     if (ddSettings.eFlop > 1)
3028     {
3029         /* Ensure that we have different random flop counts on different ranks */
3030         srand(1 + cr->nodeid);
3031     }
3032
3033     DDSystemInfo systemInfo = getSystemInfo(mdlog, cr, options, mtop, ir, box, xGlobal);
3034
3035     int          numRanksRequested = cr->nnodes;
3036     checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir->coulombtype), options.numPmeRanks);
3037
3038     // DD grid setup uses a more different cell size limit for
3039     // automated setup than the one in systemInfo. The latter is used
3040     // in set_dd_limits() to configure DLB, for example.
3041     const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(mdlog, ddSettings.request1DAnd1Pulse,
3042                                                                     !isDlbDisabled(ddSettings.initialDlbState),
3043                                                                     options.dlbScaling, *ir,
3044                                                                     systemInfo.cellsizeLimit);
3045     gmx_ddbox_t  ddbox       = {0};
3046     DDGridSetup  ddGridSetup = getDDGridSetup(mdlog, cr, numRanksRequested, options,
3047                                               ddSettings, systemInfo, gridSetupCellsizeLimit,
3048                                               *mtop, *ir, box, xGlobal, &ddbox);
3049     checkDDGridSetup(ddGridSetup, cr, options, ddSettings, systemInfo, gridSetupCellsizeLimit, ddbox);
3050
3051     cr->npmenodes = ddGridSetup.numPmeOnlyRanks;
3052
3053     DDRankSetup ddRankSetup = getDDRankSetup(mdlog, cr, ddGridSetup, *ir);
3054
3055     /* Generate the group communicator, also decides the duty of each rank */
3056     ivec               ddCellIndex = { 0, 0, 0 };
3057     std::vector<int>   pmeRanks;
3058     CartesianRankSetup cartSetup   =
3059         makeGroupCommunicators(mdlog, ddSettings, options.rankOrder,
3060                                ddRankSetup, cr,
3061                                ddCellIndex, &pmeRanks);
3062
3063     gmx_domdec_t *dd = new gmx_domdec_t(*ir);
3064
3065     copy_ivec(ddCellIndex, dd->ci);
3066
3067     dd->comm = init_dd_comm();
3068
3069     dd->comm->ddRankSetup        = ddRankSetup;
3070     dd->comm->cartesianRankSetup = cartSetup;
3071
3072     set_dd_limits(mdlog, cr, dd, options,
3073                   ddSettings, systemInfo,
3074                   ddGridSetup,
3075                   ddRankSetup.numPPRanks,
3076                   mtop, ir,
3077                   ddbox);
3078
3079     setupGroupCommunication(mdlog, ddSettings, pmeRanks, cr, mtop->natoms, dd);
3080
3081     if (thisRankHasDuty(cr, DUTY_PP))
3082     {
3083         set_ddgrid_parameters(mdlog, dd, options.dlbScaling, mtop, ir, &ddbox);
3084
3085         setup_neighbor_relations(dd);
3086     }
3087
3088     /* Set overallocation to avoid frequent reallocation of arrays */
3089     set_over_alloc_dd(TRUE);
3090
3091     dd->atomSets = atomSets;
3092
3093     return dd;
3094 }
3095
3096 static gmx_bool test_dd_cutoff(t_commrec                     *cr,
3097                                const matrix                   box,
3098                                gmx::ArrayRef<const gmx::RVec> x,
3099                                real                           cutoffRequested)
3100 {
3101     gmx_domdec_t *dd;
3102     gmx_ddbox_t   ddbox;
3103     int           d, dim, np;
3104     real          inv_cell_size;
3105     int           LocallyLimited;
3106
3107     dd = cr->dd;
3108
3109     set_ddbox(*dd, false, box, true, x, &ddbox);
3110
3111     LocallyLimited = 0;
3112
3113     for (d = 0; d < dd->ndim; d++)
3114     {
3115         dim = dd->dim[d];
3116
3117         inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
3118         if (dd->unitCellInfo.ddBoxIsDynamic)
3119         {
3120             inv_cell_size *= DD_PRES_SCALE_MARGIN;
3121         }
3122
3123         np = 1 + static_cast<int>(cutoffRequested*inv_cell_size*ddbox.skew_fac[dim]);
3124
3125         if (dd->comm->ddSettings.request1DAnd1Pulse && np > 1)
3126         {
3127             return FALSE;
3128         }
3129
3130         if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3131         {
3132             if (np > dd->comm->cd[d].np_dlb)
3133             {
3134                 return FALSE;
3135             }
3136
3137             /* If a current local cell size is smaller than the requested
3138              * cut-off, we could still fix it, but this gets very complicated.
3139              * Without fixing here, we might actually need more checks.
3140              */
3141             real cellSizeAlongDim = (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim];
3142             if (cellSizeAlongDim*dd->comm->cd[d].np_dlb < cutoffRequested)
3143             {
3144                 LocallyLimited = 1;
3145             }
3146         }
3147     }
3148
3149     if (!isDlbDisabled(dd->comm))
3150     {
3151         /* If DLB is not active yet, we don't need to check the grid jumps.
3152          * Actually we shouldn't, because then the grid jump data is not set.
3153          */
3154         if (isDlbOn(dd->comm) &&
3155             check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3156         {
3157             LocallyLimited = 1;
3158         }
3159
3160         gmx_sumi(1, &LocallyLimited, cr);
3161
3162         if (LocallyLimited > 0)
3163         {
3164             return FALSE;
3165         }
3166     }
3167
3168     return TRUE;
3169 }
3170
3171 gmx_bool change_dd_cutoff(t_commrec                     *cr,
3172                           const matrix                   box,
3173                           gmx::ArrayRef<const gmx::RVec> x,
3174                           real                           cutoffRequested)
3175 {
3176     gmx_bool bCutoffAllowed;
3177
3178     bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3179
3180     if (bCutoffAllowed)
3181     {
3182         cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3183     }
3184
3185     return bCutoffAllowed;
3186 }