src/gromacs/domdec/domdec.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #include "gmxpre.h"
  37
  38 #include "domdec.h"
  39
  40 #include "config.h"
  41
  42 #include <cassert>
  43 #include <cinttypes>
  44 #include <climits>
  45 #include <cmath>
  46 #include <cstdio>
  47 #include <cstdlib>
  48 #include <cstring>
  49
  50 #include <algorithm>
  51 #include <memory>
  52
  53 #include "gromacs/domdec/builder.h"
  54 #include "gromacs/domdec/collect.h"
  55 #include "gromacs/domdec/dlb.h"
  56 #include "gromacs/domdec/dlbtiming.h"
  57 #include "gromacs/domdec/domdec_network.h"
  58 #include "gromacs/domdec/ga2la.h"
  59 #include "gromacs/domdec/gpuhaloexchange.h"
  60 #include "gromacs/domdec/options.h"
  61 #include "gromacs/domdec/partition.h"
  62 #include "gromacs/gmxlib/network.h"
  63 #include "gromacs/gmxlib/nrnb.h"
  64 #include "gromacs/gpu_utils/gpu_utils.h"
  65 #include "gromacs/hardware/hw_info.h"
  66 #include "gromacs/listed_forces/manage_threading.h"
  67 #include "gromacs/math/vec.h"
  68 #include "gromacs/math/vectypes.h"
  69 #include "gromacs/mdlib/calc_verletbuf.h"
  70 #include "gromacs/mdlib/constr.h"
  71 #include "gromacs/mdlib/constraintrange.h"
  72 #include "gromacs/mdlib/updategroups.h"
  73 #include "gromacs/mdlib/vsite.h"
  74 #include "gromacs/mdtypes/commrec.h"
  75 #include "gromacs/mdtypes/forceoutput.h"
  76 #include "gromacs/mdtypes/inputrec.h"
  77 #include "gromacs/mdtypes/mdrunoptions.h"
  78 #include "gromacs/mdtypes/state.h"
  79 #include "gromacs/pbcutil/ishift.h"
  80 #include "gromacs/pbcutil/pbc.h"
  81 #include "gromacs/pulling/pull.h"
  82 #include "gromacs/timing/wallcycle.h"
  83 #include "gromacs/topology/block.h"
  84 #include "gromacs/topology/idef.h"
  85 #include "gromacs/topology/ifunc.h"
  86 #include "gromacs/topology/mtop_lookup.h"
  87 #include "gromacs/topology/mtop_util.h"
  88 #include "gromacs/topology/topology.h"
  89 #include "gromacs/utility/basedefinitions.h"
  90 #include "gromacs/utility/basenetwork.h"
  91 #include "gromacs/utility/cstringutil.h"
  92 #include "gromacs/utility/exceptions.h"
  93 #include "gromacs/utility/fatalerror.h"
  94 #include "gromacs/utility/gmxmpi.h"
  95 #include "gromacs/utility/logger.h"
  96 #include "gromacs/utility/real.h"
  97 #include "gromacs/utility/smalloc.h"
  98 #include "gromacs/utility/strconvert.h"
  99 #include "gromacs/utility/stringstream.h"
 100 #include "gromacs/utility/stringutil.h"
 101 #include "gromacs/utility/textwriter.h"
 102
 103 #include "atomdistribution.h"
 104 #include "box.h"
 105 #include "cellsizes.h"
 106 #include "distribute.h"
 107 #include "domdec_constraints.h"
 108 #include "domdec_internal.h"
 109 #include "domdec_setup.h"
 110 #include "domdec_vsite.h"
 111 #include "redistribute.h"
 112 #include "utility.h"
 113
 114 // TODO remove this when moving domdec into gmx namespace
 115 using gmx::DdRankOrder;
 116 using gmx::DlbOption;
 117 using gmx::DomdecOptions;
 118
 119 static const char *edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
 120
 121 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
 122 #define DD_CGIBS 2
 123
 124 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
 125 #define DD_FLAG_NRCG  65535
 126 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
 127 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
 128
 129 /* The DD zone order */
 130 static const ivec dd_zo[DD_MAXZONE] =
 131 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
 132
 133 /* The non-bonded zone-pair setup for domain decomposition
 134  * The first number is the i-zone, the second number the first j-zone seen by
 135  * this i-zone, the third number the last+1 j-zone seen by this i-zone.
 136  * As is, this is for 3D decomposition, where there are 4 i-zones.
 137  * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
 138  * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
 139  */
 140 static const int
 141     ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
 142                                                  {1, 3, 6},
 143                                                  {2, 5, 6},
 144                                                  {3, 5, 7}};
 145
 146
 147 /*
 148    #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
 149
 150    static void index2xyz(ivec nc,int ind,ivec xyz)
 151    {
 152    xyz[XX] = ind % nc[XX];
 153    xyz[YY] = (ind / nc[XX]) % nc[YY];
 154    xyz[ZZ] = ind / (nc[YY]*nc[XX]);
 155    }
 156  */
 157
 158 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
 159 {
 160     xyz[XX] = ind / (nc[YY]*nc[ZZ]);
 161     xyz[YY] = (ind / nc[ZZ]) % nc[YY];
 162     xyz[ZZ] = ind % nc[ZZ];
 163 }
 164
 165 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
 166 {
 167     int                       ddnodeid = -1;
 168
 169     const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
 170     const int                 ddindex   = dd_index(dd->nc, c);
 171     if (cartSetup.bCartesianPP_PME)
 172     {
 173         ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
 174     }
 175     else if (cartSetup.bCartesianPP)
 176     {
 177 #if GMX_MPI
 178         MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
 179 #endif
 180     }
 181     else
 182     {
 183         ddnodeid = ddindex;
 184     }
 185
 186     return ddnodeid;
 187 }
 188
 189 int ddglatnr(const gmx_domdec_t *dd, int i)
 190 {
 191     int atnr;
 192
 193     if (dd == nullptr)
 194     {
 195         atnr = i + 1;
 196     }
 197     else
 198     {
 199         if (i >= dd->comm->atomRanges.numAtomsTotal())
 200         {
 201             gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
 202         }
 203         atnr = dd->globalAtomIndices[i] + 1;
 204     }
 205
 206     return atnr;
 207 }
 208
 209 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t &dd)
 210 {
 211     GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
 212     return dd.comm->systemInfo.updateGroupingPerMoleculetype;
 213 }
 214
 215 void dd_store_state(gmx_domdec_t *dd, t_state *state)
 216 {
 217     int i;
 218
 219     if (state->ddp_count != dd->ddp_count)
 220     {
 221         gmx_incons("The MD state does not match the domain decomposition state");
 222     }
 223
 224     state->cg_gl.resize(dd->ncg_home);
 225     for (i = 0; i < dd->ncg_home; i++)
 226     {
 227         state->cg_gl[i] = dd->globalAtomGroupIndices[i];
 228     }
 229
 230     state->ddp_count_cg_gl = dd->ddp_count;
 231 }
 232
 233 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
 234 {
 235     return &dd->comm->zones;
 236 }
 237
 238 int dd_numHomeAtoms(const gmx_domdec_t &dd)
 239 {
 240     return dd.comm->atomRanges.numHomeAtoms();
 241 }
 242
 243 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
 244 {
 245     /* We currently set mdatoms entries for all atoms:
 246      * local + non-local + communicated for vsite + constraints
 247      */
 248
 249     return dd->comm->atomRanges.numAtomsTotal();
 250 }
 251
 252 int dd_natoms_vsite(const gmx_domdec_t *dd)
 253 {
 254     return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
 255 }
 256
 257 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
 258 {
 259     *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
 260     *at_end   = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
 261 }
 262
 263 void dd_move_x(gmx_domdec_t             *dd,
 264                const matrix              box,
 265                gmx::ArrayRef<gmx::RVec>  x,
 266                gmx_wallcycle            *wcycle)
 267 {
 268     wallcycle_start(wcycle, ewcMOVEX);
 269
 270     int                    nzone, nat_tot;
 271     gmx_domdec_comm_t     *comm;
 272     gmx_domdec_comm_dim_t *cd;
 273     rvec                   shift = {0, 0, 0};
 274     gmx_bool               bPBC, bScrew;
 275
 276     comm = dd->comm;
 277
 278     nzone   = 1;
 279     nat_tot = comm->atomRanges.numHomeAtoms();
 280     for (int d = 0; d < dd->ndim; d++)
 281     {
 282         bPBC   = (dd->ci[dd->dim[d]] == 0);
 283         bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
 284         if (bPBC)
 285         {
 286             copy_rvec(box[dd->dim[d]], shift);
 287         }
 288         cd = &comm->cd[d];
 289         for (const gmx_domdec_ind_t &ind : cd->ind)
 290         {
 291             DDBufferAccess<gmx::RVec>  sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 292             gmx::ArrayRef<gmx::RVec>  &sendBuffer = sendBufferAccess.buffer;
 293             int                        n          = 0;
 294             if (!bPBC)
 295             {
 296                 for (int j : ind.index)
 297                 {
 298                     sendBuffer[n] = x[j];
 299                     n++;
 300                 }
 301             }
 302             else if (!bScrew)
 303             {
 304                 for (int j : ind.index)
 305                 {
 306                     /* We need to shift the coordinates */
 307                     for (int d = 0; d < DIM; d++)
 308                     {
 309                         sendBuffer[n][d] = x[j][d] + shift[d];
 310                     }
 311                     n++;
 312                 }
 313             }
 314             else
 315             {
 316                 for (int j : ind.index)
 317                 {
 318                     /* Shift x */
 319                     sendBuffer[n][XX] = x[j][XX] + shift[XX];
 320                     /* Rotate y and z.
 321                      * This operation requires a special shift force
 322                      * treatment, which is performed in calc_vir.
 323                      */
 324                     sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
 325                     sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
 326                     n++;
 327                 }
 328             }
 329
 330             DDBufferAccess<gmx::RVec>  receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 331
 332             gmx::ArrayRef<gmx::RVec>   receiveBuffer;
 333             if (cd->receiveInPlace)
 334             {
 335                 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
 336             }
 337             else
 338             {
 339                 receiveBuffer = receiveBufferAccess.buffer;
 340             }
 341             /* Send and receive the coordinates */
 342             ddSendrecv(dd, d, dddirBackward,
 343                        sendBuffer, receiveBuffer);
 344
 345             if (!cd->receiveInPlace)
 346             {
 347                 int j = 0;
 348                 for (int zone = 0; zone < nzone; zone++)
 349                 {
 350                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 351                     {
 352                         x[i] = receiveBuffer[j++];
 353                     }
 354                 }
 355             }
 356             nat_tot += ind.nrecv[nzone+1];
 357         }
 358         nzone += nzone;
 359     }
 360
 361     wallcycle_stop(wcycle, ewcMOVEX);
 362 }
 363
 364 void dd_move_f(gmx_domdec_t              *dd,
 365                gmx::ForceWithShiftForces *forceWithShiftForces,
 366                gmx_wallcycle             *wcycle)
 367 {
 368     wallcycle_start(wcycle, ewcMOVEF);
 369
 370     gmx::ArrayRef<gmx::RVec> f       = forceWithShiftForces->force();
 371     gmx::ArrayRef<gmx::RVec> fshift  = forceWithShiftForces->shiftForces();
 372
 373     gmx_domdec_comm_t       &comm    = *dd->comm;
 374     int                      nzone   = comm.zones.n/2;
 375     int                      nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
 376     for (int d = dd->ndim-1; d >= 0; d--)
 377     {
 378         /* Only forces in domains near the PBC boundaries need to
 379            consider PBC in the treatment of fshift */
 380         const bool shiftForcesNeedPbc = (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
 381         const bool applyScrewPbc      = (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
 382         /* Determine which shift vector we need */
 383         ivec       vis   = { 0, 0, 0 };
 384         vis[dd->dim[d]]  = 1;
 385         const int  is    = IVEC2IS(vis);
 386
 387         /* Loop over the pulses */
 388         const gmx_domdec_comm_dim_t &cd = comm.cd[d];
 389         for (int p = cd.numPulses() - 1; p >= 0; p--)
 390         {
 391             const gmx_domdec_ind_t    &ind  = cd.ind[p];
 392             DDBufferAccess<gmx::RVec>  receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
 393             gmx::ArrayRef<gmx::RVec>  &receiveBuffer = receiveBufferAccess.buffer;
 394
 395             nat_tot                                 -= ind.nrecv[nzone+1];
 396
 397             DDBufferAccess<gmx::RVec>  sendBufferAccess(comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 398
 399             gmx::ArrayRef<gmx::RVec>   sendBuffer;
 400             if (cd.receiveInPlace)
 401             {
 402                 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
 403             }
 404             else
 405             {
 406                 sendBuffer = sendBufferAccess.buffer;
 407                 int j = 0;
 408                 for (int zone = 0; zone < nzone; zone++)
 409                 {
 410                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 411                     {
 412                         sendBuffer[j++] = f[i];
 413                     }
 414                 }
 415             }
 416             /* Communicate the forces */
 417             ddSendrecv(dd, d, dddirForward,
 418                        sendBuffer, receiveBuffer);
 419             /* Add the received forces */
 420             int n = 0;
 421             if (!shiftForcesNeedPbc)
 422             {
 423                 for (int j : ind.index)
 424                 {
 425                     for (int d = 0; d < DIM; d++)
 426                     {
 427                         f[j][d] += receiveBuffer[n][d];
 428                     }
 429                     n++;
 430                 }
 431             }
 432             else if (!applyScrewPbc)
 433             {
 434                 for (int j : ind.index)
 435                 {
 436                     for (int d = 0; d < DIM; d++)
 437                     {
 438                         f[j][d] += receiveBuffer[n][d];
 439                     }
 440                     /* Add this force to the shift force */
 441                     for (int d = 0; d < DIM; d++)
 442                     {
 443                         fshift[is][d] += receiveBuffer[n][d];
 444                     }
 445                     n++;
 446                 }
 447             }
 448             else
 449             {
 450                 for (int j : ind.index)
 451                 {
 452                     /* Rotate the force */
 453                     f[j][XX] += receiveBuffer[n][XX];
 454                     f[j][YY] -= receiveBuffer[n][YY];
 455                     f[j][ZZ] -= receiveBuffer[n][ZZ];
 456                     if (shiftForcesNeedPbc)
 457                     {
 458                         /* Add this force to the shift force */
 459                         for (int d = 0; d < DIM; d++)
 460                         {
 461                             fshift[is][d] += receiveBuffer[n][d];
 462                         }
 463                     }
 464                     n++;
 465                 }
 466             }
 467         }
 468         nzone /= 2;
 469     }
 470     wallcycle_stop(wcycle, ewcMOVEF);
 471 }
 472
 473 /* Convenience function for extracting a real buffer from an rvec buffer
 474  *
 475  * To reduce the number of temporary communication buffers and avoid
 476  * cache polution, we reuse gmx::RVec buffers for storing reals.
 477  * This functions return a real buffer reference with the same number
 478  * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
 479  */
 480 static gmx::ArrayRef<real>
 481 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
 482 {
 483     return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
 484                                   arrayRef.size());
 485 }
 486
 487 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
 488 {
 489     int                    nzone, nat_tot;
 490     gmx_domdec_comm_t     *comm;
 491     gmx_domdec_comm_dim_t *cd;
 492
 493     comm = dd->comm;
 494
 495     nzone   = 1;
 496     nat_tot = comm->atomRanges.numHomeAtoms();
 497     for (int d = 0; d < dd->ndim; d++)
 498     {
 499         cd = &comm->cd[d];
 500         for (const gmx_domdec_ind_t &ind : cd->ind)
 501         {
 502             /* Note: We provision for RVec instead of real, so a factor of 3
 503              * more than needed. The buffer actually already has this size
 504              * and we pass a plain pointer below, so this does not matter.
 505              */
 506             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 507             gmx::ArrayRef<real>       sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 508             int                       n          = 0;
 509             for (int j : ind.index)
 510             {
 511                 sendBuffer[n++] = v[j];
 512             }
 513
 514             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 515
 516             gmx::ArrayRef<real>       receiveBuffer;
 517             if (cd->receiveInPlace)
 518             {
 519                 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 520             }
 521             else
 522             {
 523                 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 524             }
 525             /* Send and receive the data */
 526             ddSendrecv(dd, d, dddirBackward,
 527                        sendBuffer, receiveBuffer);
 528             if (!cd->receiveInPlace)
 529             {
 530                 int j = 0;
 531                 for (int zone = 0; zone < nzone; zone++)
 532                 {
 533                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 534                     {
 535                         v[i] = receiveBuffer[j++];
 536                     }
 537                 }
 538             }
 539             nat_tot += ind.nrecv[nzone+1];
 540         }
 541         nzone += nzone;
 542     }
 543 }
 544
 545 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
 546 {
 547     int                    nzone, nat_tot;
 548     gmx_domdec_comm_t     *comm;
 549     gmx_domdec_comm_dim_t *cd;
 550
 551     comm = dd->comm;
 552
 553     nzone   = comm->zones.n/2;
 554     nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
 555     for (int d = dd->ndim-1; d >= 0; d--)
 556     {
 557         cd = &comm->cd[d];
 558         for (int p = cd->numPulses() - 1; p >= 0; p--)
 559         {
 560             const gmx_domdec_ind_t &ind = cd->ind[p];
 561
 562             /* Note: We provision for RVec instead of real, so a factor of 3
 563              * more than needed. The buffer actually already has this size
 564              * and we typecast, so this works as intended.
 565              */
 566             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 567             gmx::ArrayRef<real>       receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 568             nat_tot -= ind.nrecv[nzone + 1];
 569
 570             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 571
 572             gmx::ArrayRef<real>       sendBuffer;
 573             if (cd->receiveInPlace)
 574             {
 575                 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 576             }
 577             else
 578             {
 579                 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 580                 int j = 0;
 581                 for (int zone = 0; zone < nzone; zone++)
 582                 {
 583                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 584                     {
 585                         sendBuffer[j++] = v[i];
 586                     }
 587                 }
 588             }
 589             /* Communicate the forces */
 590             ddSendrecv(dd, d, dddirForward,
 591                        sendBuffer, receiveBuffer);
 592             /* Add the received forces */
 593             int n = 0;
 594             for (int j : ind.index)
 595             {
 596                 v[j] += receiveBuffer[n];
 597                 n++;
 598             }
 599         }
 600         nzone /= 2;
 601     }
 602 }
 603
 604 real dd_cutoff_multibody(const gmx_domdec_t *dd)
 605 {
 606     const gmx_domdec_comm_t &comm       = *dd->comm;
 607     const DDSystemInfo      &systemInfo = comm.systemInfo;
 608
 609     real                     r = -1;
 610     if (systemInfo.haveInterDomainMultiBodyBondeds)
 611     {
 612         if (comm.cutoff_mbody > 0)
 613         {
 614             r = comm.cutoff_mbody;
 615         }
 616         else
 617         {
 618             /* cutoff_mbody=0 means we do not have DLB */
 619             r = comm.cellsize_min[dd->dim[0]];
 620             for (int di = 1; di < dd->ndim; di++)
 621             {
 622                 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
 623             }
 624             if (comm.systemInfo.filterBondedCommunication)
 625             {
 626                 r = std::max(r, comm.cutoff_mbody);
 627             }
 628             else
 629             {
 630                 r = std::min(r, systemInfo.cutoff);
 631             }
 632         }
 633     }
 634
 635     return r;
 636 }
 637
 638 real dd_cutoff_twobody(const gmx_domdec_t *dd)
 639 {
 640     real r_mb;
 641
 642     r_mb = dd_cutoff_multibody(dd);
 643
 644     return std::max(dd->comm->systemInfo.cutoff, r_mb);
 645 }
 646
 647
 648 static void dd_cart_coord2pmecoord(const DDRankSetup        &ddRankSetup,
 649                                    const CartesianRankSetup &cartSetup,
 650                                    const ivec                coord,
 651                                    ivec                      coord_pme)
 652 {
 653     const int nc   = ddRankSetup.numPPCells[cartSetup.cartpmedim];
 654     const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
 655     copy_ivec(coord, coord_pme);
 656     coord_pme[cartSetup.cartpmedim] =
 657         nc + (coord[cartSetup.cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
 658 }
 659
 660 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
 661 static int ddindex2pmeindex(const DDRankSetup &ddRankSetup,
 662                             const int          ddCellIndex)
 663 {
 664     const int npp  = ddRankSetup.numPPRanks;
 665     const int npme = ddRankSetup.numRanksDoingPme;
 666
 667     /* Here we assign a PME node to communicate with this DD node
 668      * by assuming that the major index of both is x.
 669      * We add npme/2 to obtain an even distribution.
 670      */
 671     return (ddCellIndex*npme + npme/2)/npp;
 672 }
 673
 674 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup &ddRankSetup)
 675 {
 676     std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
 677
 678     int              n = 0;
 679     for (int i = 0; i < ddRankSetup.numPPRanks; i++)
 680     {
 681         const int p0 = ddindex2pmeindex(ddRankSetup, i);
 682         const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
 683         if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
 684         {
 685             if (debug)
 686             {
 687                 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
 688             }
 689             pmeRanks[n] = i + 1 + n;
 690             n++;
 691         }
 692     }
 693
 694     return pmeRanks;
 695 }
 696
 697 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
 698 {
 699     gmx_domdec_t *dd;
 700     ivec          coords;
 701     int           slab;
 702
 703     dd = cr->dd;
 704     /*
 705        if (dd->comm->bCartesian) {
 706        gmx_ddindex2xyz(dd->nc,ddindex,coords);
 707        dd_coords2pmecoords(dd,coords,coords_pme);
 708        copy_ivec(dd->ntot,nc);
 709        nc[dd->cartpmedim]         -= dd->nc[dd->cartpmedim];
 710        coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
 711
 712        slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
 713        } else {
 714        slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
 715        }
 716      */
 717     coords[XX] = x;
 718     coords[YY] = y;
 719     coords[ZZ] = z;
 720     slab       = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->nc, coords));
 721
 722     return slab;
 723 }
 724
 725 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
 726 {
 727     const CartesianRankSetup &cartSetup = cr->dd->comm->cartesianRankSetup;
 728     ivec                      coords    = { x, y, z };
 729     int                       nodeid    = -1;
 730
 731     if (cartSetup.bCartesianPP_PME)
 732     {
 733 #if GMX_MPI
 734         MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
 735 #endif
 736     }
 737     else
 738     {
 739         int ddindex = dd_index(cr->dd->nc, coords);
 740         if (cartSetup.bCartesianPP)
 741         {
 742             nodeid = cartSetup.ddindex2simnodeid[ddindex];
 743         }
 744         else
 745         {
 746             if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
 747             {
 748                 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
 749             }
 750             else
 751             {
 752                 nodeid = ddindex;
 753             }
 754         }
 755     }
 756
 757     return nodeid;
 758 }
 759
 760 static int dd_simnode2pmenode(const DDRankSetup          &ddRankSetup,
 761                               const CartesianRankSetup   &cartSetup,
 762                               gmx::ArrayRef<const int>    pmeRanks,
 763                               const t_commrec gmx_unused *cr,
 764                               const int                   sim_nodeid)
 765 {
 766     int                pmenode = -1;
 767
 768     /* This assumes a uniform x domain decomposition grid cell size */
 769     if (cartSetup.bCartesianPP_PME)
 770     {
 771 #if GMX_MPI
 772         ivec coord, coord_pme;
 773         MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
 774         if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
 775         {
 776             /* This is a PP rank */
 777             dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
 778             MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
 779         }
 780 #endif
 781     }
 782     else if (cartSetup.bCartesianPP)
 783     {
 784         if (sim_nodeid < ddRankSetup.numPPRanks)
 785         {
 786             pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
 787         }
 788     }
 789     else
 790     {
 791         /* This assumes DD cells with identical x coordinates
 792          * are numbered sequentially.
 793          */
 794         if (pmeRanks.empty())
 795         {
 796             if (sim_nodeid < ddRankSetup.numPPRanks)
 797             {
 798                 /* The DD index equals the nodeid */
 799                 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
 800             }
 801         }
 802         else
 803         {
 804             int i = 0;
 805             while (sim_nodeid > pmeRanks[i])
 806             {
 807                 i++;
 808             }
 809             if (sim_nodeid < pmeRanks[i])
 810             {
 811                 pmenode = pmeRanks[i];
 812             }
 813         }
 814     }
 815
 816     return pmenode;
 817 }
 818
 819 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
 820 {
 821     if (dd != nullptr)
 822     {
 823         return {
 824                    dd->comm->ddRankSetup.npmenodes_x,
 825                    dd->comm->ddRankSetup.npmenodes_y
 826         };
 827     }
 828     else
 829     {
 830         return {
 831                    1, 1
 832         };
 833     }
 834 }
 835
 836 std::vector<int> get_pme_ddranks(const t_commrec *cr,
 837                                  const int        pmenodeid)
 838 {
 839     const DDRankSetup        &ddRankSetup = cr->dd->comm->ddRankSetup;
 840     const CartesianRankSetup &cartSetup   = cr->dd->comm->cartesianRankSetup;
 841     GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks, "This function should only be called when PME-only ranks are in use");
 842     std::vector<int>          ddranks;
 843     ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1)/ddRankSetup.numRanksDoingPme);
 844
 845     for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
 846     {
 847         for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
 848         {
 849             for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
 850             {
 851                 if (cartSetup.bCartesianPP_PME)
 852                 {
 853                     ivec coord = { x, y, z };
 854                     ivec coord_pme;
 855                     dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
 856                     if (cr->dd->ci[XX] == coord_pme[XX] &&
 857                         cr->dd->ci[YY] == coord_pme[YY] &&
 858                         cr->dd->ci[ZZ] == coord_pme[ZZ])
 859                     {
 860                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 861                     }
 862                 }
 863                 else
 864                 {
 865                     /* The slab corresponds to the nodeid in the PME group */
 866                     if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
 867                     {
 868                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 869                     }
 870                 }
 871             }
 872         }
 873     }
 874     return ddranks;
 875 }
 876
 877 static gmx_bool receive_vir_ener(const gmx_domdec_t       *dd,
 878                                  gmx::ArrayRef<const int>  pmeRanks,
 879                                  const t_commrec          *cr)
 880 {
 881     gmx_bool           bReceive    = TRUE;
 882
 883     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
 884     if (ddRankSetup.usePmeOnlyRanks)
 885     {
 886         const CartesianRankSetup &cartSetup   = dd->comm->cartesianRankSetup;
 887         if (cartSetup.bCartesianPP_PME)
 888         {
 889 #if GMX_MPI
 890             int  pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
 891             ivec coords;
 892             MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
 893             coords[cartSetup.cartpmedim]++;
 894             if (coords[cartSetup.cartpmedim] < dd->nc[cartSetup.cartpmedim])
 895             {
 896                 int rank;
 897                 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
 898                 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
 899                 {
 900                     /* This is not the last PP node for pmenode */
 901                     bReceive = FALSE;
 902                 }
 903             }
 904 #else
 905             GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
 906 #endif
 907         }
 908         else
 909         {
 910             int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
 911             if (cr->sim_nodeid+1 < cr->nnodes &&
 912                 dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
 913             {
 914                 /* This is not the last PP node for pmenode */
 915                 bReceive = FALSE;
 916             }
 917         }
 918     }
 919
 920     return bReceive;
 921 }
 922
 923 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
 924 {
 925     gmx_domdec_comm_t *comm;
 926     int                i;
 927
 928     comm = dd->comm;
 929
 930     snew(*dim_f, dd->nc[dim]+1);
 931     (*dim_f)[0] = 0;
 932     for (i = 1; i < dd->nc[dim]; i++)
 933     {
 934         if (comm->slb_frac[dim])
 935         {
 936             (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
 937         }
 938         else
 939         {
 940             (*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
 941         }
 942     }
 943     (*dim_f)[dd->nc[dim]] = 1;
 944 }
 945
 946 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
 947 {
 948     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
 949
 950     if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
 951     {
 952         ddpme->dim = YY;
 953     }
 954     else
 955     {
 956         ddpme->dim = dimind;
 957     }
 958     ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
 959
 960     ddpme->nslab = (ddpme->dim == 0 ?
 961                     ddRankSetup.npmenodes_x :
 962                     ddRankSetup.npmenodes_y);
 963
 964     if (ddpme->nslab <= 1)
 965     {
 966         return;
 967     }
 968
 969     const int nso = ddRankSetup.numRanksDoingPme/ddpme->nslab;
 970     /* Determine for each PME slab the PP location range for dimension dim */
 971     snew(ddpme->pp_min, ddpme->nslab);
 972     snew(ddpme->pp_max, ddpme->nslab);
 973     for (int slab = 0; slab < ddpme->nslab; slab++)
 974     {
 975         ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
 976         ddpme->pp_max[slab] = 0;
 977     }
 978     for (int i = 0; i < dd->nnodes; i++)
 979     {
 980         ivec xyz;
 981         ddindex2xyz(dd->nc, i, xyz);
 982         /* For y only use our y/z slab.
 983          * This assumes that the PME x grid size matches the DD grid size.
 984          */
 985         if (dimind == 0 || xyz[XX] == dd->ci[XX])
 986         {
 987             const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
 988             int       slab;
 989             if (dimind == 0)
 990             {
 991                 slab = pmeindex/nso;
 992             }
 993             else
 994             {
 995                 slab = pmeindex % ddpme->nslab;
 996             }
 997             ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
 998             ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
 999         }
1000     }
1001
1002     set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1003 }
1004
1005 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1006 {
1007     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1008
1009     if (ddRankSetup.ddpme[0].dim == XX)
1010     {
1011         return ddRankSetup.ddpme[0].maxshift;
1012     }
1013     else
1014     {
1015         return 0;
1016     }
1017 }
1018
1019 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1020 {
1021     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1022
1023     if (ddRankSetup.ddpme[0].dim == YY)
1024     {
1025         return ddRankSetup.ddpme[0].maxshift;
1026     }
1027     else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
1028     {
1029         return ddRankSetup.ddpme[1].maxshift;
1030     }
1031     else
1032     {
1033         return 0;
1034     }
1035 }
1036
1037 bool ddHaveSplitConstraints(const gmx_domdec_t &dd)
1038 {
1039     return dd.comm->systemInfo.haveSplitConstraints;
1040 }
1041
1042 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
1043 {
1044     /* Note that the cycles value can be incorrect, either 0 or some
1045      * extremely large value, when our thread migrated to another core
1046      * with an unsynchronized cycle counter. If this happens less often
1047      * that once per nstlist steps, this will not cause issues, since
1048      * we later subtract the maximum value from the sum over nstlist steps.
1049      * A zero count will slightly lower the total, but that's a small effect.
1050      * Note that the main purpose of the subtraction of the maximum value
1051      * is to avoid throwing off the load balancing when stalls occur due
1052      * e.g. system activity or network congestion.
1053      */
1054     dd->comm->cycl[ddCycl] += cycles;
1055     dd->comm->cycl_n[ddCycl]++;
1056     if (cycles > dd->comm->cycl_max[ddCycl])
1057     {
1058         dd->comm->cycl_max[ddCycl] = cycles;
1059     }
1060 }
1061
1062 #if GMX_MPI
1063 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
1064 {
1065     MPI_Comm           c_row;
1066     int                dim, i, rank;
1067     ivec               loc_c;
1068     gmx_bool           bPartOfGroup = FALSE;
1069
1070     dim = dd->dim[dim_ind];
1071     copy_ivec(loc, loc_c);
1072     for (i = 0; i < dd->nc[dim]; i++)
1073     {
1074         loc_c[dim] = i;
1075         rank       = dd_index(dd->nc, loc_c);
1076         if (rank == dd->rank)
1077         {
1078             /* This process is part of the group */
1079             bPartOfGroup = TRUE;
1080         }
1081     }
1082     MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
1083                    &c_row);
1084     if (bPartOfGroup)
1085     {
1086         dd->comm->mpi_comm_load[dim_ind] = c_row;
1087         if (!isDlbDisabled(dd->comm))
1088         {
1089             DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1090
1091             if (dd->ci[dim] == dd->master_ci[dim])
1092             {
1093                 /* This is the root process of this row */
1094                 cellsizes.rowMaster  = std::make_unique<RowMaster>();
1095
1096                 RowMaster &rowMaster = *cellsizes.rowMaster;
1097                 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1098                 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
1099                 rowMaster.isCellMin.resize(dd->nc[dim]);
1100                 if (dim_ind > 0)
1101                 {
1102                     rowMaster.bounds.resize(dd->nc[dim]);
1103                 }
1104                 rowMaster.buf_ncd.resize(dd->nc[dim]);
1105             }
1106             else
1107             {
1108                 /* This is not a root process, we only need to receive cell_f */
1109                 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1110             }
1111         }
1112         if (dd->ci[dim] == dd->master_ci[dim])
1113         {
1114             snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
1115         }
1116     }
1117 }
1118 #endif
1119
1120 void dd_setup_dlb_resource_sharing(const t_commrec      *cr,
1121                                    int                   gpu_id)
1122 {
1123 #if GMX_MPI
1124     int           physicalnode_id_hash;
1125     gmx_domdec_t *dd;
1126     MPI_Comm      mpi_comm_pp_physicalnode;
1127
1128     if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1129     {
1130         /* Only ranks with short-ranged tasks (currently) use GPUs.
1131          * If we don't have GPUs assigned, there are no resources to share.
1132          */
1133         return;
1134     }
1135
1136     physicalnode_id_hash = gmx_physicalnode_id_hash();
1137
1138     dd = cr->dd;
1139
1140     if (debug)
1141     {
1142         fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1143         fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
1144                 dd->rank, physicalnode_id_hash, gpu_id);
1145     }
1146     /* Split the PP communicator over the physical nodes */
1147     /* TODO: See if we should store this (before), as it's also used for
1148      * for the nodecomm summation.
1149      */
1150     // TODO PhysicalNodeCommunicator could be extended/used to handle
1151     // the need for per-node per-group communicators.
1152     MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
1153                    &mpi_comm_pp_physicalnode);
1154     MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
1155                    &dd->comm->mpi_comm_gpu_shared);
1156     MPI_Comm_free(&mpi_comm_pp_physicalnode);
1157     MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1158
1159     if (debug)
1160     {
1161         fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1162     }
1163
1164     /* Note that some ranks could share a GPU, while others don't */
1165
1166     if (dd->comm->nrank_gpu_shared == 1)
1167     {
1168         MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1169     }
1170 #else
1171     GMX_UNUSED_VALUE(cr);
1172     GMX_UNUSED_VALUE(gpu_id);
1173 #endif
1174 }
1175
1176 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
1177 {
1178 #if GMX_MPI
1179     int  dim0, dim1, i, j;
1180     ivec loc;
1181
1182     if (debug)
1183     {
1184         fprintf(debug, "Making load communicators\n");
1185     }
1186
1187     dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1188     snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1189
1190     if (dd->ndim == 0)
1191     {
1192         return;
1193     }
1194
1195     clear_ivec(loc);
1196     make_load_communicator(dd, 0, loc);
1197     if (dd->ndim > 1)
1198     {
1199         dim0 = dd->dim[0];
1200         for (i = 0; i < dd->nc[dim0]; i++)
1201         {
1202             loc[dim0] = i;
1203             make_load_communicator(dd, 1, loc);
1204         }
1205     }
1206     if (dd->ndim > 2)
1207     {
1208         dim0 = dd->dim[0];
1209         for (i = 0; i < dd->nc[dim0]; i++)
1210         {
1211             loc[dim0] = i;
1212             dim1      = dd->dim[1];
1213             for (j = 0; j < dd->nc[dim1]; j++)
1214             {
1215                 loc[dim1] = j;
1216                 make_load_communicator(dd, 2, loc);
1217             }
1218         }
1219     }
1220
1221     if (debug)
1222     {
1223         fprintf(debug, "Finished making load communicators\n");
1224     }
1225 #endif
1226 }
1227
1228 /*! \brief Sets up the relation between neighboring domains and zones */
1229 static void setup_neighbor_relations(gmx_domdec_t *dd)
1230 {
1231     int                     d, dim, i, j, m;
1232     ivec                    tmp, s;
1233     gmx_domdec_zones_t     *zones;
1234     gmx_domdec_ns_ranges_t *izone;
1235     GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
1236
1237     for (d = 0; d < dd->ndim; d++)
1238     {
1239         dim = dd->dim[d];
1240         copy_ivec(dd->ci, tmp);
1241         tmp[dim]           = (tmp[dim] + 1) % dd->nc[dim];
1242         dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1243         copy_ivec(dd->ci, tmp);
1244         tmp[dim]           = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
1245         dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1246         if (debug)
1247         {
1248             fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
1249                     dd->rank, dim,
1250                     dd->neighbor[d][0],
1251                     dd->neighbor[d][1]);
1252         }
1253     }
1254
1255     int nzone  = (1 << dd->ndim);
1256     int nizone = (1 << std::max(dd->ndim - 1, 0));
1257     assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1258
1259     zones = &dd->comm->zones;
1260
1261     for (i = 0; i < nzone; i++)
1262     {
1263         m = 0;
1264         clear_ivec(zones->shift[i]);
1265         for (d = 0; d < dd->ndim; d++)
1266         {
1267             zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1268         }
1269     }
1270
1271     zones->n = nzone;
1272     for (i = 0; i < nzone; i++)
1273     {
1274         for (d = 0; d < DIM; d++)
1275         {
1276             s[d] = dd->ci[d] - zones->shift[i][d];
1277             if (s[d] < 0)
1278             {
1279                 s[d] += dd->nc[d];
1280             }
1281             else if (s[d] >= dd->nc[d])
1282             {
1283                 s[d] -= dd->nc[d];
1284             }
1285         }
1286     }
1287     zones->nizone = nizone;
1288     for (i = 0; i < zones->nizone; i++)
1289     {
1290         assert(ddNonbondedZonePairRanges[i][0] == i);
1291
1292         izone     = &zones->izone[i];
1293         /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
1294          * j-zones up to nzone.
1295          */
1296         izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
1297         izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
1298         for (dim = 0; dim < DIM; dim++)
1299         {
1300             if (dd->nc[dim] == 1)
1301             {
1302                 /* All shifts should be allowed */
1303                 izone->shift0[dim] = -1;
1304                 izone->shift1[dim] = 1;
1305             }
1306             else
1307             {
1308                 /* Determine the min/max j-zone shift wrt the i-zone */
1309                 izone->shift0[dim] = 1;
1310                 izone->shift1[dim] = -1;
1311                 for (j = izone->j0; j < izone->j1; j++)
1312                 {
1313                     int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
1314                     if (shift_diff < izone->shift0[dim])
1315                     {
1316                         izone->shift0[dim] = shift_diff;
1317                     }
1318                     if (shift_diff > izone->shift1[dim])
1319                     {
1320                         izone->shift1[dim] = shift_diff;
1321                     }
1322                 }
1323             }
1324         }
1325     }
1326
1327     if (!isDlbDisabled(dd->comm))
1328     {
1329         dd->comm->cellsizesWithDlb.resize(dd->ndim);
1330     }
1331
1332     if (dd->comm->ddSettings.recordLoad)
1333     {
1334         make_load_communicators(dd);
1335     }
1336 }
1337
1338 static void make_pp_communicator(const gmx::MDLogger  &mdlog,
1339                                  gmx_domdec_t         *dd,
1340                                  t_commrec gmx_unused *cr,
1341                                  bool gmx_unused       reorder)
1342 {
1343 #if GMX_MPI
1344     gmx_domdec_comm_t  *comm      = dd->comm;
1345     CartesianRankSetup &cartSetup = comm->cartesianRankSetup;
1346
1347     if (cartSetup.bCartesianPP)
1348     {
1349         /* Set up cartesian communication for the particle-particle part */
1350         GMX_LOG(mdlog.info).appendTextFormatted(
1351                 "Will use a Cartesian communicator: %d x %d x %d",
1352                 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
1353
1354         ivec periods;
1355         for (int i = 0; i < DIM; i++)
1356         {
1357             periods[i] = TRUE;
1358         }
1359         MPI_Comm comm_cart;
1360         MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
1361                         &comm_cart);
1362         /* We overwrite the old communicator with the new cartesian one */
1363         cr->mpi_comm_mygroup = comm_cart;
1364     }
1365
1366     dd->mpi_comm_all = cr->mpi_comm_mygroup;
1367     MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1368
1369     if (cartSetup.bCartesianPP_PME)
1370     {
1371         /* Since we want to use the original cartesian setup for sim,
1372          * and not the one after split, we need to make an index.
1373          */
1374         cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1375         cartSetup.ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
1376         gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1377         /* Get the rank of the DD master,
1378          * above we made sure that the master node is a PP node.
1379          */
1380         int rank;
1381         if (MASTER(cr))
1382         {
1383             rank = dd->rank;
1384         }
1385         else
1386         {
1387             rank = 0;
1388         }
1389         MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1390     }
1391     else if (cartSetup.bCartesianPP)
1392     {
1393         if (!comm->ddRankSetup.usePmeOnlyRanks)
1394         {
1395             /* The PP communicator is also
1396              * the communicator for this simulation
1397              */
1398             cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1399         }
1400         cr->nodeid = dd->rank;
1401
1402         MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1403
1404         /* We need to make an index to go from the coordinates
1405          * to the nodeid of this simulation.
1406          */
1407         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1408         std::vector<int> buf(dd->nnodes);
1409         if (thisRankHasDuty(cr, DUTY_PP))
1410         {
1411             buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1412         }
1413         /* Communicate the ddindex to simulation nodeid index */
1414         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1415                       cr->mpi_comm_mysim);
1416
1417         /* Determine the master coordinates and rank.
1418          * The DD master should be the same node as the master of this sim.
1419          */
1420         for (int i = 0; i < dd->nnodes; i++)
1421         {
1422             if (cartSetup.ddindex2simnodeid[i] == 0)
1423             {
1424                 ddindex2xyz(dd->nc, i, dd->master_ci);
1425                 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1426             }
1427         }
1428         if (debug)
1429         {
1430             fprintf(debug, "The master rank is %d\n", dd->masterrank);
1431         }
1432     }
1433     else
1434     {
1435         /* No Cartesian communicators */
1436         /* We use the rank in dd->comm->all as DD index */
1437         ddindex2xyz(dd->nc, dd->rank, dd->ci);
1438         /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1439         dd->masterrank = 0;
1440         clear_ivec(dd->master_ci);
1441     }
1442 #endif
1443
1444     GMX_LOG(mdlog.info).appendTextFormatted(
1445             "Domain decomposition rank %d, coordinates %d %d %d\n",
1446             dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1447     if (debug)
1448     {
1449         fprintf(debug,
1450                 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
1451                 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1452     }
1453 }
1454
1455 static void receive_ddindex2simnodeid(gmx_domdec_t         *dd,
1456                                       t_commrec            *cr)
1457 {
1458 #if GMX_MPI
1459     CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
1460
1461     if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1462     {
1463         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1464         std::vector<int> buf(dd->nnodes);
1465         if (thisRankHasDuty(cr, DUTY_PP))
1466         {
1467             buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1468         }
1469         /* Communicate the ddindex to simulation nodeid index */
1470         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1471                       cr->mpi_comm_mysim);
1472     }
1473 #else
1474     GMX_UNUSED_VALUE(dd);
1475     GMX_UNUSED_VALUE(cr);
1476 #endif
1477 }
1478
1479 static CartesianRankSetup
1480 split_communicator(const gmx::MDLogger    &mdlog,
1481                    t_commrec              *cr,
1482                    const DdRankOrder       ddRankOrder,
1483                    bool gmx_unused         reorder,
1484                    const DDRankSetup      &ddRankSetup,
1485                    ivec                    ddCellIndex,
1486                    std::vector<int>       *pmeRanks)
1487 {
1488     CartesianRankSetup cartSetup;
1489
1490     cartSetup.bCartesianPP     = (ddRankOrder == DdRankOrder::cartesian);
1491     cartSetup.bCartesianPP_PME = false;
1492
1493     const ivec &numDDCells     = ddRankSetup.numPPCells;
1494     /* Initially we set ntot to the number of PP cells */
1495     copy_ivec(numDDCells, cartSetup.ntot);
1496
1497     if (cartSetup.bCartesianPP)
1498     {
1499         const int numDDCellsTot = ddRankSetup.numPPRanks;
1500         bool      bDiv[DIM];
1501         for (int i = 1; i < DIM; i++)
1502         {
1503             bDiv[i] = ((ddRankSetup.numRanksDoingPme*numDDCells[i]) % numDDCellsTot == 0);
1504         }
1505         if (bDiv[YY] || bDiv[ZZ])
1506         {
1507             cartSetup.bCartesianPP_PME = TRUE;
1508             /* If we have 2D PME decomposition, which is always in x+y,
1509              * we stack the PME only nodes in z.
1510              * Otherwise we choose the direction that provides the thinnest slab
1511              * of PME only nodes as this will have the least effect
1512              * on the PP communication.
1513              * But for the PME communication the opposite might be better.
1514              */
1515             if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 ||
1516                              !bDiv[YY] ||
1517                              numDDCells[YY] > numDDCells[ZZ]))
1518             {
1519                 cartSetup.cartpmedim = ZZ;
1520             }
1521             else
1522             {
1523                 cartSetup.cartpmedim = YY;
1524             }
1525             cartSetup.ntot[cartSetup.cartpmedim]
1526                 += (ddRankSetup.numRanksDoingPme*numDDCells[cartSetup.cartpmedim])/numDDCellsTot;
1527         }
1528         else
1529         {
1530             GMX_LOG(mdlog.info).appendTextFormatted(
1531                     "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)",
1532                     ddRankSetup.numRanksDoingPme,
1533                     numDDCells[XX], numDDCells[YY],
1534                     numDDCells[XX], numDDCells[ZZ]);
1535             GMX_LOG(mdlog.info).appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1536         }
1537     }
1538
1539     if (cartSetup.bCartesianPP_PME)
1540     {
1541 #if GMX_MPI
1542         int  rank;
1543         ivec periods;
1544
1545         GMX_LOG(mdlog.info).appendTextFormatted(
1546                 "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1547                 cartSetup.ntot[XX], cartSetup.ntot[YY], cartSetup.ntot[ZZ]);
1548
1549         for (int i = 0; i < DIM; i++)
1550         {
1551             periods[i] = TRUE;
1552         }
1553         MPI_Comm comm_cart;
1554         MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder),
1555                         &comm_cart);
1556         MPI_Comm_rank(comm_cart, &rank);
1557         if (MASTER(cr) && rank != 0)
1558         {
1559             gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1560         }
1561
1562         /* With this assigment we loose the link to the original communicator
1563          * which will usually be MPI_COMM_WORLD, unless have multisim.
1564          */
1565         cr->mpi_comm_mysim = comm_cart;
1566         cr->sim_nodeid     = rank;
1567
1568         MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1569
1570         GMX_LOG(mdlog.info).appendTextFormatted(
1571                 "Cartesian rank %d, coordinates %d %d %d\n",
1572                 cr->sim_nodeid, ddCellIndex[XX], ddCellIndex[YY], ddCellIndex[ZZ]);
1573
1574         if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1575         {
1576             cr->duty = DUTY_PP;
1577         }
1578         if (!ddRankSetup.usePmeOnlyRanks ||
1579             ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1580         {
1581             cr->duty = DUTY_PME;
1582         }
1583
1584         /* Split the sim communicator into PP and PME only nodes */
1585         MPI_Comm_split(cr->mpi_comm_mysim,
1586                        getThisRankDuties(cr),
1587                        dd_index(cartSetup.ntot, ddCellIndex),
1588                        &cr->mpi_comm_mygroup);
1589 #else
1590         GMX_UNUSED_VALUE(ddCellIndex);
1591 #endif
1592     }
1593     else
1594     {
1595         switch (ddRankOrder)
1596         {
1597             case DdRankOrder::pp_pme:
1598                 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1599                 break;
1600             case DdRankOrder::interleave:
1601                 /* Interleave the PP-only and PME-only ranks */
1602                 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1603                 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1604                 break;
1605             case DdRankOrder::cartesian:
1606                 break;
1607             default:
1608                 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1609         }
1610
1611         if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1612         {
1613             cr->duty = DUTY_PME;
1614         }
1615         else
1616         {
1617             cr->duty = DUTY_PP;
1618         }
1619 #if GMX_MPI
1620         /* Split the sim communicator into PP and PME only nodes */
1621         MPI_Comm_split(cr->mpi_comm_mysim,
1622                        getThisRankDuties(cr),
1623                        cr->nodeid,
1624                        &cr->mpi_comm_mygroup);
1625         MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1626 #endif
1627     }
1628
1629     GMX_LOG(mdlog.info).appendTextFormatted(
1630             "This rank does only %s work.\n",
1631             thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1632
1633     return cartSetup;
1634 }
1635
1636 /*! \brief Makes the PP communicator and the PME communicator, when needed
1637  *
1638  * Returns the Cartesian rank setup.
1639  * Sets \p cr->mpi_comm_mygroup
1640  * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1641  * With separate PME ranks in interleaved order, set the PME ranks in \p pmeRanks.
1642  */
1643 static CartesianRankSetup
1644 makeGroupCommunicators(const gmx::MDLogger &mdlog,
1645                        const DDSettings    &ddSettings,
1646                        const DdRankOrder    ddRankOrder,
1647                        const DDRankSetup   &ddRankSetup,
1648                        t_commrec           *cr,
1649                        ivec                 ddCellIndex,
1650                        std::vector<int>    *pmeRanks)
1651 {
1652     CartesianRankSetup cartSetup;
1653
1654     if (ddRankSetup.usePmeOnlyRanks)
1655     {
1656         /* Split the communicator into a PP and PME part */
1657         cartSetup =
1658             split_communicator(mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder,
1659                                ddRankSetup, ddCellIndex, pmeRanks);
1660     }
1661     else
1662     {
1663         /* All nodes do PP and PME */
1664         /* We do not require separate communicators */
1665         cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
1666
1667         cartSetup.bCartesianPP     = false;
1668         cartSetup.bCartesianPP_PME = false;
1669     }
1670
1671     return cartSetup;
1672 }
1673
1674 /*! \brief For PP ranks, sets or makes the communicator
1675  *
1676  * For PME ranks get the rank id.
1677  * For PP only ranks, sets the PME-only rank.
1678  */
1679 static void setupGroupCommunication(const gmx::MDLogger      &mdlog,
1680                                     const DDSettings         &ddSettings,
1681                                     gmx::ArrayRef<const int>  pmeRanks,
1682                                     t_commrec                *cr,
1683                                     const int                 numAtomsInSystem,
1684                                     gmx_domdec_t             *dd)
1685 {
1686     const DDRankSetup        &ddRankSetup = dd->comm->ddRankSetup;
1687     const CartesianRankSetup &cartSetup   = dd->comm->cartesianRankSetup;
1688
1689     if (thisRankHasDuty(cr, DUTY_PP))
1690     {
1691         /* Copy or make a new PP communicator */
1692
1693         /* We (possibly) reordered the nodes in split_communicator,
1694          * so it is no longer required in make_pp_communicator.
1695          */
1696         const bool useCartesianReorder =
1697             (ddSettings.useCartesianReorder &&
1698              !cartSetup.bCartesianPP_PME);
1699
1700         make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1701     }
1702     else
1703     {
1704         receive_ddindex2simnodeid(dd, cr);
1705     }
1706
1707     if (!thisRankHasDuty(cr, DUTY_PME))
1708     {
1709         /* Set up the commnuication to our PME node */
1710         dd->pme_nodeid           = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1711         dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1712         if (debug)
1713         {
1714             fprintf(debug, "My pme_nodeid %d receive ener %s\n",
1715                     dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
1716         }
1717     }
1718     else
1719     {
1720         dd->pme_nodeid = -1;
1721     }
1722
1723     /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1724     if (MASTER(cr))
1725     {
1726         dd->ma = std::make_unique<AtomDistribution>(dd->nc,
1727                                                     numAtomsInSystem,
1728                                                     numAtomsInSystem);
1729     }
1730 }
1731
1732 static real *get_slb_frac(const gmx::MDLogger &mdlog,
1733                           const char *dir, int nc, const char *size_string)
1734 {
1735     real  *slb_frac, tot;
1736     int    i, n;
1737     double dbl;
1738
1739     slb_frac = nullptr;
1740     if (nc > 1 && size_string != nullptr)
1741     {
1742         GMX_LOG(mdlog.info).appendTextFormatted(
1743                 "Using static load balancing for the %s direction", dir);
1744         snew(slb_frac, nc);
1745         tot = 0;
1746         for (i = 0; i < nc; i++)
1747         {
1748             dbl = 0;
1749             sscanf(size_string, "%20lf%n", &dbl, &n);
1750             if (dbl == 0)
1751             {
1752                 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
1753             }
1754             slb_frac[i]  = dbl;
1755             size_string += n;
1756             tot         += slb_frac[i];
1757         }
1758         /* Normalize */
1759         std::string relativeCellSizes = "Relative cell sizes:";
1760         for (i = 0; i < nc; i++)
1761         {
1762             slb_frac[i]       /= tot;
1763             relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1764         }
1765         GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1766     }
1767
1768     return slb_frac;
1769 }
1770
1771 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
1772 {
1773     int                  n     = 0;
1774     gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1775     int                  nmol;
1776     while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1777     {
1778         for (auto &ilist : extractILists(*ilists, IF_BOND))
1779         {
1780             if (NRAL(ilist.functionType) >  2)
1781             {
1782                 n += nmol*(ilist.iatoms.size()/ilistStride(ilist));
1783             }
1784         }
1785     }
1786
1787     return n;
1788 }
1789
1790 static int dd_getenv(const gmx::MDLogger &mdlog,
1791                      const char *env_var, int def)
1792 {
1793     char *val;
1794     int   nst;
1795
1796     nst = def;
1797     val = getenv(env_var);
1798     if (val)
1799     {
1800         if (sscanf(val, "%20d", &nst) <= 0)
1801         {
1802             nst = 1;
1803         }
1804         GMX_LOG(mdlog.info).appendTextFormatted(
1805                 "Found env.var. %s = %s, using value %d",
1806                 env_var, val, nst);
1807     }
1808
1809     return nst;
1810 }
1811
1812 static void check_dd_restrictions(const gmx_domdec_t  *dd,
1813                                   const t_inputrec    *ir,
1814                                   const gmx::MDLogger &mdlog)
1815 {
1816     if (ir->ePBC == epbcSCREW &&
1817         (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
1818     {
1819         gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
1820     }
1821
1822     if (ir->nstlist == 0)
1823     {
1824         gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1825     }
1826
1827     if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
1828     {
1829         GMX_LOG(mdlog.warning).appendText("comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
1830     }
1831 }
1832
1833 static real average_cellsize_min(const gmx_ddbox_t &ddbox,
1834                                  const ivec         numDomains)
1835 {
1836     real r = ddbox.box_size[XX];
1837     for (int d = 0; d < DIM; d++)
1838     {
1839         if (numDomains[d] > 1)
1840         {
1841             /* Check using the initial average cell size */
1842             r = std::min(r, ddbox.box_size[d]*ddbox.skew_fac[d]/numDomains[d]);
1843         }
1844     }
1845
1846     return r;
1847 }
1848
1849 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
1850  */
1851 static DlbState forceDlbOffOrBail(DlbState             cmdlineDlbState,
1852                                   const std::string   &reasonStr,
1853                                   const gmx::MDLogger &mdlog)
1854 {
1855     std::string dlbNotSupportedErr  = "Dynamic load balancing requested, but ";
1856     std::string dlbDisableNote      = "NOTE: disabling dynamic load balancing as ";
1857
1858     if (cmdlineDlbState == DlbState::onUser)
1859     {
1860         gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1861     }
1862     else if (cmdlineDlbState == DlbState::offCanTurnOn)
1863     {
1864         GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1865     }
1866     return DlbState::offForever;
1867 }
1868
1869 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1870  *
1871  * This function parses the parameters of "-dlb" command line option setting
1872  * corresponding state values. Then it checks the consistency of the determined
1873  * state with other run parameters and settings. As a result, the initial state
1874  * may be altered or an error may be thrown if incompatibility of options is detected.
1875  *
1876  * \param [in] mdlog       Logger.
1877  * \param [in] dlbOption   Enum value for the DLB option.
1878  * \param [in] bRecordLoad True if the load balancer is recording load information.
1879  * \param [in] mdrunOptions  Options for mdrun.
1880  * \param [in] ir          Pointer mdrun to input parameters.
1881  * \returns                DLB initial/startup state.
1882  */
1883 static DlbState determineInitialDlbState(const gmx::MDLogger &mdlog,
1884                                          DlbOption dlbOption, gmx_bool bRecordLoad,
1885                                          const gmx::MdrunOptions &mdrunOptions,
1886                                          const t_inputrec *ir)
1887 {
1888     DlbState dlbState = DlbState::offCanTurnOn;
1889
1890     switch (dlbOption)
1891     {
1892         case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1893         case DlbOption::no:               dlbState = DlbState::offUser;      break;
1894         case DlbOption::yes:              dlbState = DlbState::onUser;       break;
1895         default: gmx_incons("Invalid dlbOption enum value");
1896     }
1897
1898     /* Reruns don't support DLB: bail or override auto mode */
1899     if (mdrunOptions.rerun)
1900     {
1901         std::string reasonStr = "it is not supported in reruns.";
1902         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1903     }
1904
1905     /* Unsupported integrators */
1906     if (!EI_DYNAMICS(ir->eI))
1907     {
1908         auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1909         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1910     }
1911
1912     /* Without cycle counters we can't time work to balance on */
1913     if (!bRecordLoad)
1914     {
1915         std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
1916         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1917     }
1918
1919     if (mdrunOptions.reproducible)
1920     {
1921         std::string reasonStr = "you started a reproducible run.";
1922         switch (dlbState)
1923         {
1924             case DlbState::offUser:
1925                 break;
1926             case DlbState::offForever:
1927                 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1928                 break;
1929             case DlbState::offCanTurnOn:
1930                 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1931             case DlbState::onCanTurnOff:
1932                 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1933                 break;
1934             case DlbState::onUser:
1935                 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", mdlog);
1936             default:
1937                 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", static_cast<int>(dlbState));
1938         }
1939     }
1940
1941     return dlbState;
1942 }
1943
1944 static gmx_domdec_comm_t *init_dd_comm()
1945 {
1946     gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
1947
1948     comm->n_load_have      = 0;
1949     comm->n_load_collect   = 0;
1950
1951     comm->haveTurnedOffDlb = false;
1952
1953     for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
1954     {
1955         comm->sum_nat[i] = 0;
1956     }
1957     comm->ndecomp   = 0;
1958     comm->nload     = 0;
1959     comm->load_step = 0;
1960     comm->load_sum  = 0;
1961     comm->load_max  = 0;
1962     clear_ivec(comm->load_lim);
1963     comm->load_mdf  = 0;
1964     comm->load_pme  = 0;
1965
1966     /* This should be replaced by a unique pointer */
1967     comm->balanceRegion = ddBalanceRegionAllocate();
1968
1969     return comm;
1970 }
1971
1972 /* Returns whether mtop contains constraints and/or vsites */
1973 static bool systemHasConstraintsOrVsites(const gmx_mtop_t &mtop)
1974 {
1975     auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
1976     int  nmol;
1977     while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
1978     {
1979         if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
1980         {
1981             return true;
1982         }
1983     }
1984
1985     return false;
1986 }
1987
1988 static void setupUpdateGroups(const gmx::MDLogger &mdlog,
1989                               const gmx_mtop_t    &mtop,
1990                               const t_inputrec    &inputrec,
1991                               const real           cutoffMargin,
1992                               DDSystemInfo        *systemInfo)
1993 {
1994     /* When we have constraints and/or vsites, it is beneficial to use
1995      * update groups (when possible) to allow independent update of groups.
1996      */
1997     if (!systemHasConstraintsOrVsites(mtop))
1998     {
1999         /* No constraints or vsites, atoms can be updated independently */
2000         return;
2001     }
2002
2003     systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
2004     systemInfo->useUpdateGroups               =
2005         (!systemInfo->updateGroupingPerMoleculetype.empty() &&
2006          getenv("GMX_NO_UPDATEGROUPS") == nullptr);
2007
2008     if (systemInfo->useUpdateGroups)
2009     {
2010         int numUpdateGroups = 0;
2011         for (const auto &molblock : mtop.molblock)
2012         {
2013             numUpdateGroups += molblock.nmol*systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
2014         }
2015
2016         systemInfo->maxUpdateGroupRadius =
2017             computeMaxUpdateGroupRadius(mtop,
2018                                         systemInfo->updateGroupingPerMoleculetype,
2019                                         maxReferenceTemperature(inputrec));
2020
2021         /* To use update groups, the large domain-to-domain cutoff distance
2022          * should be compatible with the box size.
2023          */
2024         systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
2025
2026         if (systemInfo->useUpdateGroups)
2027         {
2028             GMX_LOG(mdlog.info).appendTextFormatted(
2029                     "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f nm\n",
2030                     numUpdateGroups,
2031                     mtop.natoms/static_cast<double>(numUpdateGroups),
2032                     systemInfo->maxUpdateGroupRadius);
2033         }
2034         else
2035         {
2036             GMX_LOG(mdlog.info).appendTextFormatted("The combination of rlist and box size prohibits the use of update groups\n");
2037             systemInfo->updateGroupingPerMoleculetype.clear();
2038         }
2039     }
2040 }
2041
2042 UnitCellInfo::UnitCellInfo(const t_inputrec &ir) :
2043     npbcdim(ePBC2npbcdim(ir.ePBC)),
2044     numBoundedDimensions(inputrec2nboundeddim(&ir)),
2045     ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
2046     haveScrewPBC(ir.ePBC == epbcSCREW)
2047 {
2048 }
2049
2050 /* Returns whether molecules are always whole, i.e. not broken by PBC */
2051 static bool
2052 moleculesAreAlwaysWhole(const gmx_mtop_t                            &mtop,
2053                         const bool                                   useUpdateGroups,
2054                         gmx::ArrayRef<const gmx::RangePartitioning>  updateGroupingPerMoleculetype)
2055 {
2056     if (useUpdateGroups)
2057     {
2058         GMX_RELEASE_ASSERT(updateGroupingPerMoleculetype.size() == mtop.moltype.size(),
2059                            "Need one grouping per moltype");
2060         for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
2061         {
2062             if (updateGroupingPerMoleculetype[mol].numBlocks() > 1)
2063             {
2064                 return false;
2065             }
2066         }
2067     }
2068     else
2069     {
2070         for (const auto &moltype : mtop.moltype)
2071         {
2072             if (moltype.atoms.nr > 1)
2073             {
2074                 return false;
2075             }
2076         }
2077     }
2078
2079     return true;
2080 }
2081
2082 /*! \brief Generate the simulation system information */
2083 static DDSystemInfo
2084 getSystemInfo(const gmx::MDLogger           &mdlog,
2085               const t_commrec               *cr,
2086               const DomdecOptions           &options,
2087               const gmx_mtop_t              &mtop,
2088               const t_inputrec              &ir,
2089               const matrix                   box,
2090               gmx::ArrayRef<const gmx::RVec> xGlobal)
2091 {
2092     const real         tenPercentMargin = 1.1;
2093
2094     DDSystemInfo       systemInfo;
2095
2096     /* We need to decide on update groups early, as this affects communication distances */
2097     systemInfo.useUpdateGroups = false;
2098     if (ir.cutoff_scheme == ecutsVERLET)
2099     {
2100         real cutoffMargin = std::sqrt(max_cutoff2(ir.ePBC, box)) - ir.rlist;
2101         setupUpdateGroups(mdlog, mtop, ir, cutoffMargin, &systemInfo);
2102     }
2103
2104     systemInfo.moleculesAreAlwaysWhole         =
2105         moleculesAreAlwaysWhole(mtop,
2106                                 systemInfo.useUpdateGroups,
2107                                 systemInfo.updateGroupingPerMoleculetype);
2108     systemInfo.haveInterDomainBondeds          = (!systemInfo.moleculesAreAlwaysWhole ||
2109                                                   mtop.bIntermolecularInteractions);
2110     systemInfo.haveInterDomainMultiBodyBondeds = (systemInfo.haveInterDomainBondeds &&
2111                                                   multi_body_bondeds_count(&mtop) > 0);
2112
2113     if (systemInfo.useUpdateGroups)
2114     {
2115         systemInfo.haveSplitConstraints = false;
2116         systemInfo.haveSplitSettles     = false;
2117     }
2118     else
2119     {
2120         systemInfo.haveSplitConstraints = gmx::inter_charge_group_constraints(mtop);
2121         systemInfo.haveSplitSettles     = gmx::inter_charge_group_settles(mtop);
2122     }
2123
2124     if (ir.rlist == 0)
2125     {
2126         /* Set the cut-off to some very large value,
2127          * so we don't need if statements everywhere in the code.
2128          * We use sqrt, since the cut-off is squared in some places.
2129          */
2130         systemInfo.cutoff = GMX_CUTOFF_INF;
2131     }
2132     else
2133     {
2134         systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir.rlist);
2135     }
2136     systemInfo.minCutoffForMultiBody = 0;
2137
2138     /* Determine the minimum cell size limit, affected by many factors */
2139     systemInfo.cellsizeLimit             = 0;
2140     systemInfo.filterBondedCommunication = false;
2141
2142     /* We do not allow home atoms to move beyond the neighboring domain
2143      * between domain decomposition steps, which limits the cell size.
2144      * Get an estimate of cell size limit due to atom displacement.
2145      * In most cases this is a large overestimate, because it assumes
2146      * non-interaction atoms.
2147      * We set the chance to 1 in a trillion steps.
2148      */
2149     constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2150     const real     limitForAtomDisplacement          =
2151         minCellSizeForAtomDisplacement(mtop, ir,
2152                                        systemInfo.updateGroupingPerMoleculetype,
2153                                        c_chanceThatAtomMovesBeyondDomain);
2154     GMX_LOG(mdlog.info).appendTextFormatted(
2155             "Minimum cell size due to atom displacement: %.3f nm",
2156             limitForAtomDisplacement);
2157
2158     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2159                                         limitForAtomDisplacement);
2160
2161     /* TODO: PME decomposition currently requires atoms not to be more than
2162      *       2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2163      *       In nearly all cases, limitForAtomDisplacement will be smaller
2164      *       than 2/3*rlist, so the PME requirement is satisfied.
2165      *       But it would be better for both correctness and performance
2166      *       to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2167      *       Note that we would need to improve the pairlist buffer case.
2168      */
2169
2170     if (systemInfo.haveInterDomainBondeds)
2171     {
2172         if (options.minimumCommunicationRange > 0)
2173         {
2174             systemInfo.minCutoffForMultiBody =
2175                 atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2176             if (options.useBondedCommunication)
2177             {
2178                 systemInfo.filterBondedCommunication = (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2179             }
2180             else
2181             {
2182                 systemInfo.cutoff = std::max(systemInfo.cutoff,
2183                                              systemInfo.minCutoffForMultiBody);
2184             }
2185         }
2186         else if (ir.bPeriodicMols)
2187         {
2188             /* Can not easily determine the required cut-off */
2189             GMX_LOG(mdlog.warning).appendText("NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.");
2190             systemInfo.minCutoffForMultiBody = systemInfo.cutoff/2;
2191         }
2192         else
2193         {
2194             real r_2b, r_mb;
2195
2196             if (MASTER(cr))
2197             {
2198                 dd_bonded_cg_distance(mdlog, &mtop, &ir, as_rvec_array(xGlobal.data()), box,
2199                                       options.checkBondedInteractions,
2200                                       &r_2b, &r_mb);
2201             }
2202             gmx_bcast(sizeof(r_2b), &r_2b, cr);
2203             gmx_bcast(sizeof(r_mb), &r_mb, cr);
2204
2205             /* We use an initial margin of 10% for the minimum cell size,
2206              * except when we are just below the non-bonded cut-off.
2207              */
2208             if (options.useBondedCommunication)
2209             {
2210                 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2211                 {
2212                     const real r_bonded              = std::max(r_2b, r_mb);
2213                     systemInfo.minCutoffForMultiBody = tenPercentMargin*r_bonded;
2214                     /* This is the (only) place where we turn on the filtering */
2215                     systemInfo.filterBondedCommunication = true;
2216                 }
2217                 else
2218                 {
2219                     const real r_bonded              = r_mb;
2220                     systemInfo.minCutoffForMultiBody = std::min(tenPercentMargin*r_bonded,
2221                                                                 systemInfo.cutoff);
2222                 }
2223                 /* We determine cutoff_mbody later */
2224                 systemInfo.increaseMultiBodyCutoff = true;
2225             }
2226             else
2227             {
2228                 /* No special bonded communication,
2229                  * simply increase the DD cut-off.
2230                  */
2231                 systemInfo.minCutoffForMultiBody = tenPercentMargin*std::max(r_2b, r_mb);
2232                 systemInfo.cutoff                = std::max(systemInfo.cutoff,
2233                                                             systemInfo.minCutoffForMultiBody);
2234             }
2235         }
2236         GMX_LOG(mdlog.info).appendTextFormatted(
2237                 "Minimum cell size due to bonded interactions: %.3f nm",
2238                 systemInfo.minCutoffForMultiBody);
2239
2240         systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2241                                             systemInfo.minCutoffForMultiBody);
2242     }
2243
2244     systemInfo.constraintCommunicationRange = 0;
2245     if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
2246     {
2247         /* There is a cell size limit due to the constraints (P-LINCS) */
2248         systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, &mtop, &ir);
2249         GMX_LOG(mdlog.info).appendTextFormatted(
2250                 "Estimated maximum distance required for P-LINCS: %.3f nm",
2251                 systemInfo.constraintCommunicationRange);
2252         if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2253         {
2254             GMX_LOG(mdlog.info).appendText("This distance will limit the DD cell size, you can override this with -rcon");
2255         }
2256     }
2257     else if (options.constraintCommunicationRange > 0)
2258     {
2259         /* Here we do not check for dd->splitConstraints.
2260          * because one can also set a cell size limit for virtual sites only
2261          * and at this point we don't know yet if there are intercg v-sites.
2262          */
2263         GMX_LOG(mdlog.info).appendTextFormatted(
2264                 "User supplied maximum distance required for P-LINCS: %.3f nm",
2265                 options.constraintCommunicationRange);
2266         systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2267     }
2268     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2269                                         systemInfo.constraintCommunicationRange);
2270
2271     return systemInfo;
2272 }
2273
2274 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
2275  * implemented. */
2276 static void
2277 checkDDGridSetup(const DDGridSetup   &ddGridSetup,
2278                  const t_commrec     *cr,
2279                  const DomdecOptions &options,
2280                  const DDSettings    &ddSettings,
2281                  const DDSystemInfo  &systemInfo,
2282                  const real           cellsizeLimit,
2283                  const gmx_ddbox_t   &ddbox)
2284 {
2285     if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
2286     {
2287         char     buf[STRLEN];
2288         gmx_bool bC = (systemInfo.haveSplitConstraints &&
2289                        systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2290         sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
2291                 !bC ? "-rdd" : "-rcon",
2292                 ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2293                 bC ? " or your LINCS settings" : "");
2294
2295         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2296                              "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
2297                              "%s\n"
2298                              "Look in the log file for details on the domain decomposition",
2299                              cr->nnodes - ddGridSetup.numPmeOnlyRanks,
2300                              cellsizeLimit,
2301                              buf);
2302     }
2303
2304     const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
2305     if (acs < cellsizeLimit)
2306     {
2307         if (options.numCells[XX] <= 0)
2308         {
2309             GMX_RELEASE_ASSERT(false, "dd_choose_grid() should return a grid that satisfies the cell size limits");
2310         }
2311         else
2312         {
2313             gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2314                                  "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
2315                                  acs, cellsizeLimit);
2316         }
2317     }
2318
2319     const int numPPRanks = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2320     if (cr->nnodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2321     {
2322         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2323                              "The size of the domain decomposition grid (%d) does not match the number of PP ranks (%d). The total number of ranks is %d",
2324                              numPPRanks, cr->nnodes - ddGridSetup.numPmeOnlyRanks, cr->nnodes);
2325     }
2326     if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2327     {
2328         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2329                              "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", ddGridSetup.numPmeOnlyRanks, numPPRanks);
2330     }
2331 }
2332
2333 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2334 static DDRankSetup
2335 getDDRankSetup(const gmx::MDLogger &mdlog,
2336                t_commrec           *cr,
2337                const DDGridSetup   &ddGridSetup,
2338                const t_inputrec    &ir)
2339 {
2340     GMX_LOG(mdlog.info).appendTextFormatted(
2341             "Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2342             ddGridSetup.numDomains[XX], ddGridSetup.numDomains[YY], ddGridSetup.numDomains[ZZ],
2343             ddGridSetup.numPmeOnlyRanks);
2344
2345     DDRankSetup ddRankSetup;
2346
2347     ddRankSetup.numPPRanks = cr->nnodes - ddGridSetup.numPmeOnlyRanks;
2348     copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2349
2350     ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2351     if (ddRankSetup.usePmeOnlyRanks)
2352     {
2353         ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2354     }
2355     else
2356     {
2357         ddRankSetup.numRanksDoingPme = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2358     }
2359
2360     if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2361     {
2362         /* The following choices should match those
2363          * in comm_cost_est in domdec_setup.c.
2364          * Note that here the checks have to take into account
2365          * that the decomposition might occur in a different order than xyz
2366          * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2367          * in which case they will not match those in comm_cost_est,
2368          * but since that is mainly for testing purposes that's fine.
2369          */
2370         if (ddGridSetup.numDDDimensions >= 2 &&
2371             ddGridSetup.ddDimensions[0] == XX &&
2372             ddGridSetup.ddDimensions[1] == YY &&
2373             ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX] &&
2374             ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0 &&
2375             getenv("GMX_PMEONEDD") == nullptr)
2376         {
2377             ddRankSetup.npmedecompdim = 2;
2378             ddRankSetup.npmenodes_x   = ddGridSetup.numDomains[XX];
2379             ddRankSetup.npmenodes_y   = ddRankSetup.numRanksDoingPme/ddRankSetup.npmenodes_x;
2380         }
2381         else
2382         {
2383             /* In case nc is 1 in both x and y we could still choose to
2384              * decompose pme in y instead of x, but we use x for simplicity.
2385              */
2386             ddRankSetup.npmedecompdim = 1;
2387             if (ddGridSetup.ddDimensions[0] == YY)
2388             {
2389                 ddRankSetup.npmenodes_x = 1;
2390                 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2391             }
2392             else
2393             {
2394                 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2395                 ddRankSetup.npmenodes_y = 1;
2396             }
2397         }
2398         GMX_LOG(mdlog.info).appendTextFormatted(
2399                 "PME domain decomposition: %d x %d x %d",
2400                 ddRankSetup.npmenodes_x, ddRankSetup.npmenodes_y, 1);
2401     }
2402     else
2403     {
2404         ddRankSetup.npmedecompdim = 0;
2405         ddRankSetup.npmenodes_x   = 0;
2406         ddRankSetup.npmenodes_y   = 0;
2407     }
2408
2409     return ddRankSetup;
2410 }
2411
2412 /*! \brief Set the cell size and interaction limits */
2413 static void set_dd_limits(const gmx::MDLogger &mdlog,
2414                           t_commrec *cr, gmx_domdec_t *dd,
2415                           const DomdecOptions &options,
2416                           const DDSettings &ddSettings,
2417                           const DDSystemInfo &systemInfo,
2418                           const DDGridSetup &ddGridSetup,
2419                           const int numPPRanks,
2420                           const gmx_mtop_t *mtop,
2421                           const t_inputrec *ir,
2422                           const gmx_ddbox_t &ddbox)
2423 {
2424     gmx_domdec_comm_t *comm = dd->comm;
2425     comm->ddSettings        = ddSettings;
2426
2427     /* Initialize to GPU share count to 0, might change later */
2428     comm->nrank_gpu_shared = 0;
2429
2430     comm->dlbState         = comm->ddSettings.initialDlbState;
2431     dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2432     /* To consider turning DLB on after 2*nstlist steps we need to check
2433      * at partitioning count 3. Thus we need to increase the first count by 2.
2434      */
2435     comm->ddPartioningCountFirstDlbOff += 2;
2436
2437     comm->bPMELoadBalDLBLimits          = FALSE;
2438
2439     /* Allocate the charge group/atom sorting struct */
2440     comm->sort = std::make_unique<gmx_domdec_sort_t>();
2441
2442     comm->systemInfo = systemInfo;
2443
2444     if (systemInfo.useUpdateGroups)
2445     {
2446         /* Note: We would like to use dd->nnodes for the atom count estimate,
2447          *       but that is not yet available here. But this anyhow only
2448          *       affect performance up to the second dd_partition_system call.
2449          */
2450         const int homeAtomCountEstimate =  mtop->natoms/numPPRanks;
2451         comm->updateGroupsCog =
2452             std::make_unique<gmx::UpdateGroupsCog>(*mtop,
2453                                                    systemInfo.updateGroupingPerMoleculetype,
2454                                                    maxReferenceTemperature(*ir),
2455                                                    homeAtomCountEstimate);
2456     }
2457
2458     /* Set the DD setup given by ddGridSetup */
2459     copy_ivec(ddGridSetup.numDomains, dd->nc);
2460     dd->ndim = ddGridSetup.numDDDimensions;
2461     copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2462
2463     dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
2464
2465     snew(comm->slb_frac, DIM);
2466     if (isDlbDisabled(comm))
2467     {
2468         comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
2469         comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
2470         comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
2471     }
2472
2473     /* Set the multi-body cut-off and cellsize limit for DLB */
2474     comm->cutoff_mbody   = systemInfo.minCutoffForMultiBody;
2475     comm->cellsize_limit = systemInfo.cellsizeLimit;
2476     if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2477     {
2478         if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2479         {
2480             /* Set the bonded communication distance to halfway
2481              * the minimum and the maximum,
2482              * since the extra communication cost is nearly zero.
2483              */
2484             real acs           = average_cellsize_min(ddbox, dd->nc);
2485             comm->cutoff_mbody = 0.5*(systemInfo.minCutoffForMultiBody + acs);
2486             if (!isDlbDisabled(comm))
2487             {
2488                 /* Check if this does not limit the scaling */
2489                 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
2490                                               options.dlbScaling*acs);
2491             }
2492             if (!systemInfo.filterBondedCommunication)
2493             {
2494                 /* Without bBondComm do not go beyond the n.b. cut-off */
2495                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2496                 if (comm->cellsize_limit >= systemInfo.cutoff)
2497                 {
2498                     /* We don't loose a lot of efficieny
2499                      * when increasing it to the n.b. cut-off.
2500                      * It can even be slightly faster, because we need
2501                      * less checks for the communication setup.
2502                      */
2503                     comm->cutoff_mbody = systemInfo.cutoff;
2504                 }
2505             }
2506             /* Check if we did not end up below our original limit */
2507             comm->cutoff_mbody = std::max(comm->cutoff_mbody,
2508                                           systemInfo.minCutoffForMultiBody);
2509
2510             if (comm->cutoff_mbody > comm->cellsize_limit)
2511             {
2512                 comm->cellsize_limit = comm->cutoff_mbody;
2513             }
2514         }
2515         /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2516     }
2517
2518     if (debug)
2519     {
2520         fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
2521                 "cellsize limit %f\n",
2522                 gmx::boolToString(systemInfo.filterBondedCommunication),
2523                 comm->cellsize_limit);
2524     }
2525
2526     if (MASTER(cr))
2527     {
2528         check_dd_restrictions(dd, ir, mdlog);
2529     }
2530 }
2531
2532 void dd_init_bondeds(FILE                       *fplog,
2533                      gmx_domdec_t               *dd,
2534                      const gmx_mtop_t           *mtop,
2535                      const gmx_vsite_t          *vsite,
2536                      const t_inputrec           *ir,
2537                      gmx_bool                    bBCheck,
2538                      cginfo_mb_t                *cginfo_mb)
2539 {
2540     gmx_domdec_comm_t *comm;
2541
2542     dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
2543
2544     comm = dd->comm;
2545
2546     if (comm->systemInfo.filterBondedCommunication)
2547     {
2548         /* Communicate atoms beyond the cut-off for bonded interactions */
2549         comm->bondedLinks = makeBondedLinks(mtop, cginfo_mb);
2550     }
2551     else
2552     {
2553         /* Only communicate atoms based on cut-off */
2554         comm->bondedLinks = nullptr;
2555     }
2556 }
2557
2558 static void writeSettings(gmx::TextWriter       *log,
2559                           gmx_domdec_t          *dd,
2560                           const gmx_mtop_t      *mtop,
2561                           const t_inputrec      *ir,
2562                           gmx_bool               bDynLoadBal,
2563                           real                   dlb_scale,
2564                           const gmx_ddbox_t     *ddbox)
2565 {
2566     gmx_domdec_comm_t *comm;
2567     int                d;
2568     ivec               np;
2569     real               limit, shrink;
2570
2571     comm = dd->comm;
2572
2573     if (bDynLoadBal)
2574     {
2575         log->writeString("The maximum number of communication pulses is:");
2576         for (d = 0; d < dd->ndim; d++)
2577         {
2578             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2579         }
2580         log->ensureLineBreak();
2581         log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm", comm->cellsize_limit);
2582         log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2583         log->writeString("The allowed shrink of domain decomposition cells is:");
2584         for (d = 0; d < DIM; d++)
2585         {
2586             if (dd->nc[d] > 1)
2587             {
2588                 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
2589                 {
2590                     shrink = 0;
2591                 }
2592                 else
2593                 {
2594                     shrink =
2595                         comm->cellsize_min_dlb[d]/
2596                         (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
2597                 }
2598                 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2599             }
2600         }
2601         log->ensureLineBreak();
2602     }
2603     else
2604     {
2605         set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2606         log->writeString("The initial number of communication pulses is:");
2607         for (d = 0; d < dd->ndim; d++)
2608         {
2609             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2610         }
2611         log->ensureLineBreak();
2612         log->writeString("The initial domain decomposition cell size is:");
2613         for (d = 0; d < DIM; d++)
2614         {
2615             if (dd->nc[d] > 1)
2616             {
2617                 log->writeStringFormatted(" %c %.2f nm",
2618                                           dim2char(d), dd->comm->cellsize_min[d]);
2619             }
2620         }
2621         log->ensureLineBreak();
2622         log->writeLine();
2623     }
2624
2625     const bool haveInterDomainVsites =
2626         (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
2627
2628     if (comm->systemInfo.haveInterDomainBondeds ||
2629         haveInterDomainVsites ||
2630         comm->systemInfo.haveSplitConstraints ||
2631         comm->systemInfo.haveSplitSettles)
2632     {
2633         std::string decompUnits;
2634         if (comm->systemInfo.useUpdateGroups)
2635         {
2636             decompUnits = "atom groups";
2637         }
2638         else
2639         {
2640             decompUnits = "atoms";
2641         }
2642
2643         log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:", decompUnits.c_str());
2644         log->writeLineFormatted("%40s  %-7s %6.3f nm", "non-bonded interactions", "", comm->systemInfo.cutoff);
2645
2646         if (bDynLoadBal)
2647         {
2648             limit = dd->comm->cellsize_limit;
2649         }
2650         else
2651         {
2652             if (dd->unitCellInfo.ddBoxIsDynamic)
2653             {
2654                 log->writeLine("(the following are initial values, they could change due to box deformation)");
2655             }
2656             limit = dd->comm->cellsize_min[XX];
2657             for (d = 1; d < DIM; d++)
2658             {
2659                 limit = std::min(limit, dd->comm->cellsize_min[d]);
2660             }
2661         }
2662
2663         if (comm->systemInfo.haveInterDomainBondeds)
2664         {
2665             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2666                                     "two-body bonded interactions", "(-rdd)",
2667                                     std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2668             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2669                                     "multi-body bonded interactions", "(-rdd)",
2670                                     (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->systemInfo.cutoff, limit));
2671         }
2672         if (haveInterDomainVsites)
2673         {
2674             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2675                                     "virtual site constructions", "(-rcon)", limit);
2676         }
2677         if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2678         {
2679             std::string separation = gmx::formatString("atoms separated by up to %d constraints",
2680                                                        1+ir->nProjOrder);
2681             log->writeLineFormatted("%40s  %-7s %6.3f nm\n",
2682                                     separation.c_str(), "(-rcon)", limit);
2683         }
2684         log->ensureLineBreak();
2685     }
2686 }
2687
2688 static void logSettings(const gmx::MDLogger &mdlog,
2689                         gmx_domdec_t        *dd,
2690                         const gmx_mtop_t    *mtop,
2691                         const t_inputrec    *ir,
2692                         real                 dlb_scale,
2693                         const gmx_ddbox_t   *ddbox)
2694 {
2695     gmx::StringOutputStream stream;
2696     gmx::TextWriter         log(&stream);
2697     writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2698     if (dd->comm->dlbState == DlbState::offCanTurnOn)
2699     {
2700         {
2701             log.ensureEmptyLine();
2702             log.writeLine("When dynamic load balancing gets turned on, these settings will change to:");
2703         }
2704         writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2705     }
2706     GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2707 }
2708
2709 static void set_cell_limits_dlb(const gmx::MDLogger &mdlog,
2710                                 gmx_domdec_t        *dd,
2711                                 real                 dlb_scale,
2712                                 const t_inputrec    *ir,
2713                                 const gmx_ddbox_t   *ddbox)
2714 {
2715     gmx_domdec_comm_t *comm;
2716     int                d, dim, npulse, npulse_d_max, npulse_d;
2717     gmx_bool           bNoCutOff;
2718
2719     comm = dd->comm;
2720
2721     bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2722
2723     /* Determine the maximum number of comm. pulses in one dimension */
2724
2725     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2726
2727     /* Determine the maximum required number of grid pulses */
2728     if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2729     {
2730         /* Only a single pulse is required */
2731         npulse = 1;
2732     }
2733     else if (!bNoCutOff && comm->cellsize_limit > 0)
2734     {
2735         /* We round down slightly here to avoid overhead due to the latency
2736          * of extra communication calls when the cut-off
2737          * would be only slightly longer than the cell size.
2738          * Later cellsize_limit is redetermined,
2739          * so we can not miss interactions due to this rounding.
2740          */
2741         npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff/comm->cellsize_limit);
2742     }
2743     else
2744     {
2745         /* There is no cell size limit */
2746         npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
2747     }
2748
2749     if (!bNoCutOff && npulse > 1)
2750     {
2751         /* See if we can do with less pulses, based on dlb_scale */
2752         npulse_d_max = 0;
2753         for (d = 0; d < dd->ndim; d++)
2754         {
2755             dim      = dd->dim[d];
2756             npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->systemInfo.cutoff
2757                                         /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
2758             npulse_d_max = std::max(npulse_d_max, npulse_d);
2759         }
2760         npulse = std::min(npulse, npulse_d_max);
2761     }
2762
2763     /* This env var can override npulse */
2764     d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2765     if (d > 0)
2766     {
2767         npulse = d;
2768     }
2769
2770     comm->maxpulse       = 1;
2771     comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
2772     for (d = 0; d < dd->ndim; d++)
2773     {
2774         if (comm->ddSettings.request1DAnd1Pulse)
2775         {
2776             comm->cd[d].np_dlb = 1;
2777         }
2778         else
2779         {
2780             comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
2781             comm->maxpulse     = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2782         }
2783         if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
2784         {
2785             comm->bVacDLBNoLimit = FALSE;
2786         }
2787     }
2788
2789     /* cellsize_limit is set for LINCS in init_domain_decomposition */
2790     if (!comm->bVacDLBNoLimit)
2791     {
2792         comm->cellsize_limit = std::max(comm->cellsize_limit,
2793                                         comm->systemInfo.cutoff/comm->maxpulse);
2794     }
2795     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2796     /* Set the minimum cell size for each DD dimension */
2797     for (d = 0; d < dd->ndim; d++)
2798     {
2799         if (comm->bVacDLBNoLimit ||
2800             comm->cd[d].np_dlb*comm->cellsize_limit >= comm->systemInfo.cutoff)
2801         {
2802             comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2803         }
2804         else
2805         {
2806             comm->cellsize_min_dlb[dd->dim[d]] =
2807                 comm->systemInfo.cutoff/comm->cd[d].np_dlb;
2808         }
2809     }
2810     if (comm->cutoff_mbody <= 0)
2811     {
2812         comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2813     }
2814     if (isDlbOn(comm))
2815     {
2816         set_dlb_limits(dd);
2817     }
2818 }
2819
2820 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t &dd)
2821 {
2822     return dd.comm->systemInfo.moleculesAreAlwaysWhole;
2823 }
2824
2825 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
2826 {
2827     /* If each molecule is a single charge group
2828      * or we use domain decomposition for each periodic dimension,
2829      * we do not need to take pbc into account for the bonded interactions.
2830      */
2831     return (ePBC != epbcNONE && dd->comm->systemInfo.haveInterDomainBondeds &&
2832             !(dd->nc[XX] > 1 &&
2833               dd->nc[YY] > 1 &&
2834               (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
2835 }
2836
2837 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2838 static void set_ddgrid_parameters(const gmx::MDLogger &mdlog,
2839                                   gmx_domdec_t *dd, real dlb_scale,
2840                                   const gmx_mtop_t *mtop, const t_inputrec *ir,
2841                                   const gmx_ddbox_t *ddbox)
2842 {
2843     gmx_domdec_comm_t *comm        = dd->comm;
2844     DDRankSetup       &ddRankSetup = comm->ddRankSetup;
2845
2846     if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2847     {
2848         init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2849         if (ddRankSetup.npmedecompdim >= 2)
2850         {
2851             init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2852         }
2853     }
2854     else
2855     {
2856         ddRankSetup.numRanksDoingPme = 0;
2857         if (dd->pme_nodeid >= 0)
2858         {
2859             gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2860                                  "Can not have separate PME ranks without PME electrostatics");
2861         }
2862     }
2863
2864     if (debug)
2865     {
2866         fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2867     }
2868     if (!isDlbDisabled(comm))
2869     {
2870         set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2871     }
2872
2873     logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
2874
2875     real vol_frac;
2876     if (ir->ePBC == epbcNONE)
2877     {
2878         vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
2879     }
2880     else
2881     {
2882         vol_frac =
2883             (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, *ddbox))/static_cast<double>(dd->nnodes);
2884     }
2885     if (debug)
2886     {
2887         fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2888     }
2889     int natoms_tot = mtop->natoms;
2890
2891     dd->ga2la      = new gmx_ga2la_t(natoms_tot,
2892                                      static_cast<int>(vol_frac*natoms_tot));
2893 }
2894
2895 /*! \brief Get some important DD parameters which can be modified by env.vars */
2896 static DDSettings
2897 getDDSettings(const gmx::MDLogger     &mdlog,
2898               const DomdecOptions     &options,
2899               const gmx::MdrunOptions &mdrunOptions,
2900               const t_inputrec        &ir)
2901 {
2902     DDSettings ddSettings;
2903
2904     ddSettings.useSendRecv2        = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2905     ddSettings.dlb_scale_lim       = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2906     ddSettings.request1DAnd1Pulse  = bool(dd_getenv(mdlog, "GMX_DD_1D_1PULSE", 0));
2907     ddSettings.useDDOrderZYX       = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
2908     ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
2909     ddSettings.eFlop               = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2910     const int recload              = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2911     ddSettings.nstDDDump           = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2912     ddSettings.nstDDDumpGrid       = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2913     ddSettings.DD_debug            = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
2914
2915     if (ddSettings.useSendRecv2)
2916     {
2917         GMX_LOG(mdlog.info).appendText("Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication");
2918     }
2919
2920     if (ddSettings.eFlop)
2921     {
2922         GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2923         ddSettings.recordLoad = true;
2924     }
2925     else
2926     {
2927         ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
2928     }
2929
2930     ddSettings.initialDlbState =
2931         determineInitialDlbState(mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, &ir);
2932     GMX_LOG(mdlog.info).appendTextFormatted("Dynamic load balancing: %s",
2933                                             edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);
2934
2935     return ddSettings;
2936 }
2937
2938 gmx_domdec_t::gmx_domdec_t(const t_inputrec &ir) :
2939     unitCellInfo(ir)
2940 {
2941 }
2942
2943 /*! \brief Return whether the simulation described can run a 1D single-pulse DD.
2944  *
2945  * The GPU halo exchange code requires a 1D single-pulse DD. Such a DD
2946  * generally requires a larger box than other possible decompositions
2947  * with the same rank count, so the calling code might need to decide
2948  * what is the most appropriate way to run the simulation based on
2949  * whether such a DD is possible.
2950  *
2951  * This function works like init_domain_decomposition(), but will not
2952  * give a fatal error, and only uses \c cr for communicating between
2953  * ranks.
2954  *
2955  * It is safe to call before thread-MPI spawns ranks, so that
2956  * thread-MPI can decide whether and how to trigger the GPU halo
2957  * exchange code path. The number of PME ranks, if any, should be set
2958  * in \c options.numPmeRanks.
2959  */
2960 static bool
2961 canMake1DAnd1PulseDomainDecomposition(const DDSettings              &ddSettingsOriginal,
2962                                       const t_commrec               *cr,
2963                                       const int                      numRanksRequested,
2964                                       const DomdecOptions           &options,
2965                                       const gmx_mtop_t              &mtop,
2966                                       const t_inputrec              &ir,
2967                                       const matrix                   box,
2968                                       gmx::ArrayRef<const gmx::RVec> xGlobal)
2969 {
2970     // Ensure we don't write any output from this checking routine
2971     gmx::MDLogger dummyLogger;
2972
2973     DDSystemInfo  systemInfo = getSystemInfo(dummyLogger, cr, options, mtop, ir, box, xGlobal);
2974
2975     DDSettings    ddSettings = ddSettingsOriginal;
2976     ddSettings.request1DAnd1Pulse = true;
2977     const real    gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(dummyLogger, ddSettings.request1DAnd1Pulse,
2978                                                                        !isDlbDisabled(ddSettings.initialDlbState),
2979                                                                        options.dlbScaling, ir,
2980                                                                        systemInfo.cellsizeLimit);
2981     gmx_ddbox_t ddbox       = {0};
2982     DDGridSetup ddGridSetup = getDDGridSetup(dummyLogger, cr, numRanksRequested, options,
2983                                              ddSettings, systemInfo, gridSetupCellsizeLimit,
2984                                              mtop, ir, box, xGlobal, &ddbox);
2985
2986     const bool canMakeDDWith1DAnd1Pulse = (ddGridSetup.numDomains[XX] != 0);
2987
2988     return canMakeDDWith1DAnd1Pulse;
2989 }
2990
2991 bool is1DAnd1PulseDD(const gmx_domdec_t &dd)
2992 {
2993     const int  maxDimensionSize             = std::max(dd.nc[XX], std::max(dd.nc[YY], dd.nc[ZZ]));
2994     const int  productOfDimensionSizes      = dd.nc[XX]*dd.nc[YY]*dd.nc[ZZ];
2995     const bool decompositionHasOneDimension = (maxDimensionSize == productOfDimensionSizes);
2996
2997     const bool hasMax1Pulse =
2998         ((isDlbDisabled(dd.comm) && dd.comm->cellsize_limit >= dd.comm->systemInfo.cutoff) ||
2999          (!isDlbDisabled(dd.comm) && dd.comm->maxpulse == 1));
3000
3001     return decompositionHasOneDimension && hasMax1Pulse;
3002
3003 }
3004
3005 namespace gmx
3006 {
3007
3008 // TODO once the functionality stablizes, move this class and
3009 // supporting functionality into builder.cpp
3010 /*! \brief Impl class for DD builder */
3011 class DomainDecompositionBuilder::Impl
3012 {
3013     public:
3014         //! Constructor
3015         Impl(const MDLogger      &mdlog,
3016              t_commrec           *cr,
3017              const DomdecOptions &options,
3018              const MdrunOptions  &mdrunOptions,
3019              bool                 prefer1DAnd1Pulse,
3020              const gmx_mtop_t    &mtop,
3021              const t_inputrec    &ir,
3022              const matrix         box,
3023              ArrayRef<const RVec> xGlobal);
3024
3025         //! Build the resulting DD manager
3026         gmx_domdec_t *build(LocalAtomSetManager *atomSets);
3027
3028         //! Objects used in constructing and configuring DD
3029         //! {
3030         //! Logging object
3031         const MDLogger      &mdlog_;
3032         //! Communication object
3033         t_commrec           *cr_;
3034         //! User-supplied options configuring DD behavior
3035         const DomdecOptions  options_;
3036         //! Global system topology
3037         const gmx_mtop_t    &mtop_;
3038         //! User input values from the tpr file
3039         const t_inputrec    &ir_;
3040         //! }
3041
3042         //! Internal objects used in constructing DD
3043         //! {
3044         //! Settings combined from the user input
3045         DDSettings           ddSettings_;
3046         //! Information derived from the simulation system
3047         DDSystemInfo         systemInfo_;
3048         //! Box structure
3049         gmx_ddbox_t          ddbox_ = { 0 };
3050         //! Organization of the DD grids
3051         DDGridSetup          ddGridSetup_;
3052         //! Organzation of the DD ranks
3053         DDRankSetup          ddRankSetup_;
3054         //! Number of DD cells in each dimension
3055         ivec                 ddCellIndex_ = { 0, 0, 0 };
3056         //! IDs of PME-only ranks
3057         std::vector<int>     pmeRanks_;
3058         //! Contains a valid Cartesian-communicator-based setup, or defaults.
3059         CartesianRankSetup   cartSetup_;
3060         //! }
3061
3062 };
3063
3064 DomainDecompositionBuilder::Impl::Impl(const MDLogger      &mdlog,
3065                                        t_commrec           *cr,
3066                                        const DomdecOptions &options,
3067                                        const MdrunOptions  &mdrunOptions,
3068                                        const bool           prefer1DAnd1Pulse,
3069                                        const gmx_mtop_t    &mtop,
3070                                        const t_inputrec    &ir,
3071                                        const matrix         box,
3072                                        ArrayRef<const RVec> xGlobal)
3073     : mdlog_(mdlog),
3074       cr_(cr),
3075       options_(options),
3076       mtop_(mtop),
3077       ir_(ir)
3078 {
3079     GMX_LOG(mdlog_.info).appendTextFormatted(
3080             "\nInitializing Domain Decomposition on %d ranks", cr_->nnodes);
3081
3082     ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_);
3083
3084     if (prefer1DAnd1Pulse &&
3085         canMake1DAnd1PulseDomainDecomposition(ddSettings_, cr_, cr_->nnodes, options_,
3086                                               mtop_, ir_, box, xGlobal))
3087     {
3088         ddSettings_.request1DAnd1Pulse = true;
3089     }
3090
3091     if (ddSettings_.eFlop > 1)
3092     {
3093         /* Ensure that we have different random flop counts on different ranks */
3094         srand(1 + cr_->nodeid);
3095     }
3096
3097     systemInfo_ = getSystemInfo(mdlog_, cr_, options_, mtop_, ir_, box, xGlobal);
3098
3099     const int numRanksRequested = cr_->nnodes;
3100     checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir_.coulombtype), options_.numPmeRanks);
3101
3102     // DD grid setup uses a more different cell size limit for
3103     // automated setup than the one in systemInfo_. The latter is used
3104     // in set_dd_limits() to configure DLB, for example.
3105     const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(mdlog_, ddSettings_.request1DAnd1Pulse,
3106                                                                     !isDlbDisabled(ddSettings_.initialDlbState),
3107                                                                     options_.dlbScaling, ir_,
3108                                                                     systemInfo_.cellsizeLimit);
3109     ddGridSetup_ = getDDGridSetup(mdlog_, cr_, numRanksRequested, options_,
3110                                   ddSettings_, systemInfo_, gridSetupCellsizeLimit,
3111                                   mtop_, ir_, box, xGlobal, &ddbox_);
3112     checkDDGridSetup(ddGridSetup_, cr_, options_, ddSettings_, systemInfo_, gridSetupCellsizeLimit, ddbox_);
3113
3114     cr_->npmenodes = ddGridSetup_.numPmeOnlyRanks;
3115
3116     ddRankSetup_ = getDDRankSetup(mdlog_, cr_, ddGridSetup_, ir_);
3117
3118     /* Generate the group communicator, also decides the duty of each rank */
3119     cartSetup_   =
3120         makeGroupCommunicators(mdlog_, ddSettings_, options_.rankOrder,
3121                                ddRankSetup_, cr_,
3122                                ddCellIndex_, &pmeRanks_);
3123 }
3124
3125 gmx_domdec_t *DomainDecompositionBuilder::Impl::build(LocalAtomSetManager *atomSets)
3126 {
3127     gmx_domdec_t *dd = new gmx_domdec_t(ir_);
3128
3129     copy_ivec(ddCellIndex_, dd->ci);
3130
3131     dd->comm = init_dd_comm();
3132
3133     dd->comm->ddRankSetup        = ddRankSetup_;
3134     dd->comm->cartesianRankSetup = cartSetup_;
3135
3136     set_dd_limits(mdlog_, cr_, dd, options_,
3137                   ddSettings_, systemInfo_,
3138                   ddGridSetup_,
3139                   ddRankSetup_.numPPRanks,
3140                   &mtop_, &ir_,
3141                   ddbox_);
3142
3143     setupGroupCommunication(mdlog_, ddSettings_, pmeRanks_, cr_, mtop_.natoms, dd);
3144
3145     if (thisRankHasDuty(cr_, DUTY_PP))
3146     {
3147         set_ddgrid_parameters(mdlog_, dd, options_.dlbScaling, &mtop_, &ir_, &ddbox_);
3148
3149         setup_neighbor_relations(dd);
3150     }
3151
3152     /* Set overallocation to avoid frequent reallocation of arrays */
3153     set_over_alloc_dd(TRUE);
3154
3155     dd->atomSets = atomSets;
3156
3157     return dd;
3158 }
3159
3160 DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger      &mdlog,
3161                                                        t_commrec           *cr,
3162                                                        const DomdecOptions &options,
3163                                                        const MdrunOptions  &mdrunOptions,
3164                                                        const bool           prefer1DAnd1Pulse,
3165                                                        const gmx_mtop_t    &mtop,
3166                                                        const t_inputrec    &ir,
3167                                                        const matrix         box,
3168                                                        ArrayRef<const RVec> xGlobal)
3169     : impl_(new Impl(mdlog, cr, options, mdrunOptions, prefer1DAnd1Pulse, mtop, ir, box, xGlobal))
3170 {
3171 }
3172
3173 gmx_domdec_t *DomainDecompositionBuilder::build(LocalAtomSetManager *atomSets)
3174 {
3175     return impl_->build(atomSets);
3176 }
3177
3178 DomainDecompositionBuilder::~DomainDecompositionBuilder() = default;
3179
3180 } // namespace gmx
3181
3182 static gmx_bool test_dd_cutoff(t_commrec                     *cr,
3183                                const matrix                   box,
3184                                gmx::ArrayRef<const gmx::RVec> x,
3185                                real                           cutoffRequested)
3186 {
3187     gmx_domdec_t *dd;
3188     gmx_ddbox_t   ddbox;
3189     int           d, dim, np;
3190     real          inv_cell_size;
3191     int           LocallyLimited;
3192
3193     dd = cr->dd;
3194
3195     set_ddbox(*dd, false, box, true, x, &ddbox);
3196
3197     LocallyLimited = 0;
3198
3199     for (d = 0; d < dd->ndim; d++)
3200     {
3201         dim = dd->dim[d];
3202
3203         inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
3204         if (dd->unitCellInfo.ddBoxIsDynamic)
3205         {
3206             inv_cell_size *= DD_PRES_SCALE_MARGIN;
3207         }
3208
3209         np = 1 + static_cast<int>(cutoffRequested*inv_cell_size*ddbox.skew_fac[dim]);
3210
3211         if (dd->comm->ddSettings.request1DAnd1Pulse && np > 1)
3212         {
3213             return FALSE;
3214         }
3215
3216         if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3217         {
3218             if (np > dd->comm->cd[d].np_dlb)
3219             {
3220                 return FALSE;
3221             }
3222
3223             /* If a current local cell size is smaller than the requested
3224              * cut-off, we could still fix it, but this gets very complicated.
3225              * Without fixing here, we might actually need more checks.
3226              */
3227             real cellSizeAlongDim = (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim];
3228             if (cellSizeAlongDim*dd->comm->cd[d].np_dlb < cutoffRequested)
3229             {
3230                 LocallyLimited = 1;
3231             }
3232         }
3233     }
3234
3235     if (!isDlbDisabled(dd->comm))
3236     {
3237         /* If DLB is not active yet, we don't need to check the grid jumps.
3238          * Actually we shouldn't, because then the grid jump data is not set.
3239          */
3240         if (isDlbOn(dd->comm) &&
3241             check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3242         {
3243             LocallyLimited = 1;
3244         }
3245
3246         gmx_sumi(1, &LocallyLimited, cr);
3247
3248         if (LocallyLimited > 0)
3249         {
3250             return FALSE;
3251         }
3252     }
3253
3254     return TRUE;
3255 }
3256
3257 gmx_bool change_dd_cutoff(t_commrec                     *cr,
3258                           const matrix                   box,
3259                           gmx::ArrayRef<const gmx::RVec> x,
3260                           real                           cutoffRequested)
3261 {
3262     gmx_bool bCutoffAllowed;
3263
3264     bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3265
3266     if (bCutoffAllowed)
3267     {
3268         cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3269     }
3270
3271     return bCutoffAllowed;
3272 }