src/gromacs/domdec/domdec.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #include "gmxpre.h"
  37
  38 #include "domdec.h"
  39
  40 #include "config.h"
  41
  42 #include <cassert>
  43 #include <cinttypes>
  44 #include <climits>
  45 #include <cmath>
  46 #include <cstdio>
  47 #include <cstdlib>
  48 #include <cstring>
  49
  50 #include <algorithm>
  51 #include <memory>
  52
  53 #include "gromacs/domdec/builder.h"
  54 #include "gromacs/domdec/collect.h"
  55 #include "gromacs/domdec/dlb.h"
  56 #include "gromacs/domdec/dlbtiming.h"
  57 #include "gromacs/domdec/domdec_network.h"
  58 #include "gromacs/domdec/ga2la.h"
  59 #include "gromacs/domdec/gpuhaloexchange.h"
  60 #include "gromacs/domdec/options.h"
  61 #include "gromacs/domdec/partition.h"
  62 #include "gromacs/gmxlib/network.h"
  63 #include "gromacs/gmxlib/nrnb.h"
  64 #include "gromacs/gpu_utils/gpu_utils.h"
  65 #include "gromacs/hardware/hw_info.h"
  66 #include "gromacs/listed_forces/manage_threading.h"
  67 #include "gromacs/math/vec.h"
  68 #include "gromacs/math/vectypes.h"
  69 #include "gromacs/mdlib/calc_verletbuf.h"
  70 #include "gromacs/mdlib/constr.h"
  71 #include "gromacs/mdlib/constraintrange.h"
  72 #include "gromacs/mdlib/updategroups.h"
  73 #include "gromacs/mdlib/vsite.h"
  74 #include "gromacs/mdtypes/commrec.h"
  75 #include "gromacs/mdtypes/forceoutput.h"
  76 #include "gromacs/mdtypes/inputrec.h"
  77 #include "gromacs/mdtypes/mdrunoptions.h"
  78 #include "gromacs/mdtypes/state.h"
  79 #include "gromacs/pbcutil/ishift.h"
  80 #include "gromacs/pbcutil/pbc.h"
  81 #include "gromacs/pulling/pull.h"
  82 #include "gromacs/timing/wallcycle.h"
  83 #include "gromacs/topology/block.h"
  84 #include "gromacs/topology/idef.h"
  85 #include "gromacs/topology/ifunc.h"
  86 #include "gromacs/topology/mtop_lookup.h"
  87 #include "gromacs/topology/mtop_util.h"
  88 #include "gromacs/topology/topology.h"
  89 #include "gromacs/utility/basedefinitions.h"
  90 #include "gromacs/utility/basenetwork.h"
  91 #include "gromacs/utility/cstringutil.h"
  92 #include "gromacs/utility/exceptions.h"
  93 #include "gromacs/utility/fatalerror.h"
  94 #include "gromacs/utility/gmxmpi.h"
  95 #include "gromacs/utility/logger.h"
  96 #include "gromacs/utility/real.h"
  97 #include "gromacs/utility/smalloc.h"
  98 #include "gromacs/utility/strconvert.h"
  99 #include "gromacs/utility/stringstream.h"
 100 #include "gromacs/utility/stringutil.h"
 101 #include "gromacs/utility/textwriter.h"
 102
 103 #include "atomdistribution.h"
 104 #include "box.h"
 105 #include "cellsizes.h"
 106 #include "distribute.h"
 107 #include "domdec_constraints.h"
 108 #include "domdec_internal.h"
 109 #include "domdec_setup.h"
 110 #include "domdec_vsite.h"
 111 #include "redistribute.h"
 112 #include "utility.h"
 113
 114 // TODO remove this when moving domdec into gmx namespace
 115 using gmx::DdRankOrder;
 116 using gmx::DlbOption;
 117 using gmx::DomdecOptions;
 118
 119 static const char *edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
 120
 121 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
 122 #define DD_CGIBS 2
 123
 124 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
 125 #define DD_FLAG_NRCG  65535
 126 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
 127 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
 128
 129 /* The DD zone order */
 130 static const ivec dd_zo[DD_MAXZONE] =
 131 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
 132
 133 /* The non-bonded zone-pair setup for domain decomposition
 134  * The first number is the i-zone, the second number the first j-zone seen by
 135  * this i-zone, the third number the last+1 j-zone seen by this i-zone.
 136  * As is, this is for 3D decomposition, where there are 4 i-zones.
 137  * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
 138  * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
 139  */
 140 static const int
 141     ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
 142                                                  {1, 3, 6},
 143                                                  {2, 5, 6},
 144                                                  {3, 5, 7}};
 145
 146
 147 /*
 148    #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
 149
 150    static void index2xyz(ivec nc,int ind,ivec xyz)
 151    {
 152    xyz[XX] = ind % nc[XX];
 153    xyz[YY] = (ind / nc[XX]) % nc[YY];
 154    xyz[ZZ] = ind / (nc[YY]*nc[XX]);
 155    }
 156  */
 157
 158 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
 159 {
 160     xyz[XX] = ind / (nc[YY]*nc[ZZ]);
 161     xyz[YY] = (ind / nc[ZZ]) % nc[YY];
 162     xyz[ZZ] = ind % nc[ZZ];
 163 }
 164
 165 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
 166 {
 167     int                       ddnodeid = -1;
 168
 169     const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
 170     const int                 ddindex   = dd_index(dd->nc, c);
 171     if (cartSetup.bCartesianPP_PME)
 172     {
 173         ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
 174     }
 175     else if (cartSetup.bCartesianPP)
 176     {
 177 #if GMX_MPI
 178         MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
 179 #endif
 180     }
 181     else
 182     {
 183         ddnodeid = ddindex;
 184     }
 185
 186     return ddnodeid;
 187 }
 188
 189 int ddglatnr(const gmx_domdec_t *dd, int i)
 190 {
 191     int atnr;
 192
 193     if (dd == nullptr)
 194     {
 195         atnr = i + 1;
 196     }
 197     else
 198     {
 199         if (i >= dd->comm->atomRanges.numAtomsTotal())
 200         {
 201             gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
 202         }
 203         atnr = dd->globalAtomIndices[i] + 1;
 204     }
 205
 206     return atnr;
 207 }
 208
 209 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t &dd)
 210 {
 211     GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
 212     return dd.comm->systemInfo.updateGroupingPerMoleculetype;
 213 }
 214
 215 void dd_store_state(gmx_domdec_t *dd, t_state *state)
 216 {
 217     int i;
 218
 219     if (state->ddp_count != dd->ddp_count)
 220     {
 221         gmx_incons("The MD state does not match the domain decomposition state");
 222     }
 223
 224     state->cg_gl.resize(dd->ncg_home);
 225     for (i = 0; i < dd->ncg_home; i++)
 226     {
 227         state->cg_gl[i] = dd->globalAtomGroupIndices[i];
 228     }
 229
 230     state->ddp_count_cg_gl = dd->ddp_count;
 231 }
 232
 233 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
 234 {
 235     return &dd->comm->zones;
 236 }
 237
 238 int dd_numHomeAtoms(const gmx_domdec_t &dd)
 239 {
 240     return dd.comm->atomRanges.numHomeAtoms();
 241 }
 242
 243 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
 244 {
 245     /* We currently set mdatoms entries for all atoms:
 246      * local + non-local + communicated for vsite + constraints
 247      */
 248
 249     return dd->comm->atomRanges.numAtomsTotal();
 250 }
 251
 252 int dd_natoms_vsite(const gmx_domdec_t *dd)
 253 {
 254     return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
 255 }
 256
 257 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
 258 {
 259     *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
 260     *at_end   = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
 261 }
 262
 263 void dd_move_x(gmx_domdec_t             *dd,
 264                const matrix              box,
 265                gmx::ArrayRef<gmx::RVec>  x,
 266                gmx_wallcycle            *wcycle)
 267 {
 268     wallcycle_start(wcycle, ewcMOVEX);
 269
 270     int                    nzone, nat_tot;
 271     gmx_domdec_comm_t     *comm;
 272     gmx_domdec_comm_dim_t *cd;
 273     rvec                   shift = {0, 0, 0};
 274     gmx_bool               bPBC, bScrew;
 275
 276     comm = dd->comm;
 277
 278     nzone   = 1;
 279     nat_tot = comm->atomRanges.numHomeAtoms();
 280     for (int d = 0; d < dd->ndim; d++)
 281     {
 282         bPBC   = (dd->ci[dd->dim[d]] == 0);
 283         bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
 284         if (bPBC)
 285         {
 286             copy_rvec(box[dd->dim[d]], shift);
 287         }
 288         cd = &comm->cd[d];
 289         for (const gmx_domdec_ind_t &ind : cd->ind)
 290         {
 291             DDBufferAccess<gmx::RVec>  sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 292             gmx::ArrayRef<gmx::RVec>  &sendBuffer = sendBufferAccess.buffer;
 293             int                        n          = 0;
 294             if (!bPBC)
 295             {
 296                 for (int j : ind.index)
 297                 {
 298                     sendBuffer[n] = x[j];
 299                     n++;
 300                 }
 301             }
 302             else if (!bScrew)
 303             {
 304                 for (int j : ind.index)
 305                 {
 306                     /* We need to shift the coordinates */
 307                     for (int d = 0; d < DIM; d++)
 308                     {
 309                         sendBuffer[n][d] = x[j][d] + shift[d];
 310                     }
 311                     n++;
 312                 }
 313             }
 314             else
 315             {
 316                 for (int j : ind.index)
 317                 {
 318                     /* Shift x */
 319                     sendBuffer[n][XX] = x[j][XX] + shift[XX];
 320                     /* Rotate y and z.
 321                      * This operation requires a special shift force
 322                      * treatment, which is performed in calc_vir.
 323                      */
 324                     sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
 325                     sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
 326                     n++;
 327                 }
 328             }
 329
 330             DDBufferAccess<gmx::RVec>  receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 331
 332             gmx::ArrayRef<gmx::RVec>   receiveBuffer;
 333             if (cd->receiveInPlace)
 334             {
 335                 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
 336             }
 337             else
 338             {
 339                 receiveBuffer = receiveBufferAccess.buffer;
 340             }
 341             /* Send and receive the coordinates */
 342             ddSendrecv(dd, d, dddirBackward,
 343                        sendBuffer, receiveBuffer);
 344
 345             if (!cd->receiveInPlace)
 346             {
 347                 int j = 0;
 348                 for (int zone = 0; zone < nzone; zone++)
 349                 {
 350                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 351                     {
 352                         x[i] = receiveBuffer[j++];
 353                     }
 354                 }
 355             }
 356             nat_tot += ind.nrecv[nzone+1];
 357         }
 358         nzone += nzone;
 359     }
 360
 361     wallcycle_stop(wcycle, ewcMOVEX);
 362 }
 363
 364 void dd_move_f(gmx_domdec_t              *dd,
 365                gmx::ForceWithShiftForces *forceWithShiftForces,
 366                gmx_wallcycle             *wcycle)
 367 {
 368     wallcycle_start(wcycle, ewcMOVEF);
 369
 370     gmx::ArrayRef<gmx::RVec> f       = forceWithShiftForces->force();
 371     gmx::ArrayRef<gmx::RVec> fshift  = forceWithShiftForces->shiftForces();
 372
 373     gmx_domdec_comm_t       &comm    = *dd->comm;
 374     int                      nzone   = comm.zones.n/2;
 375     int                      nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
 376     for (int d = dd->ndim-1; d >= 0; d--)
 377     {
 378         /* Only forces in domains near the PBC boundaries need to
 379            consider PBC in the treatment of fshift */
 380         const bool shiftForcesNeedPbc = (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
 381         const bool applyScrewPbc      = (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
 382         /* Determine which shift vector we need */
 383         ivec       vis   = { 0, 0, 0 };
 384         vis[dd->dim[d]]  = 1;
 385         const int  is    = IVEC2IS(vis);
 386
 387         /* Loop over the pulses */
 388         const gmx_domdec_comm_dim_t &cd = comm.cd[d];
 389         for (int p = cd.numPulses() - 1; p >= 0; p--)
 390         {
 391             const gmx_domdec_ind_t    &ind  = cd.ind[p];
 392             DDBufferAccess<gmx::RVec>  receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
 393             gmx::ArrayRef<gmx::RVec>  &receiveBuffer = receiveBufferAccess.buffer;
 394
 395             nat_tot                                 -= ind.nrecv[nzone+1];
 396
 397             DDBufferAccess<gmx::RVec>  sendBufferAccess(comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 398
 399             gmx::ArrayRef<gmx::RVec>   sendBuffer;
 400             if (cd.receiveInPlace)
 401             {
 402                 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
 403             }
 404             else
 405             {
 406                 sendBuffer = sendBufferAccess.buffer;
 407                 int j = 0;
 408                 for (int zone = 0; zone < nzone; zone++)
 409                 {
 410                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 411                     {
 412                         sendBuffer[j++] = f[i];
 413                     }
 414                 }
 415             }
 416             /* Communicate the forces */
 417             ddSendrecv(dd, d, dddirForward,
 418                        sendBuffer, receiveBuffer);
 419             /* Add the received forces */
 420             int n = 0;
 421             if (!shiftForcesNeedPbc)
 422             {
 423                 for (int j : ind.index)
 424                 {
 425                     for (int d = 0; d < DIM; d++)
 426                     {
 427                         f[j][d] += receiveBuffer[n][d];
 428                     }
 429                     n++;
 430                 }
 431             }
 432             else if (!applyScrewPbc)
 433             {
 434                 for (int j : ind.index)
 435                 {
 436                     for (int d = 0; d < DIM; d++)
 437                     {
 438                         f[j][d] += receiveBuffer[n][d];
 439                     }
 440                     /* Add this force to the shift force */
 441                     for (int d = 0; d < DIM; d++)
 442                     {
 443                         fshift[is][d] += receiveBuffer[n][d];
 444                     }
 445                     n++;
 446                 }
 447             }
 448             else
 449             {
 450                 for (int j : ind.index)
 451                 {
 452                     /* Rotate the force */
 453                     f[j][XX] += receiveBuffer[n][XX];
 454                     f[j][YY] -= receiveBuffer[n][YY];
 455                     f[j][ZZ] -= receiveBuffer[n][ZZ];
 456                     if (shiftForcesNeedPbc)
 457                     {
 458                         /* Add this force to the shift force */
 459                         for (int d = 0; d < DIM; d++)
 460                         {
 461                             fshift[is][d] += receiveBuffer[n][d];
 462                         }
 463                     }
 464                     n++;
 465                 }
 466             }
 467         }
 468         nzone /= 2;
 469     }
 470     wallcycle_stop(wcycle, ewcMOVEF);
 471 }
 472
 473 /* Convenience function for extracting a real buffer from an rvec buffer
 474  *
 475  * To reduce the number of temporary communication buffers and avoid
 476  * cache polution, we reuse gmx::RVec buffers for storing reals.
 477  * This functions return a real buffer reference with the same number
 478  * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
 479  */
 480 static gmx::ArrayRef<real>
 481 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
 482 {
 483     return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
 484                                   arrayRef.size());
 485 }
 486
 487 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
 488 {
 489     int                    nzone, nat_tot;
 490     gmx_domdec_comm_t     *comm;
 491     gmx_domdec_comm_dim_t *cd;
 492
 493     comm = dd->comm;
 494
 495     nzone   = 1;
 496     nat_tot = comm->atomRanges.numHomeAtoms();
 497     for (int d = 0; d < dd->ndim; d++)
 498     {
 499         cd = &comm->cd[d];
 500         for (const gmx_domdec_ind_t &ind : cd->ind)
 501         {
 502             /* Note: We provision for RVec instead of real, so a factor of 3
 503              * more than needed. The buffer actually already has this size
 504              * and we pass a plain pointer below, so this does not matter.
 505              */
 506             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 507             gmx::ArrayRef<real>       sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 508             int                       n          = 0;
 509             for (int j : ind.index)
 510             {
 511                 sendBuffer[n++] = v[j];
 512             }
 513
 514             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 515
 516             gmx::ArrayRef<real>       receiveBuffer;
 517             if (cd->receiveInPlace)
 518             {
 519                 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 520             }
 521             else
 522             {
 523                 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 524             }
 525             /* Send and receive the data */
 526             ddSendrecv(dd, d, dddirBackward,
 527                        sendBuffer, receiveBuffer);
 528             if (!cd->receiveInPlace)
 529             {
 530                 int j = 0;
 531                 for (int zone = 0; zone < nzone; zone++)
 532                 {
 533                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 534                     {
 535                         v[i] = receiveBuffer[j++];
 536                     }
 537                 }
 538             }
 539             nat_tot += ind.nrecv[nzone+1];
 540         }
 541         nzone += nzone;
 542     }
 543 }
 544
 545 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
 546 {
 547     int                    nzone, nat_tot;
 548     gmx_domdec_comm_t     *comm;
 549     gmx_domdec_comm_dim_t *cd;
 550
 551     comm = dd->comm;
 552
 553     nzone   = comm->zones.n/2;
 554     nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
 555     for (int d = dd->ndim-1; d >= 0; d--)
 556     {
 557         cd = &comm->cd[d];
 558         for (int p = cd->numPulses() - 1; p >= 0; p--)
 559         {
 560             const gmx_domdec_ind_t &ind = cd->ind[p];
 561
 562             /* Note: We provision for RVec instead of real, so a factor of 3
 563              * more than needed. The buffer actually already has this size
 564              * and we typecast, so this works as intended.
 565              */
 566             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 567             gmx::ArrayRef<real>       receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 568             nat_tot -= ind.nrecv[nzone + 1];
 569
 570             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 571
 572             gmx::ArrayRef<real>       sendBuffer;
 573             if (cd->receiveInPlace)
 574             {
 575                 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 576             }
 577             else
 578             {
 579                 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 580                 int j = 0;
 581                 for (int zone = 0; zone < nzone; zone++)
 582                 {
 583                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 584                     {
 585                         sendBuffer[j++] = v[i];
 586                     }
 587                 }
 588             }
 589             /* Communicate the forces */
 590             ddSendrecv(dd, d, dddirForward,
 591                        sendBuffer, receiveBuffer);
 592             /* Add the received forces */
 593             int n = 0;
 594             for (int j : ind.index)
 595             {
 596                 v[j] += receiveBuffer[n];
 597                 n++;
 598             }
 599         }
 600         nzone /= 2;
 601     }
 602 }
 603
 604 real dd_cutoff_multibody(const gmx_domdec_t *dd)
 605 {
 606     const gmx_domdec_comm_t &comm       = *dd->comm;
 607     const DDSystemInfo      &systemInfo = comm.systemInfo;
 608
 609     real                     r = -1;
 610     if (systemInfo.haveInterDomainMultiBodyBondeds)
 611     {
 612         if (comm.cutoff_mbody > 0)
 613         {
 614             r = comm.cutoff_mbody;
 615         }
 616         else
 617         {
 618             /* cutoff_mbody=0 means we do not have DLB */
 619             r = comm.cellsize_min[dd->dim[0]];
 620             for (int di = 1; di < dd->ndim; di++)
 621             {
 622                 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
 623             }
 624             if (comm.systemInfo.filterBondedCommunication)
 625             {
 626                 r = std::max(r, comm.cutoff_mbody);
 627             }
 628             else
 629             {
 630                 r = std::min(r, systemInfo.cutoff);
 631             }
 632         }
 633     }
 634
 635     return r;
 636 }
 637
 638 real dd_cutoff_twobody(const gmx_domdec_t *dd)
 639 {
 640     real r_mb;
 641
 642     r_mb = dd_cutoff_multibody(dd);
 643
 644     return std::max(dd->comm->systemInfo.cutoff, r_mb);
 645 }
 646
 647
 648 static void dd_cart_coord2pmecoord(const DDRankSetup        &ddRankSetup,
 649                                    const CartesianRankSetup &cartSetup,
 650                                    const ivec                coord,
 651                                    ivec                      coord_pme)
 652 {
 653     const int nc   = ddRankSetup.numPPCells[cartSetup.cartpmedim];
 654     const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
 655     copy_ivec(coord, coord_pme);
 656     coord_pme[cartSetup.cartpmedim] =
 657         nc + (coord[cartSetup.cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
 658 }
 659
 660 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
 661 static int ddindex2pmeindex(const DDRankSetup &ddRankSetup,
 662                             const int          ddCellIndex)
 663 {
 664     const int npp  = ddRankSetup.numPPRanks;
 665     const int npme = ddRankSetup.numRanksDoingPme;
 666
 667     /* Here we assign a PME node to communicate with this DD node
 668      * by assuming that the major index of both is x.
 669      * We add npme/2 to obtain an even distribution.
 670      */
 671     return (ddCellIndex*npme + npme/2)/npp;
 672 }
 673
 674 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup &ddRankSetup)
 675 {
 676     std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
 677
 678     int              n = 0;
 679     for (int i = 0; i < ddRankSetup.numPPRanks; i++)
 680     {
 681         const int p0 = ddindex2pmeindex(ddRankSetup, i);
 682         const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
 683         if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
 684         {
 685             if (debug)
 686             {
 687                 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
 688             }
 689             pmeRanks[n] = i + 1 + n;
 690             n++;
 691         }
 692     }
 693
 694     return pmeRanks;
 695 }
 696
 697 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
 698 {
 699     gmx_domdec_t *dd;
 700     ivec          coords;
 701     int           slab;
 702
 703     dd = cr->dd;
 704     /*
 705        if (dd->comm->bCartesian) {
 706        gmx_ddindex2xyz(dd->nc,ddindex,coords);
 707        dd_coords2pmecoords(dd,coords,coords_pme);
 708        copy_ivec(dd->ntot,nc);
 709        nc[dd->cartpmedim]         -= dd->nc[dd->cartpmedim];
 710        coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
 711
 712        slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
 713        } else {
 714        slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
 715        }
 716      */
 717     coords[XX] = x;
 718     coords[YY] = y;
 719     coords[ZZ] = z;
 720     slab       = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->nc, coords));
 721
 722     return slab;
 723 }
 724
 725 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
 726 {
 727     const CartesianRankSetup &cartSetup = cr->dd->comm->cartesianRankSetup;
 728     ivec                      coords    = { x, y, z };
 729     int                       nodeid    = -1;
 730
 731     if (cartSetup.bCartesianPP_PME)
 732     {
 733 #if GMX_MPI
 734         MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
 735 #endif
 736     }
 737     else
 738     {
 739         int ddindex = dd_index(cr->dd->nc, coords);
 740         if (cartSetup.bCartesianPP)
 741         {
 742             nodeid = cartSetup.ddindex2simnodeid[ddindex];
 743         }
 744         else
 745         {
 746             if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
 747             {
 748                 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
 749             }
 750             else
 751             {
 752                 nodeid = ddindex;
 753             }
 754         }
 755     }
 756
 757     return nodeid;
 758 }
 759
 760 static int dd_simnode2pmenode(const DDRankSetup          &ddRankSetup,
 761                               const CartesianRankSetup   &cartSetup,
 762                               gmx::ArrayRef<const int>    pmeRanks,
 763                               const t_commrec gmx_unused *cr,
 764                               const int                   sim_nodeid)
 765 {
 766     int                pmenode = -1;
 767
 768     /* This assumes a uniform x domain decomposition grid cell size */
 769     if (cartSetup.bCartesianPP_PME)
 770     {
 771 #if GMX_MPI
 772         ivec coord, coord_pme;
 773         MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
 774         if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
 775         {
 776             /* This is a PP rank */
 777             dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
 778             MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
 779         }
 780 #endif
 781     }
 782     else if (cartSetup.bCartesianPP)
 783     {
 784         if (sim_nodeid < ddRankSetup.numPPRanks)
 785         {
 786             pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
 787         }
 788     }
 789     else
 790     {
 791         /* This assumes DD cells with identical x coordinates
 792          * are numbered sequentially.
 793          */
 794         if (pmeRanks.empty())
 795         {
 796             if (sim_nodeid < ddRankSetup.numPPRanks)
 797             {
 798                 /* The DD index equals the nodeid */
 799                 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
 800             }
 801         }
 802         else
 803         {
 804             int i = 0;
 805             while (sim_nodeid > pmeRanks[i])
 806             {
 807                 i++;
 808             }
 809             if (sim_nodeid < pmeRanks[i])
 810             {
 811                 pmenode = pmeRanks[i];
 812             }
 813         }
 814     }
 815
 816     return pmenode;
 817 }
 818
 819 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
 820 {
 821     if (dd != nullptr)
 822     {
 823         return {
 824                    dd->comm->ddRankSetup.npmenodes_x,
 825                    dd->comm->ddRankSetup.npmenodes_y
 826         };
 827     }
 828     else
 829     {
 830         return {
 831                    1, 1
 832         };
 833     }
 834 }
 835
 836 std::vector<int> get_pme_ddranks(const t_commrec *cr,
 837                                  const int        pmenodeid)
 838 {
 839     const DDRankSetup        &ddRankSetup = cr->dd->comm->ddRankSetup;
 840     const CartesianRankSetup &cartSetup   = cr->dd->comm->cartesianRankSetup;
 841     GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks, "This function should only be called when PME-only ranks are in use");
 842     std::vector<int>          ddranks;
 843     ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1)/ddRankSetup.numRanksDoingPme);
 844
 845     for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
 846     {
 847         for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
 848         {
 849             for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
 850             {
 851                 if (cartSetup.bCartesianPP_PME)
 852                 {
 853                     ivec coord = { x, y, z };
 854                     ivec coord_pme;
 855                     dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
 856                     if (cr->dd->ci[XX] == coord_pme[XX] &&
 857                         cr->dd->ci[YY] == coord_pme[YY] &&
 858                         cr->dd->ci[ZZ] == coord_pme[ZZ])
 859                     {
 860                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 861                     }
 862                 }
 863                 else
 864                 {
 865                     /* The slab corresponds to the nodeid in the PME group */
 866                     if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
 867                     {
 868                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 869                     }
 870                 }
 871             }
 872         }
 873     }
 874     return ddranks;
 875 }
 876
 877 static gmx_bool receive_vir_ener(const gmx_domdec_t       *dd,
 878                                  gmx::ArrayRef<const int>  pmeRanks,
 879                                  const t_commrec          *cr)
 880 {
 881     gmx_bool           bReceive    = TRUE;
 882
 883     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
 884     if (ddRankSetup.usePmeOnlyRanks)
 885     {
 886         const CartesianRankSetup &cartSetup   = dd->comm->cartesianRankSetup;
 887         if (cartSetup.bCartesianPP_PME)
 888         {
 889 #if GMX_MPI
 890             int  pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
 891             ivec coords;
 892             MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
 893             coords[cartSetup.cartpmedim]++;
 894             if (coords[cartSetup.cartpmedim] < dd->nc[cartSetup.cartpmedim])
 895             {
 896                 int rank;
 897                 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
 898                 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
 899                 {
 900                     /* This is not the last PP node for pmenode */
 901                     bReceive = FALSE;
 902                 }
 903             }
 904 #else
 905             GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
 906 #endif
 907         }
 908         else
 909         {
 910             int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
 911             if (cr->sim_nodeid+1 < cr->nnodes &&
 912                 dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
 913             {
 914                 /* This is not the last PP node for pmenode */
 915                 bReceive = FALSE;
 916             }
 917         }
 918     }
 919
 920     return bReceive;
 921 }
 922
 923 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
 924 {
 925     gmx_domdec_comm_t *comm;
 926     int                i;
 927
 928     comm = dd->comm;
 929
 930     snew(*dim_f, dd->nc[dim]+1);
 931     (*dim_f)[0] = 0;
 932     for (i = 1; i < dd->nc[dim]; i++)
 933     {
 934         if (comm->slb_frac[dim])
 935         {
 936             (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
 937         }
 938         else
 939         {
 940             (*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
 941         }
 942     }
 943     (*dim_f)[dd->nc[dim]] = 1;
 944 }
 945
 946 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
 947 {
 948     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
 949
 950     if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
 951     {
 952         ddpme->dim = YY;
 953     }
 954     else
 955     {
 956         ddpme->dim = dimind;
 957     }
 958     ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
 959
 960     ddpme->nslab = (ddpme->dim == 0 ?
 961                     ddRankSetup.npmenodes_x :
 962                     ddRankSetup.npmenodes_y);
 963
 964     if (ddpme->nslab <= 1)
 965     {
 966         return;
 967     }
 968
 969     const int nso = ddRankSetup.numRanksDoingPme/ddpme->nslab;
 970     /* Determine for each PME slab the PP location range for dimension dim */
 971     snew(ddpme->pp_min, ddpme->nslab);
 972     snew(ddpme->pp_max, ddpme->nslab);
 973     for (int slab = 0; slab < ddpme->nslab; slab++)
 974     {
 975         ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
 976         ddpme->pp_max[slab] = 0;
 977     }
 978     for (int i = 0; i < dd->nnodes; i++)
 979     {
 980         ivec xyz;
 981         ddindex2xyz(dd->nc, i, xyz);
 982         /* For y only use our y/z slab.
 983          * This assumes that the PME x grid size matches the DD grid size.
 984          */
 985         if (dimind == 0 || xyz[XX] == dd->ci[XX])
 986         {
 987             const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
 988             int       slab;
 989             if (dimind == 0)
 990             {
 991                 slab = pmeindex/nso;
 992             }
 993             else
 994             {
 995                 slab = pmeindex % ddpme->nslab;
 996             }
 997             ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
 998             ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
 999         }
1000     }
1001
1002     set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1003 }
1004
1005 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1006 {
1007     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1008
1009     if (ddRankSetup.ddpme[0].dim == XX)
1010     {
1011         return ddRankSetup.ddpme[0].maxshift;
1012     }
1013     else
1014     {
1015         return 0;
1016     }
1017 }
1018
1019 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1020 {
1021     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1022
1023     if (ddRankSetup.ddpme[0].dim == YY)
1024     {
1025         return ddRankSetup.ddpme[0].maxshift;
1026     }
1027     else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
1028     {
1029         return ddRankSetup.ddpme[1].maxshift;
1030     }
1031     else
1032     {
1033         return 0;
1034     }
1035 }
1036
1037 bool ddHaveSplitConstraints(const gmx_domdec_t &dd)
1038 {
1039     return dd.comm->systemInfo.haveSplitConstraints;
1040 }
1041
1042 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
1043 {
1044     /* Note that the cycles value can be incorrect, either 0 or some
1045      * extremely large value, when our thread migrated to another core
1046      * with an unsynchronized cycle counter. If this happens less often
1047      * that once per nstlist steps, this will not cause issues, since
1048      * we later subtract the maximum value from the sum over nstlist steps.
1049      * A zero count will slightly lower the total, but that's a small effect.
1050      * Note that the main purpose of the subtraction of the maximum value
1051      * is to avoid throwing off the load balancing when stalls occur due
1052      * e.g. system activity or network congestion.
1053      */
1054     dd->comm->cycl[ddCycl] += cycles;
1055     dd->comm->cycl_n[ddCycl]++;
1056     if (cycles > dd->comm->cycl_max[ddCycl])
1057     {
1058         dd->comm->cycl_max[ddCycl] = cycles;
1059     }
1060 }
1061
1062 #if GMX_MPI
1063 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
1064 {
1065     MPI_Comm           c_row;
1066     int                dim, i, rank;
1067     ivec               loc_c;
1068     gmx_bool           bPartOfGroup = FALSE;
1069
1070     dim = dd->dim[dim_ind];
1071     copy_ivec(loc, loc_c);
1072     for (i = 0; i < dd->nc[dim]; i++)
1073     {
1074         loc_c[dim] = i;
1075         rank       = dd_index(dd->nc, loc_c);
1076         if (rank == dd->rank)
1077         {
1078             /* This process is part of the group */
1079             bPartOfGroup = TRUE;
1080         }
1081     }
1082     MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
1083                    &c_row);
1084     if (bPartOfGroup)
1085     {
1086         dd->comm->mpi_comm_load[dim_ind] = c_row;
1087         if (!isDlbDisabled(dd->comm))
1088         {
1089             DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1090
1091             if (dd->ci[dim] == dd->master_ci[dim])
1092             {
1093                 /* This is the root process of this row */
1094                 cellsizes.rowMaster  = std::make_unique<RowMaster>();
1095
1096                 RowMaster &rowMaster = *cellsizes.rowMaster;
1097                 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1098                 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
1099                 rowMaster.isCellMin.resize(dd->nc[dim]);
1100                 if (dim_ind > 0)
1101                 {
1102                     rowMaster.bounds.resize(dd->nc[dim]);
1103                 }
1104                 rowMaster.buf_ncd.resize(dd->nc[dim]);
1105             }
1106             else
1107             {
1108                 /* This is not a root process, we only need to receive cell_f */
1109                 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1110             }
1111         }
1112         if (dd->ci[dim] == dd->master_ci[dim])
1113         {
1114             snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
1115         }
1116     }
1117 }
1118 #endif
1119
1120 void dd_setup_dlb_resource_sharing(const t_commrec      *cr,
1121                                    int                   gpu_id)
1122 {
1123 #if GMX_MPI
1124     int           physicalnode_id_hash;
1125     gmx_domdec_t *dd;
1126     MPI_Comm      mpi_comm_pp_physicalnode;
1127
1128     if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1129     {
1130         /* Only ranks with short-ranged tasks (currently) use GPUs.
1131          * If we don't have GPUs assigned, there are no resources to share.
1132          */
1133         return;
1134     }
1135
1136     physicalnode_id_hash = gmx_physicalnode_id_hash();
1137
1138     dd = cr->dd;
1139
1140     if (debug)
1141     {
1142         fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1143         fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
1144                 dd->rank, physicalnode_id_hash, gpu_id);
1145     }
1146     /* Split the PP communicator over the physical nodes */
1147     /* TODO: See if we should store this (before), as it's also used for
1148      * for the nodecomm summation.
1149      */
1150     // TODO PhysicalNodeCommunicator could be extended/used to handle
1151     // the need for per-node per-group communicators.
1152     MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
1153                    &mpi_comm_pp_physicalnode);
1154     MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
1155                    &dd->comm->mpi_comm_gpu_shared);
1156     MPI_Comm_free(&mpi_comm_pp_physicalnode);
1157     MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1158
1159     if (debug)
1160     {
1161         fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1162     }
1163
1164     /* Note that some ranks could share a GPU, while others don't */
1165
1166     if (dd->comm->nrank_gpu_shared == 1)
1167     {
1168         MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1169     }
1170 #else
1171     GMX_UNUSED_VALUE(cr);
1172     GMX_UNUSED_VALUE(gpu_id);
1173 #endif
1174 }
1175
1176 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
1177 {
1178 #if GMX_MPI
1179     int  dim0, dim1, i, j;
1180     ivec loc;
1181
1182     if (debug)
1183     {
1184         fprintf(debug, "Making load communicators\n");
1185     }
1186
1187     dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1188     snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1189
1190     if (dd->ndim == 0)
1191     {
1192         return;
1193     }
1194
1195     clear_ivec(loc);
1196     make_load_communicator(dd, 0, loc);
1197     if (dd->ndim > 1)
1198     {
1199         dim0 = dd->dim[0];
1200         for (i = 0; i < dd->nc[dim0]; i++)
1201         {
1202             loc[dim0] = i;
1203             make_load_communicator(dd, 1, loc);
1204         }
1205     }
1206     if (dd->ndim > 2)
1207     {
1208         dim0 = dd->dim[0];
1209         for (i = 0; i < dd->nc[dim0]; i++)
1210         {
1211             loc[dim0] = i;
1212             dim1      = dd->dim[1];
1213             for (j = 0; j < dd->nc[dim1]; j++)
1214             {
1215                 loc[dim1] = j;
1216                 make_load_communicator(dd, 2, loc);
1217             }
1218         }
1219     }
1220
1221     if (debug)
1222     {
1223         fprintf(debug, "Finished making load communicators\n");
1224     }
1225 #endif
1226 }
1227
1228 /*! \brief Sets up the relation between neighboring domains and zones */
1229 static void setup_neighbor_relations(gmx_domdec_t *dd)
1230 {
1231     int                     d, dim, i, j, m;
1232     ivec                    tmp, s;
1233     gmx_domdec_zones_t     *zones;
1234     gmx_domdec_ns_ranges_t *izone;
1235     GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
1236
1237     for (d = 0; d < dd->ndim; d++)
1238     {
1239         dim = dd->dim[d];
1240         copy_ivec(dd->ci, tmp);
1241         tmp[dim]           = (tmp[dim] + 1) % dd->nc[dim];
1242         dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1243         copy_ivec(dd->ci, tmp);
1244         tmp[dim]           = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
1245         dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1246         if (debug)
1247         {
1248             fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
1249                     dd->rank, dim,
1250                     dd->neighbor[d][0],
1251                     dd->neighbor[d][1]);
1252         }
1253     }
1254
1255     int nzone  = (1 << dd->ndim);
1256     int nizone = (1 << std::max(dd->ndim - 1, 0));
1257     assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1258
1259     zones = &dd->comm->zones;
1260
1261     for (i = 0; i < nzone; i++)
1262     {
1263         m = 0;
1264         clear_ivec(zones->shift[i]);
1265         for (d = 0; d < dd->ndim; d++)
1266         {
1267             zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1268         }
1269     }
1270
1271     zones->n = nzone;
1272     for (i = 0; i < nzone; i++)
1273     {
1274         for (d = 0; d < DIM; d++)
1275         {
1276             s[d] = dd->ci[d] - zones->shift[i][d];
1277             if (s[d] < 0)
1278             {
1279                 s[d] += dd->nc[d];
1280             }
1281             else if (s[d] >= dd->nc[d])
1282             {
1283                 s[d] -= dd->nc[d];
1284             }
1285         }
1286     }
1287     zones->nizone = nizone;
1288     for (i = 0; i < zones->nizone; i++)
1289     {
1290         assert(ddNonbondedZonePairRanges[i][0] == i);
1291
1292         izone     = &zones->izone[i];
1293         /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
1294          * j-zones up to nzone.
1295          */
1296         izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
1297         izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
1298         for (dim = 0; dim < DIM; dim++)
1299         {
1300             if (dd->nc[dim] == 1)
1301             {
1302                 /* All shifts should be allowed */
1303                 izone->shift0[dim] = -1;
1304                 izone->shift1[dim] = 1;
1305             }
1306             else
1307             {
1308                 /* Determine the min/max j-zone shift wrt the i-zone */
1309                 izone->shift0[dim] = 1;
1310                 izone->shift1[dim] = -1;
1311                 for (j = izone->j0; j < izone->j1; j++)
1312                 {
1313                     int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
1314                     if (shift_diff < izone->shift0[dim])
1315                     {
1316                         izone->shift0[dim] = shift_diff;
1317                     }
1318                     if (shift_diff > izone->shift1[dim])
1319                     {
1320                         izone->shift1[dim] = shift_diff;
1321                     }
1322                 }
1323             }
1324         }
1325     }
1326
1327     if (!isDlbDisabled(dd->comm))
1328     {
1329         dd->comm->cellsizesWithDlb.resize(dd->ndim);
1330     }
1331
1332     if (dd->comm->ddSettings.recordLoad)
1333     {
1334         make_load_communicators(dd);
1335     }
1336 }
1337
1338 static void make_pp_communicator(const gmx::MDLogger  &mdlog,
1339                                  gmx_domdec_t         *dd,
1340                                  t_commrec gmx_unused *cr,
1341                                  bool gmx_unused       reorder)
1342 {
1343 #if GMX_MPI
1344     gmx_domdec_comm_t  *comm      = dd->comm;
1345     CartesianRankSetup &cartSetup = comm->cartesianRankSetup;
1346
1347     if (cartSetup.bCartesianPP)
1348     {
1349         /* Set up cartesian communication for the particle-particle part */
1350         GMX_LOG(mdlog.info).appendTextFormatted(
1351                 "Will use a Cartesian communicator: %d x %d x %d",
1352                 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
1353
1354         ivec periods;
1355         for (int i = 0; i < DIM; i++)
1356         {
1357             periods[i] = TRUE;
1358         }
1359         MPI_Comm comm_cart;
1360         MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
1361                         &comm_cart);
1362         /* We overwrite the old communicator with the new cartesian one */
1363         cr->mpi_comm_mygroup = comm_cart;
1364     }
1365
1366     dd->mpi_comm_all = cr->mpi_comm_mygroup;
1367     MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1368
1369     if (cartSetup.bCartesianPP_PME)
1370     {
1371         /* Since we want to use the original cartesian setup for sim,
1372          * and not the one after split, we need to make an index.
1373          */
1374         cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1375         cartSetup.ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
1376         gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1377         /* Get the rank of the DD master,
1378          * above we made sure that the master node is a PP node.
1379          */
1380         int rank;
1381         if (MASTER(cr))
1382         {
1383             rank = dd->rank;
1384         }
1385         else
1386         {
1387             rank = 0;
1388         }
1389         MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1390     }
1391     else if (cartSetup.bCartesianPP)
1392     {
1393         if (!comm->ddRankSetup.usePmeOnlyRanks)
1394         {
1395             /* The PP communicator is also
1396              * the communicator for this simulation
1397              */
1398             cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1399         }
1400         cr->nodeid = dd->rank;
1401
1402         MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1403
1404         /* We need to make an index to go from the coordinates
1405          * to the nodeid of this simulation.
1406          */
1407         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1408         std::vector<int> buf(dd->nnodes);
1409         if (thisRankHasDuty(cr, DUTY_PP))
1410         {
1411             buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1412         }
1413         /* Communicate the ddindex to simulation nodeid index */
1414         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1415                       cr->mpi_comm_mysim);
1416
1417         /* Determine the master coordinates and rank.
1418          * The DD master should be the same node as the master of this sim.
1419          */
1420         for (int i = 0; i < dd->nnodes; i++)
1421         {
1422             if (cartSetup.ddindex2simnodeid[i] == 0)
1423             {
1424                 ddindex2xyz(dd->nc, i, dd->master_ci);
1425                 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1426             }
1427         }
1428         if (debug)
1429         {
1430             fprintf(debug, "The master rank is %d\n", dd->masterrank);
1431         }
1432     }
1433     else
1434     {
1435         /* No Cartesian communicators */
1436         /* We use the rank in dd->comm->all as DD index */
1437         ddindex2xyz(dd->nc, dd->rank, dd->ci);
1438         /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1439         dd->masterrank = 0;
1440         clear_ivec(dd->master_ci);
1441     }
1442 #endif
1443
1444     GMX_LOG(mdlog.info).appendTextFormatted(
1445             "Domain decomposition rank %d, coordinates %d %d %d\n",
1446             dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1447     if (debug)
1448     {
1449         fprintf(debug,
1450                 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
1451                 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1452     }
1453 }
1454
1455 static void receive_ddindex2simnodeid(gmx_domdec_t         *dd,
1456                                       t_commrec            *cr)
1457 {
1458 #if GMX_MPI
1459     CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
1460
1461     if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1462     {
1463         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1464         std::vector<int> buf(dd->nnodes);
1465         if (thisRankHasDuty(cr, DUTY_PP))
1466         {
1467             buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1468         }
1469         /* Communicate the ddindex to simulation nodeid index */
1470         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1471                       cr->mpi_comm_mysim);
1472     }
1473 #else
1474     GMX_UNUSED_VALUE(dd);
1475     GMX_UNUSED_VALUE(cr);
1476 #endif
1477 }
1478
1479 static CartesianRankSetup
1480 split_communicator(const gmx::MDLogger    &mdlog,
1481                    t_commrec              *cr,
1482                    const DdRankOrder       ddRankOrder,
1483                    bool gmx_unused         reorder,
1484                    const DDRankSetup      &ddRankSetup,
1485                    ivec                    ddCellIndex,
1486                    std::vector<int>       *pmeRanks)
1487 {
1488     CartesianRankSetup cartSetup;
1489
1490     cartSetup.bCartesianPP     = (ddRankOrder == DdRankOrder::cartesian);
1491     cartSetup.bCartesianPP_PME = false;
1492
1493     const ivec &numDDCells     = ddRankSetup.numPPCells;
1494     /* Initially we set ntot to the number of PP cells */
1495     copy_ivec(numDDCells, cartSetup.ntot);
1496
1497     if (cartSetup.bCartesianPP)
1498     {
1499         const int numDDCellsTot = ddRankSetup.numPPRanks;
1500         bool      bDiv[DIM];
1501         for (int i = 1; i < DIM; i++)
1502         {
1503             bDiv[i] = ((ddRankSetup.numRanksDoingPme*numDDCells[i]) % numDDCellsTot == 0);
1504         }
1505         if (bDiv[YY] || bDiv[ZZ])
1506         {
1507             cartSetup.bCartesianPP_PME = TRUE;
1508             /* If we have 2D PME decomposition, which is always in x+y,
1509              * we stack the PME only nodes in z.
1510              * Otherwise we choose the direction that provides the thinnest slab
1511              * of PME only nodes as this will have the least effect
1512              * on the PP communication.
1513              * But for the PME communication the opposite might be better.
1514              */
1515             if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 ||
1516                              !bDiv[YY] ||
1517                              numDDCells[YY] > numDDCells[ZZ]))
1518             {
1519                 cartSetup.cartpmedim = ZZ;
1520             }
1521             else
1522             {
1523                 cartSetup.cartpmedim = YY;
1524             }
1525             cartSetup.ntot[cartSetup.cartpmedim]
1526                 += (ddRankSetup.numRanksDoingPme*numDDCells[cartSetup.cartpmedim])/numDDCellsTot;
1527         }
1528         else
1529         {
1530             GMX_LOG(mdlog.info).appendTextFormatted(
1531                     "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)",
1532                     ddRankSetup.numRanksDoingPme,
1533                     numDDCells[XX], numDDCells[YY],
1534                     numDDCells[XX], numDDCells[ZZ]);
1535             GMX_LOG(mdlog.info).appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1536         }
1537     }
1538
1539     if (cartSetup.bCartesianPP_PME)
1540     {
1541 #if GMX_MPI
1542         int  rank;
1543         ivec periods;
1544
1545         GMX_LOG(mdlog.info).appendTextFormatted(
1546                 "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1547                 cartSetup.ntot[XX], cartSetup.ntot[YY], cartSetup.ntot[ZZ]);
1548
1549         for (int i = 0; i < DIM; i++)
1550         {
1551             periods[i] = TRUE;
1552         }
1553         MPI_Comm comm_cart;
1554         MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder),
1555                         &comm_cart);
1556         MPI_Comm_rank(comm_cart, &rank);
1557         if (MASTER(cr) && rank != 0)
1558         {
1559             gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1560         }
1561
1562         /* With this assigment we loose the link to the original communicator
1563          * which will usually be MPI_COMM_WORLD, unless have multisim.
1564          */
1565         cr->mpi_comm_mysim = comm_cart;
1566         cr->sim_nodeid     = rank;
1567
1568         MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1569
1570         GMX_LOG(mdlog.info).appendTextFormatted(
1571                 "Cartesian rank %d, coordinates %d %d %d\n",
1572                 cr->sim_nodeid, ddCellIndex[XX], ddCellIndex[YY], ddCellIndex[ZZ]);
1573
1574         if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1575         {
1576             cr->duty = DUTY_PP;
1577         }
1578         if (!ddRankSetup.usePmeOnlyRanks ||
1579             ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1580         {
1581             cr->duty = DUTY_PME;
1582         }
1583
1584         /* Split the sim communicator into PP and PME only nodes */
1585         MPI_Comm_split(cr->mpi_comm_mysim,
1586                        getThisRankDuties(cr),
1587                        dd_index(cartSetup.ntot, ddCellIndex),
1588                        &cr->mpi_comm_mygroup);
1589 #else
1590         GMX_UNUSED_VALUE(ddCellIndex);
1591 #endif
1592     }
1593     else
1594     {
1595         switch (ddRankOrder)
1596         {
1597             case DdRankOrder::pp_pme:
1598                 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1599                 break;
1600             case DdRankOrder::interleave:
1601                 /* Interleave the PP-only and PME-only ranks */
1602                 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1603                 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1604                 break;
1605             case DdRankOrder::cartesian:
1606                 break;
1607             default:
1608                 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1609         }
1610
1611         if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1612         {
1613             cr->duty = DUTY_PME;
1614         }
1615         else
1616         {
1617             cr->duty = DUTY_PP;
1618         }
1619 #if GMX_MPI
1620         /* Split the sim communicator into PP and PME only nodes */
1621         MPI_Comm_split(cr->mpi_comm_mysim,
1622                        getThisRankDuties(cr),
1623                        cr->nodeid,
1624                        &cr->mpi_comm_mygroup);
1625         MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1626 #endif
1627     }
1628
1629     GMX_LOG(mdlog.info).appendTextFormatted(
1630             "This rank does only %s work.\n",
1631             thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1632
1633     return cartSetup;
1634 }
1635
1636 /*! \brief Makes the PP communicator and the PME communicator, when needed
1637  *
1638  * Returns the Cartesian rank setup.
1639  * Sets \p cr->mpi_comm_mygroup
1640  * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1641  * With separate PME ranks in interleaved order, set the PME ranks in \p pmeRanks.
1642  */
1643 static CartesianRankSetup
1644 makeGroupCommunicators(const gmx::MDLogger &mdlog,
1645                        const DDSettings    &ddSettings,
1646                        const DdRankOrder    ddRankOrder,
1647                        const DDRankSetup   &ddRankSetup,
1648                        t_commrec           *cr,
1649                        ivec                 ddCellIndex,
1650                        std::vector<int>    *pmeRanks)
1651 {
1652     CartesianRankSetup cartSetup;
1653
1654     if (ddRankSetup.usePmeOnlyRanks)
1655     {
1656         /* Split the communicator into a PP and PME part */
1657         cartSetup =
1658             split_communicator(mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder,
1659                                ddRankSetup, ddCellIndex, pmeRanks);
1660     }
1661     else
1662     {
1663         /* All nodes do PP and PME */
1664         /* We do not require separate communicators */
1665         cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
1666
1667         cartSetup.bCartesianPP     = false;
1668         cartSetup.bCartesianPP_PME = false;
1669     }
1670
1671     return cartSetup;
1672 }
1673
1674 /*! \brief For PP ranks, sets or makes the communicator
1675  *
1676  * For PME ranks get the rank id.
1677  * For PP only ranks, sets the PME-only rank.
1678  */
1679 static void setupGroupCommunication(const gmx::MDLogger      &mdlog,
1680                                     const DDSettings         &ddSettings,
1681                                     gmx::ArrayRef<const int>  pmeRanks,
1682                                     t_commrec                *cr,
1683                                     const int                 numAtomsInSystem,
1684                                     gmx_domdec_t             *dd)
1685 {
1686     const DDRankSetup        &ddRankSetup = dd->comm->ddRankSetup;
1687     const CartesianRankSetup &cartSetup   = dd->comm->cartesianRankSetup;
1688
1689     if (thisRankHasDuty(cr, DUTY_PP))
1690     {
1691         /* Copy or make a new PP communicator */
1692
1693         /* We (possibly) reordered the nodes in split_communicator,
1694          * so it is no longer required in make_pp_communicator.
1695          */
1696         const bool useCartesianReorder =
1697             (ddSettings.useCartesianReorder &&
1698              !cartSetup.bCartesianPP_PME);
1699
1700         make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1701     }
1702     else
1703     {
1704         receive_ddindex2simnodeid(dd, cr);
1705     }
1706
1707     if (!thisRankHasDuty(cr, DUTY_PME))
1708     {
1709         /* Set up the commnuication to our PME node */
1710         dd->pme_nodeid           = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1711         dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1712         if (debug)
1713         {
1714             fprintf(debug, "My pme_nodeid %d receive ener %s\n",
1715                     dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
1716         }
1717     }
1718     else
1719     {
1720         dd->pme_nodeid = -1;
1721     }
1722
1723     /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1724     if (MASTER(cr))
1725     {
1726         dd->ma = std::make_unique<AtomDistribution>(dd->nc,
1727                                                     numAtomsInSystem,
1728                                                     numAtomsInSystem);
1729     }
1730 }
1731
1732 static real *get_slb_frac(const gmx::MDLogger &mdlog,
1733                           const char *dir, int nc, const char *size_string)
1734 {
1735     real  *slb_frac, tot;
1736     int    i, n;
1737     double dbl;
1738
1739     slb_frac = nullptr;
1740     if (nc > 1 && size_string != nullptr)
1741     {
1742         GMX_LOG(mdlog.info).appendTextFormatted(
1743                 "Using static load balancing for the %s direction", dir);
1744         snew(slb_frac, nc);
1745         tot = 0;
1746         for (i = 0; i < nc; i++)
1747         {
1748             dbl = 0;
1749             sscanf(size_string, "%20lf%n", &dbl, &n);
1750             if (dbl == 0)
1751             {
1752                 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
1753             }
1754             slb_frac[i]  = dbl;
1755             size_string += n;
1756             tot         += slb_frac[i];
1757         }
1758         /* Normalize */
1759         std::string relativeCellSizes = "Relative cell sizes:";
1760         for (i = 0; i < nc; i++)
1761         {
1762             slb_frac[i]       /= tot;
1763             relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1764         }
1765         GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1766     }
1767
1768     return slb_frac;
1769 }
1770
1771 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
1772 {
1773     int                  n     = 0;
1774     gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1775     int                  nmol;
1776     while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1777     {
1778         for (auto &ilist : extractILists(*ilists, IF_BOND))
1779         {
1780             if (NRAL(ilist.functionType) >  2)
1781             {
1782                 n += nmol*(ilist.iatoms.size()/ilistStride(ilist));
1783             }
1784         }
1785     }
1786
1787     return n;
1788 }
1789
1790 static int dd_getenv(const gmx::MDLogger &mdlog,
1791                      const char *env_var, int def)
1792 {
1793     char *val;
1794     int   nst;
1795
1796     nst = def;
1797     val = getenv(env_var);
1798     if (val)
1799     {
1800         if (sscanf(val, "%20d", &nst) <= 0)
1801         {
1802             nst = 1;
1803         }
1804         GMX_LOG(mdlog.info).appendTextFormatted(
1805                 "Found env.var. %s = %s, using value %d",
1806                 env_var, val, nst);
1807     }
1808
1809     return nst;
1810 }
1811
1812 static void check_dd_restrictions(const gmx_domdec_t  *dd,
1813                                   const t_inputrec    *ir,
1814                                   const gmx::MDLogger &mdlog)
1815 {
1816     if (ir->ePBC == epbcSCREW &&
1817         (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
1818     {
1819         gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
1820     }
1821
1822     if (ir->nstlist == 0)
1823     {
1824         gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1825     }
1826
1827     if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
1828     {
1829         GMX_LOG(mdlog.warning).appendText("comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
1830     }
1831 }
1832
1833 static real average_cellsize_min(const gmx_ddbox_t &ddbox,
1834                                  const ivec         numDomains)
1835 {
1836     real r = ddbox.box_size[XX];
1837     for (int d = 0; d < DIM; d++)
1838     {
1839         if (numDomains[d] > 1)
1840         {
1841             /* Check using the initial average cell size */
1842             r = std::min(r, ddbox.box_size[d]*ddbox.skew_fac[d]/numDomains[d]);
1843         }
1844     }
1845
1846     return r;
1847 }
1848
1849 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
1850  */
1851 static DlbState forceDlbOffOrBail(DlbState             cmdlineDlbState,
1852                                   const std::string   &reasonStr,
1853                                   const gmx::MDLogger &mdlog)
1854 {
1855     std::string dlbNotSupportedErr  = "Dynamic load balancing requested, but ";
1856     std::string dlbDisableNote      = "NOTE: disabling dynamic load balancing as ";
1857
1858     if (cmdlineDlbState == DlbState::onUser)
1859     {
1860         gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1861     }
1862     else if (cmdlineDlbState == DlbState::offCanTurnOn)
1863     {
1864         GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1865     }
1866     return DlbState::offForever;
1867 }
1868
1869 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1870  *
1871  * This function parses the parameters of "-dlb" command line option setting
1872  * corresponding state values. Then it checks the consistency of the determined
1873  * state with other run parameters and settings. As a result, the initial state
1874  * may be altered or an error may be thrown if incompatibility of options is detected.
1875  *
1876  * \param [in] mdlog       Logger.
1877  * \param [in] dlbOption   Enum value for the DLB option.
1878  * \param [in] bRecordLoad True if the load balancer is recording load information.
1879  * \param [in] mdrunOptions  Options for mdrun.
1880  * \param [in] ir          Pointer mdrun to input parameters.
1881  * \returns                DLB initial/startup state.
1882  */
1883 static DlbState determineInitialDlbState(const gmx::MDLogger &mdlog,
1884                                          DlbOption dlbOption, gmx_bool bRecordLoad,
1885                                          const gmx::MdrunOptions &mdrunOptions,
1886                                          const t_inputrec *ir)
1887 {
1888     DlbState dlbState = DlbState::offCanTurnOn;
1889
1890     switch (dlbOption)
1891     {
1892         case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1893         case DlbOption::no:               dlbState = DlbState::offUser;      break;
1894         case DlbOption::yes:              dlbState = DlbState::onUser;       break;
1895         default: gmx_incons("Invalid dlbOption enum value");
1896     }
1897
1898     /* Reruns don't support DLB: bail or override auto mode */
1899     if (mdrunOptions.rerun)
1900     {
1901         std::string reasonStr = "it is not supported in reruns.";
1902         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1903     }
1904
1905     /* Unsupported integrators */
1906     if (!EI_DYNAMICS(ir->eI))
1907     {
1908         auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1909         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1910     }
1911
1912     /* Without cycle counters we can't time work to balance on */
1913     if (!bRecordLoad)
1914     {
1915         std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
1916         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1917     }
1918
1919     if (mdrunOptions.reproducible)
1920     {
1921         std::string reasonStr = "you started a reproducible run.";
1922         switch (dlbState)
1923         {
1924             case DlbState::offUser:
1925                 break;
1926             case DlbState::offForever:
1927                 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1928                 break;
1929             case DlbState::offCanTurnOn:
1930                 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1931             case DlbState::onCanTurnOff:
1932                 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1933                 break;
1934             case DlbState::onUser:
1935                 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", mdlog);
1936             default:
1937                 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", static_cast<int>(dlbState));
1938         }
1939     }
1940
1941     return dlbState;
1942 }
1943
1944 static gmx_domdec_comm_t *init_dd_comm()
1945 {
1946     gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
1947
1948     comm->n_load_have      = 0;
1949     comm->n_load_collect   = 0;
1950
1951     comm->haveTurnedOffDlb = false;
1952
1953     for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
1954     {
1955         comm->sum_nat[i] = 0;
1956     }
1957     comm->ndecomp   = 0;
1958     comm->nload     = 0;
1959     comm->load_step = 0;
1960     comm->load_sum  = 0;
1961     comm->load_max  = 0;
1962     clear_ivec(comm->load_lim);
1963     comm->load_mdf  = 0;
1964     comm->load_pme  = 0;
1965
1966     /* This should be replaced by a unique pointer */
1967     comm->balanceRegion = ddBalanceRegionAllocate();
1968
1969     return comm;
1970 }
1971
1972 /* Returns whether mtop contains constraints and/or vsites */
1973 static bool systemHasConstraintsOrVsites(const gmx_mtop_t &mtop)
1974 {
1975     auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
1976     int  nmol;
1977     while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
1978     {
1979         if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
1980         {
1981             return true;
1982         }
1983     }
1984
1985     return false;
1986 }
1987
1988 static void setupUpdateGroups(const gmx::MDLogger &mdlog,
1989                               const gmx_mtop_t    &mtop,
1990                               const t_inputrec    &inputrec,
1991                               const real           cutoffMargin,
1992                               DDSystemInfo        *systemInfo)
1993 {
1994     /* When we have constraints and/or vsites, it is beneficial to use
1995      * update groups (when possible) to allow independent update of groups.
1996      */
1997     if (!systemHasConstraintsOrVsites(mtop))
1998     {
1999         /* No constraints or vsites, atoms can be updated independently */
2000         return;
2001     }
2002
2003     systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
2004     systemInfo->useUpdateGroups               =
2005         (!systemInfo->updateGroupingPerMoleculetype.empty() &&
2006          getenv("GMX_NO_UPDATEGROUPS") == nullptr);
2007
2008     if (systemInfo->useUpdateGroups)
2009     {
2010         int numUpdateGroups = 0;
2011         for (const auto &molblock : mtop.molblock)
2012         {
2013             numUpdateGroups += molblock.nmol*systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
2014         }
2015
2016         systemInfo->maxUpdateGroupRadius =
2017             computeMaxUpdateGroupRadius(mtop,
2018                                         systemInfo->updateGroupingPerMoleculetype,
2019                                         maxReferenceTemperature(inputrec));
2020
2021         /* To use update groups, the large domain-to-domain cutoff distance
2022          * should be compatible with the box size.
2023          */
2024         systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
2025
2026         if (systemInfo->useUpdateGroups)
2027         {
2028             GMX_LOG(mdlog.info).appendTextFormatted(
2029                     "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f nm\n",
2030                     numUpdateGroups,
2031                     mtop.natoms/static_cast<double>(numUpdateGroups),
2032                     systemInfo->maxUpdateGroupRadius);
2033         }
2034         else
2035         {
2036             GMX_LOG(mdlog.info).appendTextFormatted("The combination of rlist and box size prohibits the use of update groups\n");
2037             systemInfo->updateGroupingPerMoleculetype.clear();
2038         }
2039     }
2040 }
2041
2042 UnitCellInfo::UnitCellInfo(const t_inputrec &ir) :
2043     npbcdim(ePBC2npbcdim(ir.ePBC)),
2044     numBoundedDimensions(inputrec2nboundeddim(&ir)),
2045     ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
2046     haveScrewPBC(ir.ePBC == epbcSCREW)
2047 {
2048 }
2049
2050 /* Returns whether molecules are always whole, i.e. not broken by PBC */
2051 static bool
2052 moleculesAreAlwaysWhole(const gmx_mtop_t                            &mtop,
2053                         const bool                                   useUpdateGroups,
2054                         gmx::ArrayRef<const gmx::RangePartitioning>  updateGroupingPerMoleculetype)
2055 {
2056     if (useUpdateGroups)
2057     {
2058         GMX_RELEASE_ASSERT(updateGroupingPerMoleculetype.size() == mtop.moltype.size(),
2059                            "Need one grouping per moltype");
2060         for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
2061         {
2062             if (updateGroupingPerMoleculetype[mol].numBlocks() > 1)
2063             {
2064                 return false;
2065             }
2066         }
2067     }
2068     else
2069     {
2070         for (const auto &moltype : mtop.moltype)
2071         {
2072             if (moltype.atoms.nr > 1)
2073             {
2074                 return false;
2075             }
2076         }
2077     }
2078
2079     return true;
2080 }
2081
2082 /*! \brief Generate the simulation system information */
2083 static DDSystemInfo
2084 getSystemInfo(const gmx::MDLogger           &mdlog,
2085               const t_commrec               *cr,
2086               const DomdecOptions           &options,
2087               const gmx_mtop_t              &mtop,
2088               const t_inputrec              &ir,
2089               const matrix                   box,
2090               gmx::ArrayRef<const gmx::RVec> xGlobal)
2091 {
2092     const real         tenPercentMargin = 1.1;
2093
2094     DDSystemInfo       systemInfo;
2095
2096     /* We need to decide on update groups early, as this affects communication distances */
2097     systemInfo.useUpdateGroups = false;
2098     if (ir.cutoff_scheme == ecutsVERLET)
2099     {
2100         real cutoffMargin = std::sqrt(max_cutoff2(ir.ePBC, box)) - ir.rlist;
2101         setupUpdateGroups(mdlog, mtop, ir, cutoffMargin, &systemInfo);
2102     }
2103
2104     systemInfo.moleculesAreAlwaysWhole         =
2105         moleculesAreAlwaysWhole(mtop,
2106                                 systemInfo.useUpdateGroups,
2107                                 systemInfo.updateGroupingPerMoleculetype);
2108     systemInfo.haveInterDomainBondeds          = (!systemInfo.moleculesAreAlwaysWhole ||
2109                                                   mtop.bIntermolecularInteractions);
2110     systemInfo.haveInterDomainMultiBodyBondeds = (systemInfo.haveInterDomainBondeds &&
2111                                                   multi_body_bondeds_count(&mtop) > 0);
2112
2113     if (systemInfo.useUpdateGroups)
2114     {
2115         systemInfo.haveSplitConstraints = false;
2116         systemInfo.haveSplitSettles     = false;
2117     }
2118     else
2119     {
2120         systemInfo.haveSplitConstraints = (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2121                                            gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0);
2122         systemInfo.haveSplitSettles     = (gmx_mtop_ftype_count(mtop, F_SETTLE) > 0);
2123     }
2124
2125     if (ir.rlist == 0)
2126     {
2127         /* Set the cut-off to some very large value,
2128          * so we don't need if statements everywhere in the code.
2129          * We use sqrt, since the cut-off is squared in some places.
2130          */
2131         systemInfo.cutoff = GMX_CUTOFF_INF;
2132     }
2133     else
2134     {
2135         systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir.rlist);
2136     }
2137     systemInfo.minCutoffForMultiBody = 0;
2138
2139     /* Determine the minimum cell size limit, affected by many factors */
2140     systemInfo.cellsizeLimit             = 0;
2141     systemInfo.filterBondedCommunication = false;
2142
2143     /* We do not allow home atoms to move beyond the neighboring domain
2144      * between domain decomposition steps, which limits the cell size.
2145      * Get an estimate of cell size limit due to atom displacement.
2146      * In most cases this is a large overestimate, because it assumes
2147      * non-interaction atoms.
2148      * We set the chance to 1 in a trillion steps.
2149      */
2150     constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2151     const real     limitForAtomDisplacement          =
2152         minCellSizeForAtomDisplacement(mtop, ir,
2153                                        systemInfo.updateGroupingPerMoleculetype,
2154                                        c_chanceThatAtomMovesBeyondDomain);
2155     GMX_LOG(mdlog.info).appendTextFormatted(
2156             "Minimum cell size due to atom displacement: %.3f nm",
2157             limitForAtomDisplacement);
2158
2159     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2160                                         limitForAtomDisplacement);
2161
2162     /* TODO: PME decomposition currently requires atoms not to be more than
2163      *       2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2164      *       In nearly all cases, limitForAtomDisplacement will be smaller
2165      *       than 2/3*rlist, so the PME requirement is satisfied.
2166      *       But it would be better for both correctness and performance
2167      *       to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2168      *       Note that we would need to improve the pairlist buffer case.
2169      */
2170
2171     if (systemInfo.haveInterDomainBondeds)
2172     {
2173         if (options.minimumCommunicationRange > 0)
2174         {
2175             systemInfo.minCutoffForMultiBody =
2176                 atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2177             if (options.useBondedCommunication)
2178             {
2179                 systemInfo.filterBondedCommunication = (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2180             }
2181             else
2182             {
2183                 systemInfo.cutoff = std::max(systemInfo.cutoff,
2184                                              systemInfo.minCutoffForMultiBody);
2185             }
2186         }
2187         else if (ir.bPeriodicMols)
2188         {
2189             /* Can not easily determine the required cut-off */
2190             GMX_LOG(mdlog.warning).appendText("NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.");
2191             systemInfo.minCutoffForMultiBody = systemInfo.cutoff/2;
2192         }
2193         else
2194         {
2195             real r_2b, r_mb;
2196
2197             if (MASTER(cr))
2198             {
2199                 dd_bonded_cg_distance(mdlog, &mtop, &ir, as_rvec_array(xGlobal.data()), box,
2200                                       options.checkBondedInteractions,
2201                                       &r_2b, &r_mb);
2202             }
2203             gmx_bcast(sizeof(r_2b), &r_2b, cr);
2204             gmx_bcast(sizeof(r_mb), &r_mb, cr);
2205
2206             /* We use an initial margin of 10% for the minimum cell size,
2207              * except when we are just below the non-bonded cut-off.
2208              */
2209             if (options.useBondedCommunication)
2210             {
2211                 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2212                 {
2213                     const real r_bonded              = std::max(r_2b, r_mb);
2214                     systemInfo.minCutoffForMultiBody = tenPercentMargin*r_bonded;
2215                     /* This is the (only) place where we turn on the filtering */
2216                     systemInfo.filterBondedCommunication = true;
2217                 }
2218                 else
2219                 {
2220                     const real r_bonded              = r_mb;
2221                     systemInfo.minCutoffForMultiBody = std::min(tenPercentMargin*r_bonded,
2222                                                                 systemInfo.cutoff);
2223                 }
2224                 /* We determine cutoff_mbody later */
2225                 systemInfo.increaseMultiBodyCutoff = true;
2226             }
2227             else
2228             {
2229                 /* No special bonded communication,
2230                  * simply increase the DD cut-off.
2231                  */
2232                 systemInfo.minCutoffForMultiBody = tenPercentMargin*std::max(r_2b, r_mb);
2233                 systemInfo.cutoff                = std::max(systemInfo.cutoff,
2234                                                             systemInfo.minCutoffForMultiBody);
2235             }
2236         }
2237         GMX_LOG(mdlog.info).appendTextFormatted(
2238                 "Minimum cell size due to bonded interactions: %.3f nm",
2239                 systemInfo.minCutoffForMultiBody);
2240
2241         systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2242                                             systemInfo.minCutoffForMultiBody);
2243     }
2244
2245     systemInfo.constraintCommunicationRange = 0;
2246     if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
2247     {
2248         /* There is a cell size limit due to the constraints (P-LINCS) */
2249         systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, &mtop, &ir);
2250         GMX_LOG(mdlog.info).appendTextFormatted(
2251                 "Estimated maximum distance required for P-LINCS: %.3f nm",
2252                 systemInfo.constraintCommunicationRange);
2253         if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2254         {
2255             GMX_LOG(mdlog.info).appendText("This distance will limit the DD cell size, you can override this with -rcon");
2256         }
2257     }
2258     else if (options.constraintCommunicationRange > 0)
2259     {
2260         /* Here we do not check for dd->splitConstraints.
2261          * because one can also set a cell size limit for virtual sites only
2262          * and at this point we don't know yet if there are intercg v-sites.
2263          */
2264         GMX_LOG(mdlog.info).appendTextFormatted(
2265                 "User supplied maximum distance required for P-LINCS: %.3f nm",
2266                 options.constraintCommunicationRange);
2267         systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2268     }
2269     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2270                                         systemInfo.constraintCommunicationRange);
2271
2272     return systemInfo;
2273 }
2274
2275 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
2276  * implemented. */
2277 static void
2278 checkDDGridSetup(const DDGridSetup   &ddGridSetup,
2279                  const t_commrec     *cr,
2280                  const DomdecOptions &options,
2281                  const DDSettings    &ddSettings,
2282                  const DDSystemInfo  &systemInfo,
2283                  const real           cellsizeLimit,
2284                  const gmx_ddbox_t   &ddbox)
2285 {
2286     if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
2287     {
2288         char     buf[STRLEN];
2289         gmx_bool bC = (systemInfo.haveSplitConstraints &&
2290                        systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2291         sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
2292                 !bC ? "-rdd" : "-rcon",
2293                 ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2294                 bC ? " or your LINCS settings" : "");
2295
2296         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2297                              "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
2298                              "%s\n"
2299                              "Look in the log file for details on the domain decomposition",
2300                              cr->nnodes - ddGridSetup.numPmeOnlyRanks,
2301                              cellsizeLimit,
2302                              buf);
2303     }
2304
2305     const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
2306     if (acs < cellsizeLimit)
2307     {
2308         if (options.numCells[XX] <= 0)
2309         {
2310             GMX_RELEASE_ASSERT(false, "dd_choose_grid() should return a grid that satisfies the cell size limits");
2311         }
2312         else
2313         {
2314             gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2315                                  "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
2316                                  acs, cellsizeLimit);
2317         }
2318     }
2319
2320     const int numPPRanks = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2321     if (cr->nnodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2322     {
2323         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2324                              "The size of the domain decomposition grid (%d) does not match the number of PP ranks (%d). The total number of ranks is %d",
2325                              numPPRanks, cr->nnodes - ddGridSetup.numPmeOnlyRanks, cr->nnodes);
2326     }
2327     if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2328     {
2329         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2330                              "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", ddGridSetup.numPmeOnlyRanks, numPPRanks);
2331     }
2332 }
2333
2334 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2335 static DDRankSetup
2336 getDDRankSetup(const gmx::MDLogger &mdlog,
2337                t_commrec           *cr,
2338                const DDGridSetup   &ddGridSetup,
2339                const t_inputrec    &ir)
2340 {
2341     GMX_LOG(mdlog.info).appendTextFormatted(
2342             "Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2343             ddGridSetup.numDomains[XX], ddGridSetup.numDomains[YY], ddGridSetup.numDomains[ZZ],
2344             ddGridSetup.numPmeOnlyRanks);
2345
2346     DDRankSetup ddRankSetup;
2347
2348     ddRankSetup.numPPRanks = cr->nnodes - ddGridSetup.numPmeOnlyRanks;
2349     copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2350
2351     ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2352     if (ddRankSetup.usePmeOnlyRanks)
2353     {
2354         ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2355     }
2356     else
2357     {
2358         ddRankSetup.numRanksDoingPme = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2359     }
2360
2361     if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2362     {
2363         /* The following choices should match those
2364          * in comm_cost_est in domdec_setup.c.
2365          * Note that here the checks have to take into account
2366          * that the decomposition might occur in a different order than xyz
2367          * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2368          * in which case they will not match those in comm_cost_est,
2369          * but since that is mainly for testing purposes that's fine.
2370          */
2371         if (ddGridSetup.numDDDimensions >= 2 &&
2372             ddGridSetup.ddDimensions[0] == XX &&
2373             ddGridSetup.ddDimensions[1] == YY &&
2374             ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX] &&
2375             ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0 &&
2376             getenv("GMX_PMEONEDD") == nullptr)
2377         {
2378             ddRankSetup.npmedecompdim = 2;
2379             ddRankSetup.npmenodes_x   = ddGridSetup.numDomains[XX];
2380             ddRankSetup.npmenodes_y   = ddRankSetup.numRanksDoingPme/ddRankSetup.npmenodes_x;
2381         }
2382         else
2383         {
2384             /* In case nc is 1 in both x and y we could still choose to
2385              * decompose pme in y instead of x, but we use x for simplicity.
2386              */
2387             ddRankSetup.npmedecompdim = 1;
2388             if (ddGridSetup.ddDimensions[0] == YY)
2389             {
2390                 ddRankSetup.npmenodes_x = 1;
2391                 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2392             }
2393             else
2394             {
2395                 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2396                 ddRankSetup.npmenodes_y = 1;
2397             }
2398         }
2399         GMX_LOG(mdlog.info).appendTextFormatted(
2400                 "PME domain decomposition: %d x %d x %d",
2401                 ddRankSetup.npmenodes_x, ddRankSetup.npmenodes_y, 1);
2402     }
2403     else
2404     {
2405         ddRankSetup.npmedecompdim = 0;
2406         ddRankSetup.npmenodes_x   = 0;
2407         ddRankSetup.npmenodes_y   = 0;
2408     }
2409
2410     return ddRankSetup;
2411 }
2412
2413 /*! \brief Set the cell size and interaction limits */
2414 static void set_dd_limits(const gmx::MDLogger &mdlog,
2415                           t_commrec *cr, gmx_domdec_t *dd,
2416                           const DomdecOptions &options,
2417                           const DDSettings &ddSettings,
2418                           const DDSystemInfo &systemInfo,
2419                           const DDGridSetup &ddGridSetup,
2420                           const int numPPRanks,
2421                           const gmx_mtop_t *mtop,
2422                           const t_inputrec *ir,
2423                           const gmx_ddbox_t &ddbox)
2424 {
2425     gmx_domdec_comm_t *comm = dd->comm;
2426     comm->ddSettings        = ddSettings;
2427
2428     /* Initialize to GPU share count to 0, might change later */
2429     comm->nrank_gpu_shared = 0;
2430
2431     comm->dlbState         = comm->ddSettings.initialDlbState;
2432     dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2433     /* To consider turning DLB on after 2*nstlist steps we need to check
2434      * at partitioning count 3. Thus we need to increase the first count by 2.
2435      */
2436     comm->ddPartioningCountFirstDlbOff += 2;
2437
2438     comm->bPMELoadBalDLBLimits          = FALSE;
2439
2440     /* Allocate the charge group/atom sorting struct */
2441     comm->sort = std::make_unique<gmx_domdec_sort_t>();
2442
2443     comm->systemInfo = systemInfo;
2444
2445     if (systemInfo.useUpdateGroups)
2446     {
2447         /* Note: We would like to use dd->nnodes for the atom count estimate,
2448          *       but that is not yet available here. But this anyhow only
2449          *       affect performance up to the second dd_partition_system call.
2450          */
2451         const int homeAtomCountEstimate =  mtop->natoms/numPPRanks;
2452         comm->updateGroupsCog =
2453             std::make_unique<gmx::UpdateGroupsCog>(*mtop,
2454                                                    systemInfo.updateGroupingPerMoleculetype,
2455                                                    maxReferenceTemperature(*ir),
2456                                                    homeAtomCountEstimate);
2457     }
2458
2459     /* Set the DD setup given by ddGridSetup */
2460     copy_ivec(ddGridSetup.numDomains, dd->nc);
2461     dd->ndim = ddGridSetup.numDDDimensions;
2462     copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2463
2464     dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
2465
2466     snew(comm->slb_frac, DIM);
2467     if (isDlbDisabled(comm))
2468     {
2469         comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
2470         comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
2471         comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
2472     }
2473
2474     /* Set the multi-body cut-off and cellsize limit for DLB */
2475     comm->cutoff_mbody   = systemInfo.minCutoffForMultiBody;
2476     comm->cellsize_limit = systemInfo.cellsizeLimit;
2477     if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2478     {
2479         if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2480         {
2481             /* Set the bonded communication distance to halfway
2482              * the minimum and the maximum,
2483              * since the extra communication cost is nearly zero.
2484              */
2485             real acs           = average_cellsize_min(ddbox, dd->nc);
2486             comm->cutoff_mbody = 0.5*(systemInfo.minCutoffForMultiBody + acs);
2487             if (!isDlbDisabled(comm))
2488             {
2489                 /* Check if this does not limit the scaling */
2490                 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
2491                                               options.dlbScaling*acs);
2492             }
2493             if (!systemInfo.filterBondedCommunication)
2494             {
2495                 /* Without bBondComm do not go beyond the n.b. cut-off */
2496                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2497                 if (comm->cellsize_limit >= systemInfo.cutoff)
2498                 {
2499                     /* We don't loose a lot of efficieny
2500                      * when increasing it to the n.b. cut-off.
2501                      * It can even be slightly faster, because we need
2502                      * less checks for the communication setup.
2503                      */
2504                     comm->cutoff_mbody = systemInfo.cutoff;
2505                 }
2506             }
2507             /* Check if we did not end up below our original limit */
2508             comm->cutoff_mbody = std::max(comm->cutoff_mbody,
2509                                           systemInfo.minCutoffForMultiBody);
2510
2511             if (comm->cutoff_mbody > comm->cellsize_limit)
2512             {
2513                 comm->cellsize_limit = comm->cutoff_mbody;
2514             }
2515         }
2516         /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2517     }
2518
2519     if (debug)
2520     {
2521         fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
2522                 "cellsize limit %f\n",
2523                 gmx::boolToString(systemInfo.filterBondedCommunication),
2524                 comm->cellsize_limit);
2525     }
2526
2527     if (MASTER(cr))
2528     {
2529         check_dd_restrictions(dd, ir, mdlog);
2530     }
2531 }
2532
2533 void dd_init_bondeds(FILE                       *fplog,
2534                      gmx_domdec_t               *dd,
2535                      const gmx_mtop_t           *mtop,
2536                      const gmx_vsite_t          *vsite,
2537                      const t_inputrec           *ir,
2538                      gmx_bool                    bBCheck,
2539                      cginfo_mb_t                *cginfo_mb)
2540 {
2541     gmx_domdec_comm_t *comm;
2542
2543     dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
2544
2545     comm = dd->comm;
2546
2547     if (comm->systemInfo.filterBondedCommunication)
2548     {
2549         /* Communicate atoms beyond the cut-off for bonded interactions */
2550         comm->bondedLinks = makeBondedLinks(mtop, cginfo_mb);
2551     }
2552     else
2553     {
2554         /* Only communicate atoms based on cut-off */
2555         comm->bondedLinks = nullptr;
2556     }
2557 }
2558
2559 static void writeSettings(gmx::TextWriter       *log,
2560                           gmx_domdec_t          *dd,
2561                           const gmx_mtop_t      *mtop,
2562                           const t_inputrec      *ir,
2563                           gmx_bool               bDynLoadBal,
2564                           real                   dlb_scale,
2565                           const gmx_ddbox_t     *ddbox)
2566 {
2567     gmx_domdec_comm_t *comm;
2568     int                d;
2569     ivec               np;
2570     real               limit, shrink;
2571
2572     comm = dd->comm;
2573
2574     if (bDynLoadBal)
2575     {
2576         log->writeString("The maximum number of communication pulses is:");
2577         for (d = 0; d < dd->ndim; d++)
2578         {
2579             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2580         }
2581         log->ensureLineBreak();
2582         log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm", comm->cellsize_limit);
2583         log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2584         log->writeString("The allowed shrink of domain decomposition cells is:");
2585         for (d = 0; d < DIM; d++)
2586         {
2587             if (dd->nc[d] > 1)
2588             {
2589                 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
2590                 {
2591                     shrink = 0;
2592                 }
2593                 else
2594                 {
2595                     shrink =
2596                         comm->cellsize_min_dlb[d]/
2597                         (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
2598                 }
2599                 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2600             }
2601         }
2602         log->ensureLineBreak();
2603     }
2604     else
2605     {
2606         set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2607         log->writeString("The initial number of communication pulses is:");
2608         for (d = 0; d < dd->ndim; d++)
2609         {
2610             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2611         }
2612         log->ensureLineBreak();
2613         log->writeString("The initial domain decomposition cell size is:");
2614         for (d = 0; d < DIM; d++)
2615         {
2616             if (dd->nc[d] > 1)
2617             {
2618                 log->writeStringFormatted(" %c %.2f nm",
2619                                           dim2char(d), dd->comm->cellsize_min[d]);
2620             }
2621         }
2622         log->ensureLineBreak();
2623         log->writeLine();
2624     }
2625
2626     const bool haveInterDomainVsites =
2627         (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
2628
2629     if (comm->systemInfo.haveInterDomainBondeds ||
2630         haveInterDomainVsites ||
2631         comm->systemInfo.haveSplitConstraints ||
2632         comm->systemInfo.haveSplitSettles)
2633     {
2634         std::string decompUnits;
2635         if (comm->systemInfo.useUpdateGroups)
2636         {
2637             decompUnits = "atom groups";
2638         }
2639         else
2640         {
2641             decompUnits = "atoms";
2642         }
2643
2644         log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:", decompUnits.c_str());
2645         log->writeLineFormatted("%40s  %-7s %6.3f nm", "non-bonded interactions", "", comm->systemInfo.cutoff);
2646
2647         if (bDynLoadBal)
2648         {
2649             limit = dd->comm->cellsize_limit;
2650         }
2651         else
2652         {
2653             if (dd->unitCellInfo.ddBoxIsDynamic)
2654             {
2655                 log->writeLine("(the following are initial values, they could change due to box deformation)");
2656             }
2657             limit = dd->comm->cellsize_min[XX];
2658             for (d = 1; d < DIM; d++)
2659             {
2660                 limit = std::min(limit, dd->comm->cellsize_min[d]);
2661             }
2662         }
2663
2664         if (comm->systemInfo.haveInterDomainBondeds)
2665         {
2666             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2667                                     "two-body bonded interactions", "(-rdd)",
2668                                     std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2669             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2670                                     "multi-body bonded interactions", "(-rdd)",
2671                                     (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->systemInfo.cutoff, limit));
2672         }
2673         if (haveInterDomainVsites)
2674         {
2675             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2676                                     "virtual site constructions", "(-rcon)", limit);
2677         }
2678         if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2679         {
2680             std::string separation = gmx::formatString("atoms separated by up to %d constraints",
2681                                                        1+ir->nProjOrder);
2682             log->writeLineFormatted("%40s  %-7s %6.3f nm\n",
2683                                     separation.c_str(), "(-rcon)", limit);
2684         }
2685         log->ensureLineBreak();
2686     }
2687 }
2688
2689 static void logSettings(const gmx::MDLogger &mdlog,
2690                         gmx_domdec_t        *dd,
2691                         const gmx_mtop_t    *mtop,
2692                         const t_inputrec    *ir,
2693                         real                 dlb_scale,
2694                         const gmx_ddbox_t   *ddbox)
2695 {
2696     gmx::StringOutputStream stream;
2697     gmx::TextWriter         log(&stream);
2698     writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2699     if (dd->comm->dlbState == DlbState::offCanTurnOn)
2700     {
2701         {
2702             log.ensureEmptyLine();
2703             log.writeLine("When dynamic load balancing gets turned on, these settings will change to:");
2704         }
2705         writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2706     }
2707     GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2708 }
2709
2710 static void set_cell_limits_dlb(const gmx::MDLogger &mdlog,
2711                                 gmx_domdec_t        *dd,
2712                                 real                 dlb_scale,
2713                                 const t_inputrec    *ir,
2714                                 const gmx_ddbox_t   *ddbox)
2715 {
2716     gmx_domdec_comm_t *comm;
2717     int                d, dim, npulse, npulse_d_max, npulse_d;
2718     gmx_bool           bNoCutOff;
2719
2720     comm = dd->comm;
2721
2722     bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2723
2724     /* Determine the maximum number of comm. pulses in one dimension */
2725
2726     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2727
2728     /* Determine the maximum required number of grid pulses */
2729     if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2730     {
2731         /* Only a single pulse is required */
2732         npulse = 1;
2733     }
2734     else if (!bNoCutOff && comm->cellsize_limit > 0)
2735     {
2736         /* We round down slightly here to avoid overhead due to the latency
2737          * of extra communication calls when the cut-off
2738          * would be only slightly longer than the cell size.
2739          * Later cellsize_limit is redetermined,
2740          * so we can not miss interactions due to this rounding.
2741          */
2742         npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff/comm->cellsize_limit);
2743     }
2744     else
2745     {
2746         /* There is no cell size limit */
2747         npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
2748     }
2749
2750     if (!bNoCutOff && npulse > 1)
2751     {
2752         /* See if we can do with less pulses, based on dlb_scale */
2753         npulse_d_max = 0;
2754         for (d = 0; d < dd->ndim; d++)
2755         {
2756             dim      = dd->dim[d];
2757             npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->systemInfo.cutoff
2758                                         /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
2759             npulse_d_max = std::max(npulse_d_max, npulse_d);
2760         }
2761         npulse = std::min(npulse, npulse_d_max);
2762     }
2763
2764     /* This env var can override npulse */
2765     d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2766     if (d > 0)
2767     {
2768         npulse = d;
2769     }
2770
2771     comm->maxpulse       = 1;
2772     comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
2773     for (d = 0; d < dd->ndim; d++)
2774     {
2775         if (comm->ddSettings.request1DAnd1Pulse)
2776         {
2777             comm->cd[d].np_dlb = 1;
2778         }
2779         else
2780         {
2781             comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
2782             comm->maxpulse     = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2783         }
2784         if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
2785         {
2786             comm->bVacDLBNoLimit = FALSE;
2787         }
2788     }
2789
2790     /* cellsize_limit is set for LINCS in init_domain_decomposition */
2791     if (!comm->bVacDLBNoLimit)
2792     {
2793         comm->cellsize_limit = std::max(comm->cellsize_limit,
2794                                         comm->systemInfo.cutoff/comm->maxpulse);
2795     }
2796     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2797     /* Set the minimum cell size for each DD dimension */
2798     for (d = 0; d < dd->ndim; d++)
2799     {
2800         if (comm->bVacDLBNoLimit ||
2801             comm->cd[d].np_dlb*comm->cellsize_limit >= comm->systemInfo.cutoff)
2802         {
2803             comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2804         }
2805         else
2806         {
2807             comm->cellsize_min_dlb[dd->dim[d]] =
2808                 comm->systemInfo.cutoff/comm->cd[d].np_dlb;
2809         }
2810     }
2811     if (comm->cutoff_mbody <= 0)
2812     {
2813         comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2814     }
2815     if (isDlbOn(comm))
2816     {
2817         set_dlb_limits(dd);
2818     }
2819 }
2820
2821 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t &dd)
2822 {
2823     return dd.comm->systemInfo.moleculesAreAlwaysWhole;
2824 }
2825
2826 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
2827 {
2828     /* If each molecule is a single charge group
2829      * or we use domain decomposition for each periodic dimension,
2830      * we do not need to take pbc into account for the bonded interactions.
2831      */
2832     return (ePBC != epbcNONE && dd->comm->systemInfo.haveInterDomainBondeds &&
2833             !(dd->nc[XX] > 1 &&
2834               dd->nc[YY] > 1 &&
2835               (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
2836 }
2837
2838 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2839 static void set_ddgrid_parameters(const gmx::MDLogger &mdlog,
2840                                   gmx_domdec_t *dd, real dlb_scale,
2841                                   const gmx_mtop_t *mtop, const t_inputrec *ir,
2842                                   const gmx_ddbox_t *ddbox)
2843 {
2844     gmx_domdec_comm_t *comm        = dd->comm;
2845     DDRankSetup       &ddRankSetup = comm->ddRankSetup;
2846
2847     if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2848     {
2849         init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2850         if (ddRankSetup.npmedecompdim >= 2)
2851         {
2852             init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2853         }
2854     }
2855     else
2856     {
2857         ddRankSetup.numRanksDoingPme = 0;
2858         if (dd->pme_nodeid >= 0)
2859         {
2860             gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2861                                  "Can not have separate PME ranks without PME electrostatics");
2862         }
2863     }
2864
2865     if (debug)
2866     {
2867         fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2868     }
2869     if (!isDlbDisabled(comm))
2870     {
2871         set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2872     }
2873
2874     logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
2875
2876     real vol_frac;
2877     if (ir->ePBC == epbcNONE)
2878     {
2879         vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
2880     }
2881     else
2882     {
2883         vol_frac =
2884             (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, *ddbox))/static_cast<double>(dd->nnodes);
2885     }
2886     if (debug)
2887     {
2888         fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2889     }
2890     int natoms_tot = mtop->natoms;
2891
2892     dd->ga2la      = new gmx_ga2la_t(natoms_tot,
2893                                      static_cast<int>(vol_frac*natoms_tot));
2894 }
2895
2896 /*! \brief Get some important DD parameters which can be modified by env.vars */
2897 static DDSettings
2898 getDDSettings(const gmx::MDLogger     &mdlog,
2899               const DomdecOptions     &options,
2900               const gmx::MdrunOptions &mdrunOptions,
2901               const t_inputrec        &ir)
2902 {
2903     DDSettings ddSettings;
2904
2905     ddSettings.useSendRecv2        = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2906     ddSettings.dlb_scale_lim       = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2907     ddSettings.request1DAnd1Pulse  = bool(dd_getenv(mdlog, "GMX_DD_1D_1PULSE", 0));
2908     ddSettings.useDDOrderZYX       = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
2909     ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
2910     ddSettings.eFlop               = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2911     const int recload              = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2912     ddSettings.nstDDDump           = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2913     ddSettings.nstDDDumpGrid       = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2914     ddSettings.DD_debug            = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
2915
2916     if (ddSettings.useSendRecv2)
2917     {
2918         GMX_LOG(mdlog.info).appendText("Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication");
2919     }
2920
2921     if (ddSettings.eFlop)
2922     {
2923         GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2924         ddSettings.recordLoad = true;
2925     }
2926     else
2927     {
2928         ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
2929     }
2930
2931     ddSettings.initialDlbState =
2932         determineInitialDlbState(mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, &ir);
2933     GMX_LOG(mdlog.info).appendTextFormatted("Dynamic load balancing: %s",
2934                                             edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);
2935
2936     return ddSettings;
2937 }
2938
2939 gmx_domdec_t::gmx_domdec_t(const t_inputrec &ir) :
2940     unitCellInfo(ir)
2941 {
2942 }
2943
2944 /*! \brief Return whether the simulation described can run a 1D single-pulse DD.
2945  *
2946  * The GPU halo exchange code requires a 1D single-pulse DD. Such a DD
2947  * generally requires a larger box than other possible decompositions
2948  * with the same rank count, so the calling code might need to decide
2949  * what is the most appropriate way to run the simulation based on
2950  * whether such a DD is possible.
2951  *
2952  * This function works like init_domain_decomposition(), but will not
2953  * give a fatal error, and only uses \c cr for communicating between
2954  * ranks.
2955  *
2956  * It is safe to call before thread-MPI spawns ranks, so that
2957  * thread-MPI can decide whether and how to trigger the GPU halo
2958  * exchange code path. The number of PME ranks, if any, should be set
2959  * in \c options.numPmeRanks.
2960  */
2961 static bool
2962 canMake1DAnd1PulseDomainDecomposition(const DDSettings              &ddSettingsOriginal,
2963                                       const t_commrec               *cr,
2964                                       const int                      numRanksRequested,
2965                                       const DomdecOptions           &options,
2966                                       const gmx_mtop_t              &mtop,
2967                                       const t_inputrec              &ir,
2968                                       const matrix                   box,
2969                                       gmx::ArrayRef<const gmx::RVec> xGlobal)
2970 {
2971     // Ensure we don't write any output from this checking routine
2972     gmx::MDLogger dummyLogger;
2973
2974     DDSystemInfo  systemInfo = getSystemInfo(dummyLogger, cr, options, mtop, ir, box, xGlobal);
2975
2976     DDSettings    ddSettings = ddSettingsOriginal;
2977     ddSettings.request1DAnd1Pulse = true;
2978     const real    gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(dummyLogger, ddSettings.request1DAnd1Pulse,
2979                                                                        !isDlbDisabled(ddSettings.initialDlbState),
2980                                                                        options.dlbScaling, ir,
2981                                                                        systemInfo.cellsizeLimit);
2982     gmx_ddbox_t ddbox       = {0};
2983     DDGridSetup ddGridSetup = getDDGridSetup(dummyLogger, cr, numRanksRequested, options,
2984                                              ddSettings, systemInfo, gridSetupCellsizeLimit,
2985                                              mtop, ir, box, xGlobal, &ddbox);
2986
2987     const bool canMakeDDWith1DAnd1Pulse = (ddGridSetup.numDomains[XX] != 0);
2988
2989     return canMakeDDWith1DAnd1Pulse;
2990 }
2991
2992 bool is1DAnd1PulseDD(const gmx_domdec_t &dd)
2993 {
2994     const int  maxDimensionSize             = std::max(dd.nc[XX], std::max(dd.nc[YY], dd.nc[ZZ]));
2995     const int  productOfDimensionSizes      = dd.nc[XX]*dd.nc[YY]*dd.nc[ZZ];
2996     const bool decompositionHasOneDimension = (maxDimensionSize == productOfDimensionSizes);
2997
2998     const bool hasMax1Pulse =
2999         ((isDlbDisabled(dd.comm) && dd.comm->cellsize_limit >= dd.comm->systemInfo.cutoff) ||
3000          (!isDlbDisabled(dd.comm) && dd.comm->maxpulse == 1));
3001
3002     return decompositionHasOneDimension && hasMax1Pulse;
3003
3004 }
3005
3006 namespace gmx
3007 {
3008
3009 // TODO once the functionality stablizes, move this class and
3010 // supporting functionality into builder.cpp
3011 /*! \brief Impl class for DD builder */
3012 class DomainDecompositionBuilder::Impl
3013 {
3014     public:
3015         //! Constructor
3016         Impl(const MDLogger      &mdlog,
3017              t_commrec           *cr,
3018              const DomdecOptions &options,
3019              const MdrunOptions  &mdrunOptions,
3020              bool                 prefer1DAnd1Pulse,
3021              const gmx_mtop_t    &mtop,
3022              const t_inputrec    &ir,
3023              const matrix         box,
3024              ArrayRef<const RVec> xGlobal);
3025
3026         //! Build the resulting DD manager
3027         gmx_domdec_t *build(LocalAtomSetManager *atomSets);
3028
3029         //! Objects used in constructing and configuring DD
3030         //! {
3031         //! Logging object
3032         const MDLogger      &mdlog_;
3033         //! Communication object
3034         t_commrec           *cr_;
3035         //! User-supplied options configuring DD behavior
3036         const DomdecOptions  options_;
3037         //! Global system topology
3038         const gmx_mtop_t    &mtop_;
3039         //! User input values from the tpr file
3040         const t_inputrec    &ir_;
3041         //! }
3042
3043         //! Internal objects used in constructing DD
3044         //! {
3045         //! Settings combined from the user input
3046         DDSettings           ddSettings_;
3047         //! Information derived from the simulation system
3048         DDSystemInfo         systemInfo_;
3049         //! Box structure
3050         gmx_ddbox_t          ddbox_ = { 0 };
3051         //! Organization of the DD grids
3052         DDGridSetup          ddGridSetup_;
3053         //! Organzation of the DD ranks
3054         DDRankSetup          ddRankSetup_;
3055         //! Number of DD cells in each dimension
3056         ivec                 ddCellIndex_ = { 0, 0, 0 };
3057         //! IDs of PME-only ranks
3058         std::vector<int>     pmeRanks_;
3059         //! Contains a valid Cartesian-communicator-based setup, or defaults.
3060         CartesianRankSetup   cartSetup_;
3061         //! }
3062
3063 };
3064
3065 DomainDecompositionBuilder::Impl::Impl(const MDLogger      &mdlog,
3066                                        t_commrec           *cr,
3067                                        const DomdecOptions &options,
3068                                        const MdrunOptions  &mdrunOptions,
3069                                        const bool           prefer1DAnd1Pulse,
3070                                        const gmx_mtop_t    &mtop,
3071                                        const t_inputrec    &ir,
3072                                        const matrix         box,
3073                                        ArrayRef<const RVec> xGlobal)
3074     : mdlog_(mdlog),
3075       cr_(cr),
3076       options_(options),
3077       mtop_(mtop),
3078       ir_(ir)
3079 {
3080     GMX_LOG(mdlog_.info).appendTextFormatted(
3081             "\nInitializing Domain Decomposition on %d ranks", cr_->nnodes);
3082
3083     ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_);
3084
3085     if (prefer1DAnd1Pulse &&
3086         canMake1DAnd1PulseDomainDecomposition(ddSettings_, cr_, cr_->nnodes, options_,
3087                                               mtop_, ir_, box, xGlobal))
3088     {
3089         ddSettings_.request1DAnd1Pulse = true;
3090     }
3091
3092     if (ddSettings_.eFlop > 1)
3093     {
3094         /* Ensure that we have different random flop counts on different ranks */
3095         srand(1 + cr_->nodeid);
3096     }
3097
3098     systemInfo_ = getSystemInfo(mdlog_, cr_, options_, mtop_, ir_, box, xGlobal);
3099
3100     const int numRanksRequested = cr_->nnodes;
3101     checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir_.coulombtype), options_.numPmeRanks);
3102
3103     // DD grid setup uses a more different cell size limit for
3104     // automated setup than the one in systemInfo_. The latter is used
3105     // in set_dd_limits() to configure DLB, for example.
3106     const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(mdlog_, ddSettings_.request1DAnd1Pulse,
3107                                                                     !isDlbDisabled(ddSettings_.initialDlbState),
3108                                                                     options_.dlbScaling, ir_,
3109                                                                     systemInfo_.cellsizeLimit);
3110     ddGridSetup_ = getDDGridSetup(mdlog_, cr_, numRanksRequested, options_,
3111                                   ddSettings_, systemInfo_, gridSetupCellsizeLimit,
3112                                   mtop_, ir_, box, xGlobal, &ddbox_);
3113     checkDDGridSetup(ddGridSetup_, cr_, options_, ddSettings_, systemInfo_, gridSetupCellsizeLimit, ddbox_);
3114
3115     cr_->npmenodes = ddGridSetup_.numPmeOnlyRanks;
3116
3117     ddRankSetup_ = getDDRankSetup(mdlog_, cr_, ddGridSetup_, ir_);
3118
3119     /* Generate the group communicator, also decides the duty of each rank */
3120     cartSetup_   =
3121         makeGroupCommunicators(mdlog_, ddSettings_, options_.rankOrder,
3122                                ddRankSetup_, cr_,
3123                                ddCellIndex_, &pmeRanks_);
3124 }
3125
3126 gmx_domdec_t *DomainDecompositionBuilder::Impl::build(LocalAtomSetManager *atomSets)
3127 {
3128     gmx_domdec_t *dd = new gmx_domdec_t(ir_);
3129
3130     copy_ivec(ddCellIndex_, dd->ci);
3131
3132     dd->comm = init_dd_comm();
3133
3134     dd->comm->ddRankSetup        = ddRankSetup_;
3135     dd->comm->cartesianRankSetup = cartSetup_;
3136
3137     set_dd_limits(mdlog_, cr_, dd, options_,
3138                   ddSettings_, systemInfo_,
3139                   ddGridSetup_,
3140                   ddRankSetup_.numPPRanks,
3141                   &mtop_, &ir_,
3142                   ddbox_);
3143
3144     setupGroupCommunication(mdlog_, ddSettings_, pmeRanks_, cr_, mtop_.natoms, dd);
3145
3146     if (thisRankHasDuty(cr_, DUTY_PP))
3147     {
3148         set_ddgrid_parameters(mdlog_, dd, options_.dlbScaling, &mtop_, &ir_, &ddbox_);
3149
3150         setup_neighbor_relations(dd);
3151     }
3152
3153     /* Set overallocation to avoid frequent reallocation of arrays */
3154     set_over_alloc_dd(TRUE);
3155
3156     dd->atomSets = atomSets;
3157
3158     return dd;
3159 }
3160
3161 DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger      &mdlog,
3162                                                        t_commrec           *cr,
3163                                                        const DomdecOptions &options,
3164                                                        const MdrunOptions  &mdrunOptions,
3165                                                        const bool           prefer1DAnd1Pulse,
3166                                                        const gmx_mtop_t    &mtop,
3167                                                        const t_inputrec    &ir,
3168                                                        const matrix         box,
3169                                                        ArrayRef<const RVec> xGlobal)
3170     : impl_(new Impl(mdlog, cr, options, mdrunOptions, prefer1DAnd1Pulse, mtop, ir, box, xGlobal))
3171 {
3172 }
3173
3174 gmx_domdec_t *DomainDecompositionBuilder::build(LocalAtomSetManager *atomSets)
3175 {
3176     return impl_->build(atomSets);
3177 }
3178
3179 DomainDecompositionBuilder::~DomainDecompositionBuilder() = default;
3180
3181 } // namespace gmx
3182
3183 static gmx_bool test_dd_cutoff(t_commrec                     *cr,
3184                                const matrix                   box,
3185                                gmx::ArrayRef<const gmx::RVec> x,
3186                                real                           cutoffRequested)
3187 {
3188     gmx_domdec_t *dd;
3189     gmx_ddbox_t   ddbox;
3190     int           d, dim, np;
3191     real          inv_cell_size;
3192     int           LocallyLimited;
3193
3194     dd = cr->dd;
3195
3196     set_ddbox(*dd, false, box, true, x, &ddbox);
3197
3198     LocallyLimited = 0;
3199
3200     for (d = 0; d < dd->ndim; d++)
3201     {
3202         dim = dd->dim[d];
3203
3204         inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
3205         if (dd->unitCellInfo.ddBoxIsDynamic)
3206         {
3207             inv_cell_size *= DD_PRES_SCALE_MARGIN;
3208         }
3209
3210         np = 1 + static_cast<int>(cutoffRequested*inv_cell_size*ddbox.skew_fac[dim]);
3211
3212         if (dd->comm->ddSettings.request1DAnd1Pulse && np > 1)
3213         {
3214             return FALSE;
3215         }
3216
3217         if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3218         {
3219             if (np > dd->comm->cd[d].np_dlb)
3220             {
3221                 return FALSE;
3222             }
3223
3224             /* If a current local cell size is smaller than the requested
3225              * cut-off, we could still fix it, but this gets very complicated.
3226              * Without fixing here, we might actually need more checks.
3227              */
3228             real cellSizeAlongDim = (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim];
3229             if (cellSizeAlongDim*dd->comm->cd[d].np_dlb < cutoffRequested)
3230             {
3231                 LocallyLimited = 1;
3232             }
3233         }
3234     }
3235
3236     if (!isDlbDisabled(dd->comm))
3237     {
3238         /* If DLB is not active yet, we don't need to check the grid jumps.
3239          * Actually we shouldn't, because then the grid jump data is not set.
3240          */
3241         if (isDlbOn(dd->comm) &&
3242             check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3243         {
3244             LocallyLimited = 1;
3245         }
3246
3247         gmx_sumi(1, &LocallyLimited, cr);
3248
3249         if (LocallyLimited > 0)
3250         {
3251             return FALSE;
3252         }
3253     }
3254
3255     return TRUE;
3256 }
3257
3258 gmx_bool change_dd_cutoff(t_commrec                     *cr,
3259                           const matrix                   box,
3260                           gmx::ArrayRef<const gmx::RVec> x,
3261                           real                           cutoffRequested)
3262 {
3263     gmx_bool bCutoffAllowed;
3264
3265     bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3266
3267     if (bCutoffAllowed)
3268     {
3269         cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3270     }
3271
3272     return bCutoffAllowed;
3273 }