src/gromacs/domdec/domdec.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #include "gmxpre.h"
  37
  38 #include "domdec.h"
  39
  40 #include "config.h"
  41
  42 #include <cassert>
  43 #include <cinttypes>
  44 #include <climits>
  45 #include <cmath>
  46 #include <cstdio>
  47 #include <cstdlib>
  48 #include <cstring>
  49
  50 #include <algorithm>
  51 #include <memory>
  52
  53 #include "gromacs/domdec/collect.h"
  54 #include "gromacs/domdec/dlb.h"
  55 #include "gromacs/domdec/dlbtiming.h"
  56 #include "gromacs/domdec/domdec_network.h"
  57 #include "gromacs/domdec/ga2la.h"
  58 #include "gromacs/domdec/gpuhaloexchange.h"
  59 #include "gromacs/domdec/options.h"
  60 #include "gromacs/domdec/partition.h"
  61 #include "gromacs/gmxlib/network.h"
  62 #include "gromacs/gmxlib/nrnb.h"
  63 #include "gromacs/gpu_utils/gpu_utils.h"
  64 #include "gromacs/hardware/hw_info.h"
  65 #include "gromacs/listed_forces/manage_threading.h"
  66 #include "gromacs/math/vec.h"
  67 #include "gromacs/math/vectypes.h"
  68 #include "gromacs/mdlib/calc_verletbuf.h"
  69 #include "gromacs/mdlib/constr.h"
  70 #include "gromacs/mdlib/constraintrange.h"
  71 #include "gromacs/mdlib/updategroups.h"
  72 #include "gromacs/mdlib/vsite.h"
  73 #include "gromacs/mdtypes/commrec.h"
  74 #include "gromacs/mdtypes/forceoutput.h"
  75 #include "gromacs/mdtypes/inputrec.h"
  76 #include "gromacs/mdtypes/mdrunoptions.h"
  77 #include "gromacs/mdtypes/state.h"
  78 #include "gromacs/pbcutil/ishift.h"
  79 #include "gromacs/pbcutil/pbc.h"
  80 #include "gromacs/pulling/pull.h"
  81 #include "gromacs/timing/wallcycle.h"
  82 #include "gromacs/topology/block.h"
  83 #include "gromacs/topology/idef.h"
  84 #include "gromacs/topology/ifunc.h"
  85 #include "gromacs/topology/mtop_lookup.h"
  86 #include "gromacs/topology/mtop_util.h"
  87 #include "gromacs/topology/topology.h"
  88 #include "gromacs/utility/basedefinitions.h"
  89 #include "gromacs/utility/basenetwork.h"
  90 #include "gromacs/utility/cstringutil.h"
  91 #include "gromacs/utility/exceptions.h"
  92 #include "gromacs/utility/fatalerror.h"
  93 #include "gromacs/utility/gmxmpi.h"
  94 #include "gromacs/utility/logger.h"
  95 #include "gromacs/utility/real.h"
  96 #include "gromacs/utility/smalloc.h"
  97 #include "gromacs/utility/strconvert.h"
  98 #include "gromacs/utility/stringstream.h"
  99 #include "gromacs/utility/stringutil.h"
 100 #include "gromacs/utility/textwriter.h"
 101
 102 #include "atomdistribution.h"
 103 #include "box.h"
 104 #include "cellsizes.h"
 105 #include "distribute.h"
 106 #include "domdec_constraints.h"
 107 #include "domdec_internal.h"
 108 #include "domdec_setup.h"
 109 #include "domdec_vsite.h"
 110 #include "redistribute.h"
 111 #include "utility.h"
 112
 113 // TODO remove this when moving domdec into gmx namespace
 114 using gmx::DdRankOrder;
 115 using gmx::DlbOption;
 116 using gmx::DomdecOptions;
 117
 118 static const char *edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
 119
 120 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
 121 #define DD_CGIBS 2
 122
 123 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
 124 #define DD_FLAG_NRCG  65535
 125 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
 126 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
 127
 128 /* The DD zone order */
 129 static const ivec dd_zo[DD_MAXZONE] =
 130 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
 131
 132 /* The non-bonded zone-pair setup for domain decomposition
 133  * The first number is the i-zone, the second number the first j-zone seen by
 134  * this i-zone, the third number the last+1 j-zone seen by this i-zone.
 135  * As is, this is for 3D decomposition, where there are 4 i-zones.
 136  * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
 137  * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
 138  */
 139 static const int
 140     ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
 141                                                  {1, 3, 6},
 142                                                  {2, 5, 6},
 143                                                  {3, 5, 7}};
 144
 145
 146 /*
 147    #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
 148
 149    static void index2xyz(ivec nc,int ind,ivec xyz)
 150    {
 151    xyz[XX] = ind % nc[XX];
 152    xyz[YY] = (ind / nc[XX]) % nc[YY];
 153    xyz[ZZ] = ind / (nc[YY]*nc[XX]);
 154    }
 155  */
 156
 157 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
 158 {
 159     xyz[XX] = ind / (nc[YY]*nc[ZZ]);
 160     xyz[YY] = (ind / nc[ZZ]) % nc[YY];
 161     xyz[ZZ] = ind % nc[ZZ];
 162 }
 163
 164 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
 165 {
 166     int                       ddnodeid = -1;
 167
 168     const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
 169     const int                 ddindex   = dd_index(dd->nc, c);
 170     if (cartSetup.bCartesianPP_PME)
 171     {
 172         ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
 173     }
 174     else if (cartSetup.bCartesianPP)
 175     {
 176 #if GMX_MPI
 177         MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
 178 #endif
 179     }
 180     else
 181     {
 182         ddnodeid = ddindex;
 183     }
 184
 185     return ddnodeid;
 186 }
 187
 188 int ddglatnr(const gmx_domdec_t *dd, int i)
 189 {
 190     int atnr;
 191
 192     if (dd == nullptr)
 193     {
 194         atnr = i + 1;
 195     }
 196     else
 197     {
 198         if (i >= dd->comm->atomRanges.numAtomsTotal())
 199         {
 200             gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
 201         }
 202         atnr = dd->globalAtomIndices[i] + 1;
 203     }
 204
 205     return atnr;
 206 }
 207
 208 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
 209 {
 210     return &dd->comm->cgs_gl;
 211 }
 212
 213 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t &dd)
 214 {
 215     GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
 216     return dd.comm->systemInfo.updateGroupingPerMoleculetype;
 217 }
 218
 219 void dd_store_state(gmx_domdec_t *dd, t_state *state)
 220 {
 221     int i;
 222
 223     if (state->ddp_count != dd->ddp_count)
 224     {
 225         gmx_incons("The MD state does not match the domain decomposition state");
 226     }
 227
 228     state->cg_gl.resize(dd->ncg_home);
 229     for (i = 0; i < dd->ncg_home; i++)
 230     {
 231         state->cg_gl[i] = dd->globalAtomGroupIndices[i];
 232     }
 233
 234     state->ddp_count_cg_gl = dd->ddp_count;
 235 }
 236
 237 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
 238 {
 239     return &dd->comm->zones;
 240 }
 241
 242 void dd_get_ns_ranges(const gmx_domdec_t *dd, int icg,
 243                       int *jcg0, int *jcg1, ivec shift0, ivec shift1)
 244 {
 245     gmx_domdec_zones_t *zones;
 246     int                 izone, d, dim;
 247
 248     zones = &dd->comm->zones;
 249
 250     izone = 0;
 251     while (icg >= zones->izone[izone].cg1)
 252     {
 253         izone++;
 254     }
 255
 256     if (izone == 0)
 257     {
 258         *jcg0 = icg;
 259     }
 260     else if (izone < zones->nizone)
 261     {
 262         *jcg0 = zones->izone[izone].jcg0;
 263     }
 264     else
 265     {
 266         gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
 267                   icg, izone, zones->nizone);
 268     }
 269
 270     *jcg1 = zones->izone[izone].jcg1;
 271
 272     for (d = 0; d < dd->ndim; d++)
 273     {
 274         dim         = dd->dim[d];
 275         shift0[dim] = zones->izone[izone].shift0[dim];
 276         shift1[dim] = zones->izone[izone].shift1[dim];
 277         if (dd->comm->tric_dir[dim] || (isDlbOn(dd->comm) && d > 0))
 278         {
 279             /* A conservative approach, this can be optimized */
 280             shift0[dim] -= 1;
 281             shift1[dim] += 1;
 282         }
 283     }
 284 }
 285
 286 int dd_numHomeAtoms(const gmx_domdec_t &dd)
 287 {
 288     return dd.comm->atomRanges.numHomeAtoms();
 289 }
 290
 291 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
 292 {
 293     /* We currently set mdatoms entries for all atoms:
 294      * local + non-local + communicated for vsite + constraints
 295      */
 296
 297     return dd->comm->atomRanges.numAtomsTotal();
 298 }
 299
 300 int dd_natoms_vsite(const gmx_domdec_t *dd)
 301 {
 302     return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
 303 }
 304
 305 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
 306 {
 307     *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
 308     *at_end   = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
 309 }
 310
 311 void dd_move_x(gmx_domdec_t             *dd,
 312                const matrix              box,
 313                gmx::ArrayRef<gmx::RVec>  x,
 314                gmx_wallcycle            *wcycle)
 315 {
 316     wallcycle_start(wcycle, ewcMOVEX);
 317
 318     int                    nzone, nat_tot;
 319     gmx_domdec_comm_t     *comm;
 320     gmx_domdec_comm_dim_t *cd;
 321     rvec                   shift = {0, 0, 0};
 322     gmx_bool               bPBC, bScrew;
 323
 324     comm = dd->comm;
 325
 326     nzone   = 1;
 327     nat_tot = comm->atomRanges.numHomeAtoms();
 328     for (int d = 0; d < dd->ndim; d++)
 329     {
 330         bPBC   = (dd->ci[dd->dim[d]] == 0);
 331         bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
 332         if (bPBC)
 333         {
 334             copy_rvec(box[dd->dim[d]], shift);
 335         }
 336         cd = &comm->cd[d];
 337         for (const gmx_domdec_ind_t &ind : cd->ind)
 338         {
 339             DDBufferAccess<gmx::RVec>  sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 340             gmx::ArrayRef<gmx::RVec>  &sendBuffer = sendBufferAccess.buffer;
 341             int                        n          = 0;
 342             if (!bPBC)
 343             {
 344                 for (int j : ind.index)
 345                 {
 346                     sendBuffer[n] = x[j];
 347                     n++;
 348                 }
 349             }
 350             else if (!bScrew)
 351             {
 352                 for (int j : ind.index)
 353                 {
 354                     /* We need to shift the coordinates */
 355                     for (int d = 0; d < DIM; d++)
 356                     {
 357                         sendBuffer[n][d] = x[j][d] + shift[d];
 358                     }
 359                     n++;
 360                 }
 361             }
 362             else
 363             {
 364                 for (int j : ind.index)
 365                 {
 366                     /* Shift x */
 367                     sendBuffer[n][XX] = x[j][XX] + shift[XX];
 368                     /* Rotate y and z.
 369                      * This operation requires a special shift force
 370                      * treatment, which is performed in calc_vir.
 371                      */
 372                     sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
 373                     sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
 374                     n++;
 375                 }
 376             }
 377
 378             DDBufferAccess<gmx::RVec>  receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 379
 380             gmx::ArrayRef<gmx::RVec>   receiveBuffer;
 381             if (cd->receiveInPlace)
 382             {
 383                 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
 384             }
 385             else
 386             {
 387                 receiveBuffer = receiveBufferAccess.buffer;
 388             }
 389             /* Send and receive the coordinates */
 390             ddSendrecv(dd, d, dddirBackward,
 391                        sendBuffer, receiveBuffer);
 392
 393             if (!cd->receiveInPlace)
 394             {
 395                 int j = 0;
 396                 for (int zone = 0; zone < nzone; zone++)
 397                 {
 398                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 399                     {
 400                         x[i] = receiveBuffer[j++];
 401                     }
 402                 }
 403             }
 404             nat_tot += ind.nrecv[nzone+1];
 405         }
 406         nzone += nzone;
 407     }
 408
 409     wallcycle_stop(wcycle, ewcMOVEX);
 410 }
 411
 412 void dd_move_f(gmx_domdec_t              *dd,
 413                gmx::ForceWithShiftForces *forceWithShiftForces,
 414                gmx_wallcycle             *wcycle)
 415 {
 416     wallcycle_start(wcycle, ewcMOVEF);
 417
 418     gmx::ArrayRef<gmx::RVec> f       = forceWithShiftForces->force();
 419     gmx::ArrayRef<gmx::RVec> fshift  = forceWithShiftForces->shiftForces();
 420
 421     gmx_domdec_comm_t       &comm    = *dd->comm;
 422     int                      nzone   = comm.zones.n/2;
 423     int                      nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
 424     for (int d = dd->ndim-1; d >= 0; d--)
 425     {
 426         /* Only forces in domains near the PBC boundaries need to
 427            consider PBC in the treatment of fshift */
 428         const bool shiftForcesNeedPbc = (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
 429         const bool applyScrewPbc      = (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
 430         /* Determine which shift vector we need */
 431         ivec       vis   = { 0, 0, 0 };
 432         vis[dd->dim[d]]  = 1;
 433         const int  is    = IVEC2IS(vis);
 434
 435         /* Loop over the pulses */
 436         const gmx_domdec_comm_dim_t &cd = comm.cd[d];
 437         for (int p = cd.numPulses() - 1; p >= 0; p--)
 438         {
 439             const gmx_domdec_ind_t    &ind  = cd.ind[p];
 440             DDBufferAccess<gmx::RVec>  receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
 441             gmx::ArrayRef<gmx::RVec>  &receiveBuffer = receiveBufferAccess.buffer;
 442
 443             nat_tot                                 -= ind.nrecv[nzone+1];
 444
 445             DDBufferAccess<gmx::RVec>  sendBufferAccess(comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 446
 447             gmx::ArrayRef<gmx::RVec>   sendBuffer;
 448             if (cd.receiveInPlace)
 449             {
 450                 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
 451             }
 452             else
 453             {
 454                 sendBuffer = sendBufferAccess.buffer;
 455                 int j = 0;
 456                 for (int zone = 0; zone < nzone; zone++)
 457                 {
 458                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 459                     {
 460                         sendBuffer[j++] = f[i];
 461                     }
 462                 }
 463             }
 464             /* Communicate the forces */
 465             ddSendrecv(dd, d, dddirForward,
 466                        sendBuffer, receiveBuffer);
 467             /* Add the received forces */
 468             int n = 0;
 469             if (!shiftForcesNeedPbc)
 470             {
 471                 for (int j : ind.index)
 472                 {
 473                     for (int d = 0; d < DIM; d++)
 474                     {
 475                         f[j][d] += receiveBuffer[n][d];
 476                     }
 477                     n++;
 478                 }
 479             }
 480             else if (!applyScrewPbc)
 481             {
 482                 for (int j : ind.index)
 483                 {
 484                     for (int d = 0; d < DIM; d++)
 485                     {
 486                         f[j][d] += receiveBuffer[n][d];
 487                     }
 488                     /* Add this force to the shift force */
 489                     for (int d = 0; d < DIM; d++)
 490                     {
 491                         fshift[is][d] += receiveBuffer[n][d];
 492                     }
 493                     n++;
 494                 }
 495             }
 496             else
 497             {
 498                 for (int j : ind.index)
 499                 {
 500                     /* Rotate the force */
 501                     f[j][XX] += receiveBuffer[n][XX];
 502                     f[j][YY] -= receiveBuffer[n][YY];
 503                     f[j][ZZ] -= receiveBuffer[n][ZZ];
 504                     if (shiftForcesNeedPbc)
 505                     {
 506                         /* Add this force to the shift force */
 507                         for (int d = 0; d < DIM; d++)
 508                         {
 509                             fshift[is][d] += receiveBuffer[n][d];
 510                         }
 511                     }
 512                     n++;
 513                 }
 514             }
 515         }
 516         nzone /= 2;
 517     }
 518     wallcycle_stop(wcycle, ewcMOVEF);
 519 }
 520
 521 /* Convenience function for extracting a real buffer from an rvec buffer
 522  *
 523  * To reduce the number of temporary communication buffers and avoid
 524  * cache polution, we reuse gmx::RVec buffers for storing reals.
 525  * This functions return a real buffer reference with the same number
 526  * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
 527  */
 528 static gmx::ArrayRef<real>
 529 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
 530 {
 531     return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
 532                                   arrayRef.size());
 533 }
 534
 535 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
 536 {
 537     int                    nzone, nat_tot;
 538     gmx_domdec_comm_t     *comm;
 539     gmx_domdec_comm_dim_t *cd;
 540
 541     comm = dd->comm;
 542
 543     nzone   = 1;
 544     nat_tot = comm->atomRanges.numHomeAtoms();
 545     for (int d = 0; d < dd->ndim; d++)
 546     {
 547         cd = &comm->cd[d];
 548         for (const gmx_domdec_ind_t &ind : cd->ind)
 549         {
 550             /* Note: We provision for RVec instead of real, so a factor of 3
 551              * more than needed. The buffer actually already has this size
 552              * and we pass a plain pointer below, so this does not matter.
 553              */
 554             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 555             gmx::ArrayRef<real>       sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 556             int                       n          = 0;
 557             for (int j : ind.index)
 558             {
 559                 sendBuffer[n++] = v[j];
 560             }
 561
 562             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 563
 564             gmx::ArrayRef<real>       receiveBuffer;
 565             if (cd->receiveInPlace)
 566             {
 567                 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 568             }
 569             else
 570             {
 571                 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 572             }
 573             /* Send and receive the data */
 574             ddSendrecv(dd, d, dddirBackward,
 575                        sendBuffer, receiveBuffer);
 576             if (!cd->receiveInPlace)
 577             {
 578                 int j = 0;
 579                 for (int zone = 0; zone < nzone; zone++)
 580                 {
 581                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 582                     {
 583                         v[i] = receiveBuffer[j++];
 584                     }
 585                 }
 586             }
 587             nat_tot += ind.nrecv[nzone+1];
 588         }
 589         nzone += nzone;
 590     }
 591 }
 592
 593 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
 594 {
 595     int                    nzone, nat_tot;
 596     gmx_domdec_comm_t     *comm;
 597     gmx_domdec_comm_dim_t *cd;
 598
 599     comm = dd->comm;
 600
 601     nzone   = comm->zones.n/2;
 602     nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
 603     for (int d = dd->ndim-1; d >= 0; d--)
 604     {
 605         cd = &comm->cd[d];
 606         for (int p = cd->numPulses() - 1; p >= 0; p--)
 607         {
 608             const gmx_domdec_ind_t &ind = cd->ind[p];
 609
 610             /* Note: We provision for RVec instead of real, so a factor of 3
 611              * more than needed. The buffer actually already has this size
 612              * and we typecast, so this works as intended.
 613              */
 614             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 615             gmx::ArrayRef<real>       receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 616             nat_tot -= ind.nrecv[nzone + 1];
 617
 618             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 619
 620             gmx::ArrayRef<real>       sendBuffer;
 621             if (cd->receiveInPlace)
 622             {
 623                 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 624             }
 625             else
 626             {
 627                 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 628                 int j = 0;
 629                 for (int zone = 0; zone < nzone; zone++)
 630                 {
 631                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 632                     {
 633                         sendBuffer[j++] = v[i];
 634                     }
 635                 }
 636             }
 637             /* Communicate the forces */
 638             ddSendrecv(dd, d, dddirForward,
 639                        sendBuffer, receiveBuffer);
 640             /* Add the received forces */
 641             int n = 0;
 642             for (int j : ind.index)
 643             {
 644                 v[j] += receiveBuffer[n];
 645                 n++;
 646             }
 647         }
 648         nzone /= 2;
 649     }
 650 }
 651
 652 real dd_cutoff_multibody(const gmx_domdec_t *dd)
 653 {
 654     const gmx_domdec_comm_t &comm       = *dd->comm;
 655     const DDSystemInfo      &systemInfo = comm.systemInfo;
 656
 657     real                     r = -1;
 658     if (systemInfo.haveInterDomainMultiBodyBondeds)
 659     {
 660         if (comm.cutoff_mbody > 0)
 661         {
 662             r = comm.cutoff_mbody;
 663         }
 664         else
 665         {
 666             /* cutoff_mbody=0 means we do not have DLB */
 667             r = comm.cellsize_min[dd->dim[0]];
 668             for (int di = 1; di < dd->ndim; di++)
 669             {
 670                 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
 671             }
 672             if (comm.systemInfo.filterBondedCommunication)
 673             {
 674                 r = std::max(r, comm.cutoff_mbody);
 675             }
 676             else
 677             {
 678                 r = std::min(r, systemInfo.cutoff);
 679             }
 680         }
 681     }
 682
 683     return r;
 684 }
 685
 686 real dd_cutoff_twobody(const gmx_domdec_t *dd)
 687 {
 688     real r_mb;
 689
 690     r_mb = dd_cutoff_multibody(dd);
 691
 692     return std::max(dd->comm->systemInfo.cutoff, r_mb);
 693 }
 694
 695
 696 static void dd_cart_coord2pmecoord(const DDRankSetup        &ddRankSetup,
 697                                    const CartesianRankSetup &cartSetup,
 698                                    const ivec                coord,
 699                                    ivec                      coord_pme)
 700 {
 701     const int nc   = ddRankSetup.numPPCells[cartSetup.cartpmedim];
 702     const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
 703     copy_ivec(coord, coord_pme);
 704     coord_pme[cartSetup.cartpmedim] =
 705         nc + (coord[cartSetup.cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
 706 }
 707
 708 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
 709 static int ddindex2pmeindex(const DDRankSetup &ddRankSetup,
 710                             const int          ddCellIndex)
 711 {
 712     const int npp  = ddRankSetup.numPPRanks;
 713     const int npme = ddRankSetup.numRanksDoingPme;
 714
 715     /* Here we assign a PME node to communicate with this DD node
 716      * by assuming that the major index of both is x.
 717      * We add npme/2 to obtain an even distribution.
 718      */
 719     return (ddCellIndex*npme + npme/2)/npp;
 720 }
 721
 722 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup &ddRankSetup)
 723 {
 724     std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
 725
 726     int              n = 0;
 727     for (int i = 0; i < ddRankSetup.numPPRanks; i++)
 728     {
 729         const int p0 = ddindex2pmeindex(ddRankSetup, i);
 730         const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
 731         if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
 732         {
 733             if (debug)
 734             {
 735                 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
 736             }
 737             pmeRanks[n] = i + 1 + n;
 738             n++;
 739         }
 740     }
 741
 742     return pmeRanks;
 743 }
 744
 745 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
 746 {
 747     gmx_domdec_t *dd;
 748     ivec          coords;
 749     int           slab;
 750
 751     dd = cr->dd;
 752     /*
 753        if (dd->comm->bCartesian) {
 754        gmx_ddindex2xyz(dd->nc,ddindex,coords);
 755        dd_coords2pmecoords(dd,coords,coords_pme);
 756        copy_ivec(dd->ntot,nc);
 757        nc[dd->cartpmedim]         -= dd->nc[dd->cartpmedim];
 758        coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
 759
 760        slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
 761        } else {
 762        slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
 763        }
 764      */
 765     coords[XX] = x;
 766     coords[YY] = y;
 767     coords[ZZ] = z;
 768     slab       = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->nc, coords));
 769
 770     return slab;
 771 }
 772
 773 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
 774 {
 775     const CartesianRankSetup &cartSetup = cr->dd->comm->cartesianRankSetup;
 776     ivec                      coords    = { x, y, z };
 777     int                       nodeid    = -1;
 778
 779     if (cartSetup.bCartesianPP_PME)
 780     {
 781 #if GMX_MPI
 782         MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
 783 #endif
 784     }
 785     else
 786     {
 787         int ddindex = dd_index(cr->dd->nc, coords);
 788         if (cartSetup.bCartesianPP)
 789         {
 790             nodeid = cartSetup.ddindex2simnodeid[ddindex];
 791         }
 792         else
 793         {
 794             if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
 795             {
 796                 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
 797             }
 798             else
 799             {
 800                 nodeid = ddindex;
 801             }
 802         }
 803     }
 804
 805     return nodeid;
 806 }
 807
 808 static int dd_simnode2pmenode(const DDRankSetup          &ddRankSetup,
 809                               const CartesianRankSetup   &cartSetup,
 810                               gmx::ArrayRef<const int>    pmeRanks,
 811                               const t_commrec gmx_unused *cr,
 812                               const int                   sim_nodeid)
 813 {
 814     int                pmenode = -1;
 815
 816     /* This assumes a uniform x domain decomposition grid cell size */
 817     if (cartSetup.bCartesianPP_PME)
 818     {
 819 #if GMX_MPI
 820         ivec coord, coord_pme;
 821         MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
 822         if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
 823         {
 824             /* This is a PP rank */
 825             dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
 826             MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
 827         }
 828 #endif
 829     }
 830     else if (cartSetup.bCartesianPP)
 831     {
 832         if (sim_nodeid < ddRankSetup.numPPRanks)
 833         {
 834             pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
 835         }
 836     }
 837     else
 838     {
 839         /* This assumes DD cells with identical x coordinates
 840          * are numbered sequentially.
 841          */
 842         if (pmeRanks.empty())
 843         {
 844             if (sim_nodeid < ddRankSetup.numPPRanks)
 845             {
 846                 /* The DD index equals the nodeid */
 847                 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
 848             }
 849         }
 850         else
 851         {
 852             int i = 0;
 853             while (sim_nodeid > pmeRanks[i])
 854             {
 855                 i++;
 856             }
 857             if (sim_nodeid < pmeRanks[i])
 858             {
 859                 pmenode = pmeRanks[i];
 860             }
 861         }
 862     }
 863
 864     return pmenode;
 865 }
 866
 867 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
 868 {
 869     if (dd != nullptr)
 870     {
 871         return {
 872                    dd->comm->ddRankSetup.npmenodes_x,
 873                    dd->comm->ddRankSetup.npmenodes_y
 874         };
 875     }
 876     else
 877     {
 878         return {
 879                    1, 1
 880         };
 881     }
 882 }
 883
 884 std::vector<int> get_pme_ddranks(const t_commrec *cr,
 885                                  const int        pmenodeid)
 886 {
 887     const DDRankSetup        &ddRankSetup = cr->dd->comm->ddRankSetup;
 888     const CartesianRankSetup &cartSetup   = cr->dd->comm->cartesianRankSetup;
 889     GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks, "This function should only be called when PME-only ranks are in use");
 890     std::vector<int>          ddranks;
 891     ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1)/ddRankSetup.numRanksDoingPme);
 892
 893     for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
 894     {
 895         for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
 896         {
 897             for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
 898             {
 899                 if (cartSetup.bCartesianPP_PME)
 900                 {
 901                     ivec coord = { x, y, z };
 902                     ivec coord_pme;
 903                     dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
 904                     if (cr->dd->ci[XX] == coord_pme[XX] &&
 905                         cr->dd->ci[YY] == coord_pme[YY] &&
 906                         cr->dd->ci[ZZ] == coord_pme[ZZ])
 907                     {
 908                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 909                     }
 910                 }
 911                 else
 912                 {
 913                     /* The slab corresponds to the nodeid in the PME group */
 914                     if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
 915                     {
 916                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 917                     }
 918                 }
 919             }
 920         }
 921     }
 922     return ddranks;
 923 }
 924
 925 static gmx_bool receive_vir_ener(const gmx_domdec_t       *dd,
 926                                  gmx::ArrayRef<const int>  pmeRanks,
 927                                  const t_commrec          *cr)
 928 {
 929     gmx_bool           bReceive    = TRUE;
 930
 931     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
 932     if (ddRankSetup.usePmeOnlyRanks)
 933     {
 934         const CartesianRankSetup &cartSetup   = dd->comm->cartesianRankSetup;
 935         if (cartSetup.bCartesianPP_PME)
 936         {
 937 #if GMX_MPI
 938             int  pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
 939             ivec coords;
 940             MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
 941             coords[cartSetup.cartpmedim]++;
 942             if (coords[cartSetup.cartpmedim] < dd->nc[cartSetup.cartpmedim])
 943             {
 944                 int rank;
 945                 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
 946                 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
 947                 {
 948                     /* This is not the last PP node for pmenode */
 949                     bReceive = FALSE;
 950                 }
 951             }
 952 #else
 953             GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
 954 #endif
 955         }
 956         else
 957         {
 958             int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
 959             if (cr->sim_nodeid+1 < cr->nnodes &&
 960                 dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
 961             {
 962                 /* This is not the last PP node for pmenode */
 963                 bReceive = FALSE;
 964             }
 965         }
 966     }
 967
 968     return bReceive;
 969 }
 970
 971 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
 972 {
 973     gmx_domdec_comm_t *comm;
 974     int                i;
 975
 976     comm = dd->comm;
 977
 978     snew(*dim_f, dd->nc[dim]+1);
 979     (*dim_f)[0] = 0;
 980     for (i = 1; i < dd->nc[dim]; i++)
 981     {
 982         if (comm->slb_frac[dim])
 983         {
 984             (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
 985         }
 986         else
 987         {
 988             (*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
 989         }
 990     }
 991     (*dim_f)[dd->nc[dim]] = 1;
 992 }
 993
 994 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
 995 {
 996     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
 997
 998     if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
 999     {
1000         ddpme->dim = YY;
1001     }
1002     else
1003     {
1004         ddpme->dim = dimind;
1005     }
1006     ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
1007
1008     ddpme->nslab = (ddpme->dim == 0 ?
1009                     ddRankSetup.npmenodes_x :
1010                     ddRankSetup.npmenodes_y);
1011
1012     if (ddpme->nslab <= 1)
1013     {
1014         return;
1015     }
1016
1017     const int nso = ddRankSetup.numRanksDoingPme/ddpme->nslab;
1018     /* Determine for each PME slab the PP location range for dimension dim */
1019     snew(ddpme->pp_min, ddpme->nslab);
1020     snew(ddpme->pp_max, ddpme->nslab);
1021     for (int slab = 0; slab < ddpme->nslab; slab++)
1022     {
1023         ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
1024         ddpme->pp_max[slab] = 0;
1025     }
1026     for (int i = 0; i < dd->nnodes; i++)
1027     {
1028         ivec xyz;
1029         ddindex2xyz(dd->nc, i, xyz);
1030         /* For y only use our y/z slab.
1031          * This assumes that the PME x grid size matches the DD grid size.
1032          */
1033         if (dimind == 0 || xyz[XX] == dd->ci[XX])
1034         {
1035             const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
1036             int       slab;
1037             if (dimind == 0)
1038             {
1039                 slab = pmeindex/nso;
1040             }
1041             else
1042             {
1043                 slab = pmeindex % ddpme->nslab;
1044             }
1045             ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
1046             ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1047         }
1048     }
1049
1050     set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1051 }
1052
1053 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1054 {
1055     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1056
1057     if (ddRankSetup.ddpme[0].dim == XX)
1058     {
1059         return ddRankSetup.ddpme[0].maxshift;
1060     }
1061     else
1062     {
1063         return 0;
1064     }
1065 }
1066
1067 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1068 {
1069     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1070
1071     if (ddRankSetup.ddpme[0].dim == YY)
1072     {
1073         return ddRankSetup.ddpme[0].maxshift;
1074     }
1075     else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
1076     {
1077         return ddRankSetup.ddpme[1].maxshift;
1078     }
1079     else
1080     {
1081         return 0;
1082     }
1083 }
1084
1085 bool ddHaveSplitConstraints(const gmx_domdec_t &dd)
1086 {
1087     return dd.comm->systemInfo.haveSplitConstraints;
1088 }
1089
1090 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
1091 {
1092     /* Note that the cycles value can be incorrect, either 0 or some
1093      * extremely large value, when our thread migrated to another core
1094      * with an unsynchronized cycle counter. If this happens less often
1095      * that once per nstlist steps, this will not cause issues, since
1096      * we later subtract the maximum value from the sum over nstlist steps.
1097      * A zero count will slightly lower the total, but that's a small effect.
1098      * Note that the main purpose of the subtraction of the maximum value
1099      * is to avoid throwing off the load balancing when stalls occur due
1100      * e.g. system activity or network congestion.
1101      */
1102     dd->comm->cycl[ddCycl] += cycles;
1103     dd->comm->cycl_n[ddCycl]++;
1104     if (cycles > dd->comm->cycl_max[ddCycl])
1105     {
1106         dd->comm->cycl_max[ddCycl] = cycles;
1107     }
1108 }
1109
1110 #if GMX_MPI
1111 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
1112 {
1113     MPI_Comm           c_row;
1114     int                dim, i, rank;
1115     ivec               loc_c;
1116     gmx_bool           bPartOfGroup = FALSE;
1117
1118     dim = dd->dim[dim_ind];
1119     copy_ivec(loc, loc_c);
1120     for (i = 0; i < dd->nc[dim]; i++)
1121     {
1122         loc_c[dim] = i;
1123         rank       = dd_index(dd->nc, loc_c);
1124         if (rank == dd->rank)
1125         {
1126             /* This process is part of the group */
1127             bPartOfGroup = TRUE;
1128         }
1129     }
1130     MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
1131                    &c_row);
1132     if (bPartOfGroup)
1133     {
1134         dd->comm->mpi_comm_load[dim_ind] = c_row;
1135         if (!isDlbDisabled(dd->comm))
1136         {
1137             DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1138
1139             if (dd->ci[dim] == dd->master_ci[dim])
1140             {
1141                 /* This is the root process of this row */
1142                 cellsizes.rowMaster  = std::make_unique<RowMaster>();
1143
1144                 RowMaster &rowMaster = *cellsizes.rowMaster;
1145                 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1146                 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
1147                 rowMaster.isCellMin.resize(dd->nc[dim]);
1148                 if (dim_ind > 0)
1149                 {
1150                     rowMaster.bounds.resize(dd->nc[dim]);
1151                 }
1152                 rowMaster.buf_ncd.resize(dd->nc[dim]);
1153             }
1154             else
1155             {
1156                 /* This is not a root process, we only need to receive cell_f */
1157                 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1158             }
1159         }
1160         if (dd->ci[dim] == dd->master_ci[dim])
1161         {
1162             snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
1163         }
1164     }
1165 }
1166 #endif
1167
1168 void dd_setup_dlb_resource_sharing(t_commrec            *cr,
1169                                    int                   gpu_id)
1170 {
1171 #if GMX_MPI
1172     int           physicalnode_id_hash;
1173     gmx_domdec_t *dd;
1174     MPI_Comm      mpi_comm_pp_physicalnode;
1175
1176     if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1177     {
1178         /* Only ranks with short-ranged tasks (currently) use GPUs.
1179          * If we don't have GPUs assigned, there are no resources to share.
1180          */
1181         return;
1182     }
1183
1184     physicalnode_id_hash = gmx_physicalnode_id_hash();
1185
1186     dd = cr->dd;
1187
1188     if (debug)
1189     {
1190         fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1191         fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
1192                 dd->rank, physicalnode_id_hash, gpu_id);
1193     }
1194     /* Split the PP communicator over the physical nodes */
1195     /* TODO: See if we should store this (before), as it's also used for
1196      * for the nodecomm summation.
1197      */
1198     // TODO PhysicalNodeCommunicator could be extended/used to handle
1199     // the need for per-node per-group communicators.
1200     MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
1201                    &mpi_comm_pp_physicalnode);
1202     MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
1203                    &dd->comm->mpi_comm_gpu_shared);
1204     MPI_Comm_free(&mpi_comm_pp_physicalnode);
1205     MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1206
1207     if (debug)
1208     {
1209         fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1210     }
1211
1212     /* Note that some ranks could share a GPU, while others don't */
1213
1214     if (dd->comm->nrank_gpu_shared == 1)
1215     {
1216         MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1217     }
1218 #else
1219     GMX_UNUSED_VALUE(cr);
1220     GMX_UNUSED_VALUE(gpu_id);
1221 #endif
1222 }
1223
1224 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
1225 {
1226 #if GMX_MPI
1227     int  dim0, dim1, i, j;
1228     ivec loc;
1229
1230     if (debug)
1231     {
1232         fprintf(debug, "Making load communicators\n");
1233     }
1234
1235     dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1236     snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1237
1238     if (dd->ndim == 0)
1239     {
1240         return;
1241     }
1242
1243     clear_ivec(loc);
1244     make_load_communicator(dd, 0, loc);
1245     if (dd->ndim > 1)
1246     {
1247         dim0 = dd->dim[0];
1248         for (i = 0; i < dd->nc[dim0]; i++)
1249         {
1250             loc[dim0] = i;
1251             make_load_communicator(dd, 1, loc);
1252         }
1253     }
1254     if (dd->ndim > 2)
1255     {
1256         dim0 = dd->dim[0];
1257         for (i = 0; i < dd->nc[dim0]; i++)
1258         {
1259             loc[dim0] = i;
1260             dim1      = dd->dim[1];
1261             for (j = 0; j < dd->nc[dim1]; j++)
1262             {
1263                 loc[dim1] = j;
1264                 make_load_communicator(dd, 2, loc);
1265             }
1266         }
1267     }
1268
1269     if (debug)
1270     {
1271         fprintf(debug, "Finished making load communicators\n");
1272     }
1273 #endif
1274 }
1275
1276 /*! \brief Sets up the relation between neighboring domains and zones */
1277 static void setup_neighbor_relations(gmx_domdec_t *dd)
1278 {
1279     int                     d, dim, i, j, m;
1280     ivec                    tmp, s;
1281     gmx_domdec_zones_t     *zones;
1282     gmx_domdec_ns_ranges_t *izone;
1283     GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
1284
1285     for (d = 0; d < dd->ndim; d++)
1286     {
1287         dim = dd->dim[d];
1288         copy_ivec(dd->ci, tmp);
1289         tmp[dim]           = (tmp[dim] + 1) % dd->nc[dim];
1290         dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1291         copy_ivec(dd->ci, tmp);
1292         tmp[dim]           = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
1293         dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1294         if (debug)
1295         {
1296             fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
1297                     dd->rank, dim,
1298                     dd->neighbor[d][0],
1299                     dd->neighbor[d][1]);
1300         }
1301     }
1302
1303     int nzone  = (1 << dd->ndim);
1304     int nizone = (1 << std::max(dd->ndim - 1, 0));
1305     assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1306
1307     zones = &dd->comm->zones;
1308
1309     for (i = 0; i < nzone; i++)
1310     {
1311         m = 0;
1312         clear_ivec(zones->shift[i]);
1313         for (d = 0; d < dd->ndim; d++)
1314         {
1315             zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1316         }
1317     }
1318
1319     zones->n = nzone;
1320     for (i = 0; i < nzone; i++)
1321     {
1322         for (d = 0; d < DIM; d++)
1323         {
1324             s[d] = dd->ci[d] - zones->shift[i][d];
1325             if (s[d] < 0)
1326             {
1327                 s[d] += dd->nc[d];
1328             }
1329             else if (s[d] >= dd->nc[d])
1330             {
1331                 s[d] -= dd->nc[d];
1332             }
1333         }
1334     }
1335     zones->nizone = nizone;
1336     for (i = 0; i < zones->nizone; i++)
1337     {
1338         assert(ddNonbondedZonePairRanges[i][0] == i);
1339
1340         izone     = &zones->izone[i];
1341         /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
1342          * j-zones up to nzone.
1343          */
1344         izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
1345         izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
1346         for (dim = 0; dim < DIM; dim++)
1347         {
1348             if (dd->nc[dim] == 1)
1349             {
1350                 /* All shifts should be allowed */
1351                 izone->shift0[dim] = -1;
1352                 izone->shift1[dim] = 1;
1353             }
1354             else
1355             {
1356                 /* Determine the min/max j-zone shift wrt the i-zone */
1357                 izone->shift0[dim] = 1;
1358                 izone->shift1[dim] = -1;
1359                 for (j = izone->j0; j < izone->j1; j++)
1360                 {
1361                     int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
1362                     if (shift_diff < izone->shift0[dim])
1363                     {
1364                         izone->shift0[dim] = shift_diff;
1365                     }
1366                     if (shift_diff > izone->shift1[dim])
1367                     {
1368                         izone->shift1[dim] = shift_diff;
1369                     }
1370                 }
1371             }
1372         }
1373     }
1374
1375     if (!isDlbDisabled(dd->comm))
1376     {
1377         dd->comm->cellsizesWithDlb.resize(dd->ndim);
1378     }
1379
1380     if (dd->comm->ddSettings.recordLoad)
1381     {
1382         make_load_communicators(dd);
1383     }
1384 }
1385
1386 static void make_pp_communicator(const gmx::MDLogger  &mdlog,
1387                                  gmx_domdec_t         *dd,
1388                                  t_commrec gmx_unused *cr,
1389                                  bool gmx_unused       reorder)
1390 {
1391 #if GMX_MPI
1392     gmx_domdec_comm_t  *comm      = dd->comm;
1393     CartesianRankSetup &cartSetup = comm->cartesianRankSetup;
1394
1395     if (cartSetup.bCartesianPP)
1396     {
1397         /* Set up cartesian communication for the particle-particle part */
1398         GMX_LOG(mdlog.info).appendTextFormatted(
1399                 "Will use a Cartesian communicator: %d x %d x %d",
1400                 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
1401
1402         ivec periods;
1403         for (int i = 0; i < DIM; i++)
1404         {
1405             periods[i] = TRUE;
1406         }
1407         MPI_Comm comm_cart;
1408         MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
1409                         &comm_cart);
1410         /* We overwrite the old communicator with the new cartesian one */
1411         cr->mpi_comm_mygroup = comm_cart;
1412     }
1413
1414     dd->mpi_comm_all = cr->mpi_comm_mygroup;
1415     MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1416
1417     if (cartSetup.bCartesianPP_PME)
1418     {
1419         /* Since we want to use the original cartesian setup for sim,
1420          * and not the one after split, we need to make an index.
1421          */
1422         cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1423         cartSetup.ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
1424         gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1425         /* Get the rank of the DD master,
1426          * above we made sure that the master node is a PP node.
1427          */
1428         int rank;
1429         if (MASTER(cr))
1430         {
1431             rank = dd->rank;
1432         }
1433         else
1434         {
1435             rank = 0;
1436         }
1437         MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1438     }
1439     else if (cartSetup.bCartesianPP)
1440     {
1441         if (!comm->ddRankSetup.usePmeOnlyRanks)
1442         {
1443             /* The PP communicator is also
1444              * the communicator for this simulation
1445              */
1446             cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1447         }
1448         cr->nodeid = dd->rank;
1449
1450         MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1451
1452         /* We need to make an index to go from the coordinates
1453          * to the nodeid of this simulation.
1454          */
1455         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1456         std::vector<int> buf(dd->nnodes);
1457         if (thisRankHasDuty(cr, DUTY_PP))
1458         {
1459             buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1460         }
1461         /* Communicate the ddindex to simulation nodeid index */
1462         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1463                       cr->mpi_comm_mysim);
1464
1465         /* Determine the master coordinates and rank.
1466          * The DD master should be the same node as the master of this sim.
1467          */
1468         for (int i = 0; i < dd->nnodes; i++)
1469         {
1470             if (cartSetup.ddindex2simnodeid[i] == 0)
1471             {
1472                 ddindex2xyz(dd->nc, i, dd->master_ci);
1473                 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1474             }
1475         }
1476         if (debug)
1477         {
1478             fprintf(debug, "The master rank is %d\n", dd->masterrank);
1479         }
1480     }
1481     else
1482     {
1483         /* No Cartesian communicators */
1484         /* We use the rank in dd->comm->all as DD index */
1485         ddindex2xyz(dd->nc, dd->rank, dd->ci);
1486         /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1487         dd->masterrank = 0;
1488         clear_ivec(dd->master_ci);
1489     }
1490 #endif
1491
1492     GMX_LOG(mdlog.info).appendTextFormatted(
1493             "Domain decomposition rank %d, coordinates %d %d %d\n",
1494             dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1495     if (debug)
1496     {
1497         fprintf(debug,
1498                 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
1499                 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1500     }
1501 }
1502
1503 static void receive_ddindex2simnodeid(gmx_domdec_t         *dd,
1504                                       t_commrec            *cr)
1505 {
1506 #if GMX_MPI
1507     CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
1508
1509     if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1510     {
1511         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1512         std::vector<int> buf(dd->nnodes);
1513         if (thisRankHasDuty(cr, DUTY_PP))
1514         {
1515             buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1516         }
1517         /* Communicate the ddindex to simulation nodeid index */
1518         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1519                       cr->mpi_comm_mysim);
1520     }
1521 #else
1522     GMX_UNUSED_VALUE(dd);
1523     GMX_UNUSED_VALUE(cr);
1524 #endif
1525 }
1526
1527 static CartesianRankSetup
1528 split_communicator(const gmx::MDLogger    &mdlog,
1529                    t_commrec              *cr,
1530                    const DdRankOrder       ddRankOrder,
1531                    bool gmx_unused         reorder,
1532                    const DDRankSetup      &ddRankSetup,
1533                    ivec                    ddCellIndex,
1534                    std::vector<int>       *pmeRanks)
1535 {
1536     CartesianRankSetup cartSetup;
1537
1538     cartSetup.bCartesianPP     = (ddRankOrder == DdRankOrder::cartesian);
1539     cartSetup.bCartesianPP_PME = false;
1540
1541     const ivec &numDDCells     = ddRankSetup.numPPCells;
1542     /* Initially we set ntot to the number of PP cells */
1543     copy_ivec(numDDCells, cartSetup.ntot);
1544
1545     if (cartSetup.bCartesianPP)
1546     {
1547         const int numDDCellsTot = ddRankSetup.numPPRanks;
1548         bool      bDiv[DIM];
1549         for (int i = 1; i < DIM; i++)
1550         {
1551             bDiv[i] = ((ddRankSetup.numRanksDoingPme*numDDCells[i]) % numDDCellsTot == 0);
1552         }
1553         if (bDiv[YY] || bDiv[ZZ])
1554         {
1555             cartSetup.bCartesianPP_PME = TRUE;
1556             /* If we have 2D PME decomposition, which is always in x+y,
1557              * we stack the PME only nodes in z.
1558              * Otherwise we choose the direction that provides the thinnest slab
1559              * of PME only nodes as this will have the least effect
1560              * on the PP communication.
1561              * But for the PME communication the opposite might be better.
1562              */
1563             if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 ||
1564                              !bDiv[YY] ||
1565                              numDDCells[YY] > numDDCells[ZZ]))
1566             {
1567                 cartSetup.cartpmedim = ZZ;
1568             }
1569             else
1570             {
1571                 cartSetup.cartpmedim = YY;
1572             }
1573             cartSetup.ntot[cartSetup.cartpmedim]
1574                 += (ddRankSetup.numRanksDoingPme*numDDCells[cartSetup.cartpmedim])/numDDCellsTot;
1575         }
1576         else
1577         {
1578             GMX_LOG(mdlog.info).appendTextFormatted(
1579                     "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)",
1580                     ddRankSetup.numRanksDoingPme,
1581                     numDDCells[XX], numDDCells[YY],
1582                     numDDCells[XX], numDDCells[ZZ]);
1583             GMX_LOG(mdlog.info).appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1584         }
1585     }
1586
1587     if (cartSetup.bCartesianPP_PME)
1588     {
1589 #if GMX_MPI
1590         int  rank;
1591         ivec periods;
1592
1593         GMX_LOG(mdlog.info).appendTextFormatted(
1594                 "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1595                 cartSetup.ntot[XX], cartSetup.ntot[YY], cartSetup.ntot[ZZ]);
1596
1597         for (int i = 0; i < DIM; i++)
1598         {
1599             periods[i] = TRUE;
1600         }
1601         MPI_Comm comm_cart;
1602         MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder),
1603                         &comm_cart);
1604         MPI_Comm_rank(comm_cart, &rank);
1605         if (MASTER(cr) && rank != 0)
1606         {
1607             gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1608         }
1609
1610         /* With this assigment we loose the link to the original communicator
1611          * which will usually be MPI_COMM_WORLD, unless have multisim.
1612          */
1613         cr->mpi_comm_mysim = comm_cart;
1614         cr->sim_nodeid     = rank;
1615
1616         MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1617
1618         GMX_LOG(mdlog.info).appendTextFormatted(
1619                 "Cartesian rank %d, coordinates %d %d %d\n",
1620                 cr->sim_nodeid, ddCellIndex[XX], ddCellIndex[YY], ddCellIndex[ZZ]);
1621
1622         if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1623         {
1624             cr->duty = DUTY_PP;
1625         }
1626         if (!ddRankSetup.usePmeOnlyRanks ||
1627             ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1628         {
1629             cr->duty = DUTY_PME;
1630         }
1631
1632         /* Split the sim communicator into PP and PME only nodes */
1633         MPI_Comm_split(cr->mpi_comm_mysim,
1634                        getThisRankDuties(cr),
1635                        dd_index(cartSetup.ntot, ddCellIndex),
1636                        &cr->mpi_comm_mygroup);
1637 #else
1638         GMX_UNUSED_VALUE(ddCellIndex);
1639 #endif
1640     }
1641     else
1642     {
1643         switch (ddRankOrder)
1644         {
1645             case DdRankOrder::pp_pme:
1646                 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1647                 break;
1648             case DdRankOrder::interleave:
1649                 /* Interleave the PP-only and PME-only ranks */
1650                 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1651                 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1652                 break;
1653             case DdRankOrder::cartesian:
1654                 break;
1655             default:
1656                 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1657         }
1658
1659         if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1660         {
1661             cr->duty = DUTY_PME;
1662         }
1663         else
1664         {
1665             cr->duty = DUTY_PP;
1666         }
1667 #if GMX_MPI
1668         /* Split the sim communicator into PP and PME only nodes */
1669         MPI_Comm_split(cr->mpi_comm_mysim,
1670                        getThisRankDuties(cr),
1671                        cr->nodeid,
1672                        &cr->mpi_comm_mygroup);
1673         MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1674 #endif
1675     }
1676
1677     GMX_LOG(mdlog.info).appendTextFormatted(
1678             "This rank does only %s work.\n",
1679             thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1680
1681     return cartSetup;
1682 }
1683
1684 /*! \brief Makes the PP communicator and the PME communicator, when needed
1685  *
1686  * Returns the Cartesian rank setup.
1687  * Sets \p cr->mpi_comm_mygroup
1688  * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1689  * With separate PME ranks in interleaved order, set the PME ranks in \p pmeRanks.
1690  */
1691 static CartesianRankSetup
1692 makeGroupCommunicators(const gmx::MDLogger &mdlog,
1693                        const DDSettings    &ddSettings,
1694                        const DdRankOrder    ddRankOrder,
1695                        const DDRankSetup   &ddRankSetup,
1696                        t_commrec           *cr,
1697                        ivec                 ddCellIndex,
1698                        std::vector<int>    *pmeRanks)
1699 {
1700     CartesianRankSetup cartSetup;
1701
1702     if (ddRankSetup.usePmeOnlyRanks)
1703     {
1704         /* Split the communicator into a PP and PME part */
1705         cartSetup =
1706             split_communicator(mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder,
1707                                ddRankSetup, ddCellIndex, pmeRanks);
1708     }
1709     else
1710     {
1711         /* All nodes do PP and PME */
1712         /* We do not require separate communicators */
1713         cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
1714
1715         cartSetup.bCartesianPP     = false;
1716         cartSetup.bCartesianPP_PME = false;
1717     }
1718
1719     return cartSetup;
1720 }
1721
1722 /*! \brief For PP ranks, sets or makes the communicator
1723  *
1724  * For PME ranks get the rank id.
1725  * For PP only ranks, sets the PME-only rank.
1726  */
1727 static void setupGroupCommunication(const gmx::MDLogger      &mdlog,
1728                                     const DDSettings         &ddSettings,
1729                                     gmx::ArrayRef<const int>  pmeRanks,
1730                                     t_commrec                *cr,
1731                                     gmx_domdec_t             *dd)
1732 {
1733     const DDRankSetup        &ddRankSetup = dd->comm->ddRankSetup;
1734     const CartesianRankSetup &cartSetup   = dd->comm->cartesianRankSetup;
1735
1736     if (thisRankHasDuty(cr, DUTY_PP))
1737     {
1738         /* Copy or make a new PP communicator */
1739
1740         /* We (possibly) reordered the nodes in split_communicator,
1741          * so it is no longer required in make_pp_communicator.
1742          */
1743         const bool useCartesianReorder =
1744             (ddSettings.useCartesianReorder &&
1745              !cartSetup.bCartesianPP_PME);
1746
1747         make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1748     }
1749     else
1750     {
1751         receive_ddindex2simnodeid(dd, cr);
1752     }
1753
1754     if (!thisRankHasDuty(cr, DUTY_PME))
1755     {
1756         /* Set up the commnuication to our PME node */
1757         dd->pme_nodeid           = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1758         dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1759         if (debug)
1760         {
1761             fprintf(debug, "My pme_nodeid %d receive ener %s\n",
1762                     dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
1763         }
1764     }
1765     else
1766     {
1767         dd->pme_nodeid = -1;
1768     }
1769
1770     /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1771     if (MASTER(cr))
1772     {
1773         dd->ma = std::make_unique<AtomDistribution>(dd->nc,
1774                                                     dd->comm->cgs_gl.nr,
1775                                                     dd->comm->cgs_gl.index[dd->comm->cgs_gl.nr]);
1776     }
1777 }
1778
1779 static real *get_slb_frac(const gmx::MDLogger &mdlog,
1780                           const char *dir, int nc, const char *size_string)
1781 {
1782     real  *slb_frac, tot;
1783     int    i, n;
1784     double dbl;
1785
1786     slb_frac = nullptr;
1787     if (nc > 1 && size_string != nullptr)
1788     {
1789         GMX_LOG(mdlog.info).appendTextFormatted(
1790                 "Using static load balancing for the %s direction", dir);
1791         snew(slb_frac, nc);
1792         tot = 0;
1793         for (i = 0; i < nc; i++)
1794         {
1795             dbl = 0;
1796             sscanf(size_string, "%20lf%n", &dbl, &n);
1797             if (dbl == 0)
1798             {
1799                 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
1800             }
1801             slb_frac[i]  = dbl;
1802             size_string += n;
1803             tot         += slb_frac[i];
1804         }
1805         /* Normalize */
1806         std::string relativeCellSizes = "Relative cell sizes:";
1807         for (i = 0; i < nc; i++)
1808         {
1809             slb_frac[i]       /= tot;
1810             relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1811         }
1812         GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1813     }
1814
1815     return slb_frac;
1816 }
1817
1818 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
1819 {
1820     int                  n     = 0;
1821     gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1822     int                  nmol;
1823     while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1824     {
1825         for (auto &ilist : extractILists(*ilists, IF_BOND))
1826         {
1827             if (NRAL(ilist.functionType) >  2)
1828             {
1829                 n += nmol*(ilist.iatoms.size()/ilistStride(ilist));
1830             }
1831         }
1832     }
1833
1834     return n;
1835 }
1836
1837 static int dd_getenv(const gmx::MDLogger &mdlog,
1838                      const char *env_var, int def)
1839 {
1840     char *val;
1841     int   nst;
1842
1843     nst = def;
1844     val = getenv(env_var);
1845     if (val)
1846     {
1847         if (sscanf(val, "%20d", &nst) <= 0)
1848         {
1849             nst = 1;
1850         }
1851         GMX_LOG(mdlog.info).appendTextFormatted(
1852                 "Found env.var. %s = %s, using value %d",
1853                 env_var, val, nst);
1854     }
1855
1856     return nst;
1857 }
1858
1859 static void check_dd_restrictions(const gmx_domdec_t  *dd,
1860                                   const t_inputrec    *ir,
1861                                   const gmx::MDLogger &mdlog)
1862 {
1863     if (ir->ePBC == epbcSCREW &&
1864         (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
1865     {
1866         gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
1867     }
1868
1869     if (ir->nstlist == 0)
1870     {
1871         gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1872     }
1873
1874     if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
1875     {
1876         GMX_LOG(mdlog.warning).appendText("comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
1877     }
1878 }
1879
1880 static real average_cellsize_min(const gmx_ddbox_t &ddbox,
1881                                  const ivec         numDomains)
1882 {
1883     real r = ddbox.box_size[XX];
1884     for (int d = 0; d < DIM; d++)
1885     {
1886         if (numDomains[d] > 1)
1887         {
1888             /* Check using the initial average cell size */
1889             r = std::min(r, ddbox.box_size[d]*ddbox.skew_fac[d]/numDomains[d]);
1890         }
1891     }
1892
1893     return r;
1894 }
1895
1896 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
1897  */
1898 static DlbState forceDlbOffOrBail(DlbState             cmdlineDlbState,
1899                                   const std::string   &reasonStr,
1900                                   const gmx::MDLogger &mdlog)
1901 {
1902     std::string dlbNotSupportedErr  = "Dynamic load balancing requested, but ";
1903     std::string dlbDisableNote      = "NOTE: disabling dynamic load balancing as ";
1904
1905     if (cmdlineDlbState == DlbState::onUser)
1906     {
1907         gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1908     }
1909     else if (cmdlineDlbState == DlbState::offCanTurnOn)
1910     {
1911         GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1912     }
1913     return DlbState::offForever;
1914 }
1915
1916 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1917  *
1918  * This function parses the parameters of "-dlb" command line option setting
1919  * corresponding state values. Then it checks the consistency of the determined
1920  * state with other run parameters and settings. As a result, the initial state
1921  * may be altered or an error may be thrown if incompatibility of options is detected.
1922  *
1923  * \param [in] mdlog       Logger.
1924  * \param [in] dlbOption   Enum value for the DLB option.
1925  * \param [in] bRecordLoad True if the load balancer is recording load information.
1926  * \param [in] mdrunOptions  Options for mdrun.
1927  * \param [in] ir          Pointer mdrun to input parameters.
1928  * \returns                DLB initial/startup state.
1929  */
1930 static DlbState determineInitialDlbState(const gmx::MDLogger &mdlog,
1931                                          DlbOption dlbOption, gmx_bool bRecordLoad,
1932                                          const gmx::MdrunOptions &mdrunOptions,
1933                                          const t_inputrec *ir)
1934 {
1935     DlbState dlbState = DlbState::offCanTurnOn;
1936
1937     switch (dlbOption)
1938     {
1939         case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1940         case DlbOption::no:               dlbState = DlbState::offUser;      break;
1941         case DlbOption::yes:              dlbState = DlbState::onUser;       break;
1942         default: gmx_incons("Invalid dlbOption enum value");
1943     }
1944
1945     /* Reruns don't support DLB: bail or override auto mode */
1946     if (mdrunOptions.rerun)
1947     {
1948         std::string reasonStr = "it is not supported in reruns.";
1949         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1950     }
1951
1952     /* Unsupported integrators */
1953     if (!EI_DYNAMICS(ir->eI))
1954     {
1955         auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1956         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1957     }
1958
1959     /* Without cycle counters we can't time work to balance on */
1960     if (!bRecordLoad)
1961     {
1962         std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
1963         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1964     }
1965
1966     if (mdrunOptions.reproducible)
1967     {
1968         std::string reasonStr = "you started a reproducible run.";
1969         switch (dlbState)
1970         {
1971             case DlbState::offUser:
1972                 break;
1973             case DlbState::offForever:
1974                 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1975                 break;
1976             case DlbState::offCanTurnOn:
1977                 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1978             case DlbState::onCanTurnOff:
1979                 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1980                 break;
1981             case DlbState::onUser:
1982                 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", mdlog);
1983             default:
1984                 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", static_cast<int>(dlbState));
1985         }
1986     }
1987
1988     return dlbState;
1989 }
1990
1991 /* Sets the order of the DD dimensions, returns the number of DD dimensions */
1992 static int set_dd_dim(const ivec        numDDCells,
1993                       const DDSettings &ddSettings,
1994                       ivec              dims)
1995 {
1996     int ndim = 0;
1997     if (ddSettings.useDDOrderZYX)
1998     {
1999         /* Decomposition order z,y,x */
2000         for (int dim = DIM - 1; dim >= 0; dim--)
2001         {
2002             if (numDDCells[dim] > 1)
2003             {
2004                 dims[ndim++] = dim;
2005             }
2006         }
2007     }
2008     else
2009     {
2010         /* Decomposition order x,y,z */
2011         for (int dim = 0; dim < DIM; dim++)
2012         {
2013             if (numDDCells[dim] > 1)
2014             {
2015                 dims[ndim++] = dim;
2016             }
2017         }
2018     }
2019
2020     if (ndim == 0)
2021     {
2022         /* Set dim[0] to avoid extra checks on ndim in several places */
2023         dims[0] = XX;
2024     }
2025
2026     return ndim;
2027 }
2028
2029 static gmx_domdec_comm_t *init_dd_comm()
2030 {
2031     gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
2032
2033     comm->n_load_have      = 0;
2034     comm->n_load_collect   = 0;
2035
2036     comm->haveTurnedOffDlb = false;
2037
2038     for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
2039     {
2040         comm->sum_nat[i] = 0;
2041     }
2042     comm->ndecomp   = 0;
2043     comm->nload     = 0;
2044     comm->load_step = 0;
2045     comm->load_sum  = 0;
2046     comm->load_max  = 0;
2047     clear_ivec(comm->load_lim);
2048     comm->load_mdf  = 0;
2049     comm->load_pme  = 0;
2050
2051     /* This should be replaced by a unique pointer */
2052     comm->balanceRegion = ddBalanceRegionAllocate();
2053
2054     return comm;
2055 }
2056
2057 /* Returns whether mtop contains constraints and/or vsites */
2058 static bool systemHasConstraintsOrVsites(const gmx_mtop_t &mtop)
2059 {
2060     auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
2061     int  nmol;
2062     while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
2063     {
2064         if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
2065         {
2066             return true;
2067         }
2068     }
2069
2070     return false;
2071 }
2072
2073 static void setupUpdateGroups(const gmx::MDLogger &mdlog,
2074                               const gmx_mtop_t    &mtop,
2075                               const t_inputrec    &inputrec,
2076                               const real           cutoffMargin,
2077                               DDSystemInfo        *systemInfo)
2078 {
2079     /* When we have constraints and/or vsites, it is beneficial to use
2080      * update groups (when possible) to allow independent update of groups.
2081      */
2082     if (!systemHasConstraintsOrVsites(mtop))
2083     {
2084         /* No constraints or vsites, atoms can be updated independently */
2085         return;
2086     }
2087
2088     systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
2089     systemInfo->useUpdateGroups               =
2090         (!systemInfo->updateGroupingPerMoleculetype.empty() &&
2091          getenv("GMX_NO_UPDATEGROUPS") == nullptr);
2092
2093     if (systemInfo->useUpdateGroups)
2094     {
2095         int numUpdateGroups = 0;
2096         for (const auto &molblock : mtop.molblock)
2097         {
2098             numUpdateGroups += molblock.nmol*systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
2099         }
2100
2101         systemInfo->maxUpdateGroupRadius =
2102             computeMaxUpdateGroupRadius(mtop,
2103                                         systemInfo->updateGroupingPerMoleculetype,
2104                                         maxReferenceTemperature(inputrec));
2105
2106         /* To use update groups, the large domain-to-domain cutoff distance
2107          * should be compatible with the box size.
2108          */
2109         systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
2110
2111         if (systemInfo->useUpdateGroups)
2112         {
2113             GMX_LOG(mdlog.info).appendTextFormatted(
2114                     "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f nm\n",
2115                     numUpdateGroups,
2116                     mtop.natoms/static_cast<double>(numUpdateGroups),
2117                     systemInfo->maxUpdateGroupRadius);
2118         }
2119         else
2120         {
2121             GMX_LOG(mdlog.info).appendTextFormatted("The combination of rlist and box size prohibits the use of update groups\n");
2122             systemInfo->updateGroupingPerMoleculetype.clear();
2123         }
2124     }
2125 }
2126
2127 UnitCellInfo::UnitCellInfo(const t_inputrec &ir) :
2128     npbcdim(ePBC2npbcdim(ir.ePBC)),
2129     numBoundedDimensions(inputrec2nboundeddim(&ir)),
2130     ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
2131     haveScrewPBC(ir.ePBC == epbcSCREW)
2132 {
2133 }
2134
2135 /* Returns whether molecules are always whole, i.e. not broken by PBC */
2136 static bool
2137 moleculesAreAlwaysWhole(const gmx_mtop_t                            &mtop,
2138                         const bool                                   useUpdateGroups,
2139                         gmx::ArrayRef<const gmx::RangePartitioning>  updateGroupingPerMoleculetype)
2140 {
2141     if (useUpdateGroups)
2142     {
2143         GMX_RELEASE_ASSERT(updateGroupingPerMoleculetype.size() == mtop.moltype.size(),
2144                            "Need one grouping per moltype");
2145         for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
2146         {
2147             if (updateGroupingPerMoleculetype[mol].numBlocks() > 1)
2148             {
2149                 return false;
2150             }
2151         }
2152     }
2153     else
2154     {
2155         for (const auto &moltype : mtop.moltype)
2156         {
2157             if (moltype.atoms.nr > 1)
2158             {
2159                 return false;
2160             }
2161         }
2162     }
2163
2164     return true;
2165 }
2166
2167 /*! \brief Generate the simulation system information */
2168 static DDSystemInfo
2169 getSystemInfo(const gmx::MDLogger           &mdlog,
2170               t_commrec                     *cr,
2171               const DomdecOptions           &options,
2172               const gmx_mtop_t              *mtop,
2173               const t_inputrec              *ir,
2174               const matrix                   box,
2175               gmx::ArrayRef<const gmx::RVec> xGlobal)
2176 {
2177     const real         tenPercentMargin = 1.1;
2178
2179     DDSystemInfo       systemInfo;
2180
2181     /* We need to decide on update groups early, as this affects communication distances */
2182     systemInfo.useUpdateGroups = false;
2183     if (ir->cutoff_scheme == ecutsVERLET)
2184     {
2185         real cutoffMargin = std::sqrt(max_cutoff2(ir->ePBC, box)) - ir->rlist;
2186         setupUpdateGroups(mdlog, *mtop, *ir, cutoffMargin, &systemInfo);
2187     }
2188
2189     systemInfo.moleculesAreAlwaysWhole         =
2190         moleculesAreAlwaysWhole(*mtop,
2191                                 systemInfo.useUpdateGroups,
2192                                 systemInfo.updateGroupingPerMoleculetype);
2193     systemInfo.haveInterDomainBondeds          = (!systemInfo.moleculesAreAlwaysWhole ||
2194                                                   mtop->bIntermolecularInteractions);
2195     systemInfo.haveInterDomainMultiBodyBondeds = (systemInfo.haveInterDomainBondeds &&
2196                                                   multi_body_bondeds_count(mtop) > 0);
2197
2198     if (systemInfo.useUpdateGroups)
2199     {
2200         systemInfo.haveSplitConstraints = false;
2201         systemInfo.haveSplitSettles     = false;
2202     }
2203     else
2204     {
2205         systemInfo.haveSplitConstraints = gmx::inter_charge_group_constraints(*mtop);
2206         systemInfo.haveSplitSettles     = gmx::inter_charge_group_settles(*mtop);
2207     }
2208
2209     if (ir->rlist == 0)
2210     {
2211         /* Set the cut-off to some very large value,
2212          * so we don't need if statements everywhere in the code.
2213          * We use sqrt, since the cut-off is squared in some places.
2214          */
2215         systemInfo.cutoff = GMX_CUTOFF_INF;
2216     }
2217     else
2218     {
2219         systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir->rlist);
2220     }
2221     systemInfo.minCutoffForMultiBody = 0;
2222
2223     /* Determine the minimum cell size limit, affected by many factors */
2224     systemInfo.cellsizeLimit             = 0;
2225     systemInfo.filterBondedCommunication = false;
2226
2227     /* We do not allow home atoms to move beyond the neighboring domain
2228      * between domain decomposition steps, which limits the cell size.
2229      * Get an estimate of cell size limit due to atom displacement.
2230      * In most cases this is a large overestimate, because it assumes
2231      * non-interaction atoms.
2232      * We set the chance to 1 in a trillion steps.
2233      */
2234     constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2235     const real     limitForAtomDisplacement          =
2236         minCellSizeForAtomDisplacement(*mtop, *ir,
2237                                        systemInfo.updateGroupingPerMoleculetype,
2238                                        c_chanceThatAtomMovesBeyondDomain);
2239     GMX_LOG(mdlog.info).appendTextFormatted(
2240             "Minimum cell size due to atom displacement: %.3f nm",
2241             limitForAtomDisplacement);
2242
2243     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2244                                         limitForAtomDisplacement);
2245
2246     /* TODO: PME decomposition currently requires atoms not to be more than
2247      *       2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2248      *       In nearly all cases, limitForAtomDisplacement will be smaller
2249      *       than 2/3*rlist, so the PME requirement is satisfied.
2250      *       But it would be better for both correctness and performance
2251      *       to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2252      *       Note that we would need to improve the pairlist buffer case.
2253      */
2254
2255     if (systemInfo.haveInterDomainBondeds)
2256     {
2257         if (options.minimumCommunicationRange > 0)
2258         {
2259             systemInfo.minCutoffForMultiBody =
2260                 atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2261             if (options.useBondedCommunication)
2262             {
2263                 systemInfo.filterBondedCommunication = (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2264             }
2265             else
2266             {
2267                 systemInfo.cutoff = std::max(systemInfo.cutoff,
2268                                              systemInfo.minCutoffForMultiBody);
2269             }
2270         }
2271         else if (ir->bPeriodicMols)
2272         {
2273             /* Can not easily determine the required cut-off */
2274             GMX_LOG(mdlog.warning).appendText("NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.");
2275             systemInfo.minCutoffForMultiBody = systemInfo.cutoff/2;
2276         }
2277         else
2278         {
2279             real r_2b, r_mb;
2280
2281             if (MASTER(cr))
2282             {
2283                 dd_bonded_cg_distance(mdlog, mtop, ir, as_rvec_array(xGlobal.data()), box,
2284                                       options.checkBondedInteractions,
2285                                       &r_2b, &r_mb);
2286             }
2287             gmx_bcast(sizeof(r_2b), &r_2b, cr);
2288             gmx_bcast(sizeof(r_mb), &r_mb, cr);
2289
2290             /* We use an initial margin of 10% for the minimum cell size,
2291              * except when we are just below the non-bonded cut-off.
2292              */
2293             if (options.useBondedCommunication)
2294             {
2295                 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2296                 {
2297                     const real r_bonded              = std::max(r_2b, r_mb);
2298                     systemInfo.minCutoffForMultiBody = tenPercentMargin*r_bonded;
2299                     /* This is the (only) place where we turn on the filtering */
2300                     systemInfo.filterBondedCommunication = true;
2301                 }
2302                 else
2303                 {
2304                     const real r_bonded              = r_mb;
2305                     systemInfo.minCutoffForMultiBody = std::min(tenPercentMargin*r_bonded,
2306                                                                 systemInfo.cutoff);
2307                 }
2308                 /* We determine cutoff_mbody later */
2309                 systemInfo.increaseMultiBodyCutoff = true;
2310             }
2311             else
2312             {
2313                 /* No special bonded communication,
2314                  * simply increase the DD cut-off.
2315                  */
2316                 systemInfo.minCutoffForMultiBody = tenPercentMargin*std::max(r_2b, r_mb);
2317                 systemInfo.cutoff                = std::max(systemInfo.cutoff,
2318                                                             systemInfo.minCutoffForMultiBody);
2319             }
2320         }
2321         GMX_LOG(mdlog.info).appendTextFormatted(
2322                 "Minimum cell size due to bonded interactions: %.3f nm",
2323                 systemInfo.minCutoffForMultiBody);
2324
2325         systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2326                                             systemInfo.minCutoffForMultiBody);
2327     }
2328
2329     systemInfo.constraintCommunicationRange = 0;
2330     if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
2331     {
2332         /* There is a cell size limit due to the constraints (P-LINCS) */
2333         systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, mtop, ir);
2334         GMX_LOG(mdlog.info).appendTextFormatted(
2335                 "Estimated maximum distance required for P-LINCS: %.3f nm",
2336                 systemInfo.constraintCommunicationRange);
2337         if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2338         {
2339             GMX_LOG(mdlog.info).appendText("This distance will limit the DD cell size, you can override this with -rcon");
2340         }
2341     }
2342     else if (options.constraintCommunicationRange > 0)
2343     {
2344         /* Here we do not check for dd->splitConstraints.
2345          * because one can also set a cell size limit for virtual sites only
2346          * and at this point we don't know yet if there are intercg v-sites.
2347          */
2348         GMX_LOG(mdlog.info).appendTextFormatted(
2349                 "User supplied maximum distance required for P-LINCS: %.3f nm",
2350                 options.constraintCommunicationRange);
2351         systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2352     }
2353     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2354                                         systemInfo.constraintCommunicationRange);
2355
2356     return systemInfo;
2357 }
2358
2359 /*! \brief Set the cell size and interaction limits, as well as the DD grid
2360  *
2361  * Also computes the initial ddbox.
2362  */
2363 static DDGridSetup
2364 getDDGridSetup(const gmx::MDLogger           &mdlog,
2365                t_commrec                     *cr,
2366                const DomdecOptions           &options,
2367                const DDSettings              &ddSettings,
2368                const DDSystemInfo            &systemInfo,
2369                const gmx_mtop_t              *mtop,
2370                const t_inputrec              *ir,
2371                const matrix                   box,
2372                gmx::ArrayRef<const gmx::RVec> xGlobal,
2373                gmx_ddbox_t                   *ddbox)
2374 {
2375     DDGridSetup ddGridSetup;
2376
2377     if (options.numCells[XX] > 0)
2378     {
2379         copy_ivec(options.numCells, ddGridSetup.numDomains);
2380         set_ddbox_cr(*cr, &ddGridSetup.numDomains, *ir, box, xGlobal, ddbox);
2381
2382         if (options.numPmeRanks >= 0)
2383         {
2384             ddGridSetup.numPmeOnlyRanks = options.numPmeRanks;
2385         }
2386         else
2387         {
2388             /* When the DD grid is set explicitly and -npme is set to auto,
2389              * don't use PME ranks. We check later if the DD grid is
2390              * compatible with the total number of ranks.
2391              */
2392             ddGridSetup.numPmeOnlyRanks = 0;
2393         }
2394     }
2395     else
2396     {
2397         set_ddbox_cr(*cr, nullptr, *ir, box, xGlobal, ddbox);
2398
2399         /* We need to choose the optimal DD grid and possibly PME nodes */
2400         ddGridSetup =
2401             dd_choose_grid(mdlog, cr, ir, mtop, box, ddbox,
2402                            options.numPmeRanks,
2403                            ddSettings.request1DAnd1Pulse,
2404                            !isDlbDisabled(ddSettings.initialDlbState),
2405                            options.dlbScaling,
2406                            systemInfo);
2407
2408         if (ddGridSetup.numDomains[XX] == 0)
2409         {
2410             char     buf[STRLEN];
2411             gmx_bool bC = (systemInfo.haveSplitConstraints &&
2412                            systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2413             sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
2414                     !bC ? "-rdd" : "-rcon",
2415                     ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2416                     bC ? " or your LINCS settings" : "");
2417
2418             gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2419                                  "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
2420                                  "%s\n"
2421                                  "Look in the log file for details on the domain decomposition",
2422                                  cr->nnodes - ddGridSetup.numPmeOnlyRanks,
2423                                  ddGridSetup.cellsizeLimit,
2424                                  buf);
2425         }
2426     }
2427
2428     const real acs = average_cellsize_min(*ddbox, ddGridSetup.numDomains);
2429     if (acs < systemInfo.cellsizeLimit)
2430     {
2431         if (options.numCells[XX] <= 0)
2432         {
2433             GMX_RELEASE_ASSERT(false, "dd_choose_grid() should return a grid that satisfies the cell size limits");
2434         }
2435         else
2436         {
2437             gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2438                                  "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
2439                                  acs, systemInfo.cellsizeLimit);
2440         }
2441     }
2442
2443     const int numPPRanks = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2444     if (cr->nnodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2445     {
2446         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2447                              "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
2448                              numPPRanks, cr->nnodes - ddGridSetup.numPmeOnlyRanks, cr->nnodes);
2449     }
2450     if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2451     {
2452         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2453                              "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", ddGridSetup.numPmeOnlyRanks, numPPRanks);
2454     }
2455
2456     ddGridSetup.numDDDimensions = set_dd_dim(ddGridSetup.numDomains, ddSettings,
2457                                              ddGridSetup.ddDimensions);
2458
2459     return ddGridSetup;
2460 }
2461
2462 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2463 static DDRankSetup
2464 getDDRankSetup(const gmx::MDLogger &mdlog,
2465                t_commrec           *cr,
2466                const DDGridSetup   &ddGridSetup,
2467                const t_inputrec    &ir)
2468 {
2469     GMX_LOG(mdlog.info).appendTextFormatted(
2470             "Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2471             ddGridSetup.numDomains[XX], ddGridSetup.numDomains[YY], ddGridSetup.numDomains[ZZ],
2472             ddGridSetup.numPmeOnlyRanks);
2473
2474     DDRankSetup ddRankSetup;
2475
2476     ddRankSetup.numPPRanks = cr->nnodes - ddGridSetup.numPmeOnlyRanks;
2477     copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2478
2479     ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2480     if (ddRankSetup.usePmeOnlyRanks)
2481     {
2482         ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2483     }
2484     else
2485     {
2486         ddRankSetup.numRanksDoingPme = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2487     }
2488
2489     if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2490     {
2491         /* The following choices should match those
2492          * in comm_cost_est in domdec_setup.c.
2493          * Note that here the checks have to take into account
2494          * that the decomposition might occur in a different order than xyz
2495          * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2496          * in which case they will not match those in comm_cost_est,
2497          * but since that is mainly for testing purposes that's fine.
2498          */
2499         if (ddGridSetup.numDDDimensions >= 2 &&
2500             ddGridSetup.ddDimensions[0] == XX &&
2501             ddGridSetup.ddDimensions[1] == YY &&
2502             ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX] &&
2503             ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0 &&
2504             getenv("GMX_PMEONEDD") == nullptr)
2505         {
2506             ddRankSetup.npmedecompdim = 2;
2507             ddRankSetup.npmenodes_x   = ddGridSetup.numDomains[XX];
2508             ddRankSetup.npmenodes_y   = ddRankSetup.numRanksDoingPme/ddRankSetup.npmenodes_x;
2509         }
2510         else
2511         {
2512             /* In case nc is 1 in both x and y we could still choose to
2513              * decompose pme in y instead of x, but we use x for simplicity.
2514              */
2515             ddRankSetup.npmedecompdim = 1;
2516             if (ddGridSetup.ddDimensions[0] == YY)
2517             {
2518                 ddRankSetup.npmenodes_x = 1;
2519                 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2520             }
2521             else
2522             {
2523                 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2524                 ddRankSetup.npmenodes_y = 1;
2525             }
2526         }
2527         GMX_LOG(mdlog.info).appendTextFormatted(
2528                 "PME domain decomposition: %d x %d x %d",
2529                 ddRankSetup.npmenodes_x, ddRankSetup.npmenodes_y, 1);
2530     }
2531     else
2532     {
2533         ddRankSetup.npmedecompdim = 0;
2534         ddRankSetup.npmenodes_x   = 0;
2535         ddRankSetup.npmenodes_y   = 0;
2536     }
2537
2538     return ddRankSetup;
2539 }
2540
2541 /*! \brief Set the cell size and interaction limits */
2542 static void set_dd_limits(const gmx::MDLogger &mdlog,
2543                           t_commrec *cr, gmx_domdec_t *dd,
2544                           const DomdecOptions &options,
2545                           const DDSettings &ddSettings,
2546                           const DDSystemInfo &systemInfo,
2547                           const DDGridSetup &ddGridSetup,
2548                           const gmx_mtop_t *mtop,
2549                           const t_inputrec *ir,
2550                           const gmx_ddbox_t &ddbox)
2551 {
2552     gmx_domdec_comm_t *comm = dd->comm;
2553     comm->ddSettings        = ddSettings;
2554
2555     /* Initialize to GPU share count to 0, might change later */
2556     comm->nrank_gpu_shared = 0;
2557
2558     comm->dlbState         = comm->ddSettings.initialDlbState;
2559     dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2560     /* To consider turning DLB on after 2*nstlist steps we need to check
2561      * at partitioning count 3. Thus we need to increase the first count by 2.
2562      */
2563     comm->ddPartioningCountFirstDlbOff += 2;
2564
2565     comm->bPMELoadBalDLBLimits          = FALSE;
2566
2567     /* Allocate the charge group/atom sorting struct */
2568     comm->sort = std::make_unique<gmx_domdec_sort_t>();
2569
2570     comm->systemInfo = systemInfo;
2571
2572     if (systemInfo.useUpdateGroups)
2573     {
2574         /* Note: We would like to use dd->nnodes for the atom count estimate,
2575          *       but that is not yet available here. But this anyhow only
2576          *       affect performance up to the second dd_partition_system call.
2577          */
2578         const int homeAtomCountEstimate =  mtop->natoms/cr->nnodes;
2579         comm->updateGroupsCog =
2580             std::make_unique<gmx::UpdateGroupsCog>(*mtop,
2581                                                    systemInfo.updateGroupingPerMoleculetype,
2582                                                    maxReferenceTemperature(*ir),
2583                                                    homeAtomCountEstimate);
2584     }
2585
2586     comm->cgs_gl = gmx_mtop_global_cgs(mtop);
2587
2588     /* Set the DD setup given by ddGridSetup */
2589     copy_ivec(ddGridSetup.numDomains, dd->nc);
2590     dd->ndim = ddGridSetup.numDDDimensions;
2591     copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2592
2593     dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
2594
2595     snew(comm->slb_frac, DIM);
2596     if (isDlbDisabled(comm))
2597     {
2598         comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
2599         comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
2600         comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
2601     }
2602
2603     /* Set the multi-body cut-off and cellsize limit for DLB */
2604     comm->cutoff_mbody   = systemInfo.minCutoffForMultiBody;
2605     comm->cellsize_limit = systemInfo.cellsizeLimit;
2606     if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2607     {
2608         if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2609         {
2610             /* Set the bonded communication distance to halfway
2611              * the minimum and the maximum,
2612              * since the extra communication cost is nearly zero.
2613              */
2614             real acs           = average_cellsize_min(ddbox, dd->nc);
2615             comm->cutoff_mbody = 0.5*(systemInfo.minCutoffForMultiBody + acs);
2616             if (!isDlbDisabled(comm))
2617             {
2618                 /* Check if this does not limit the scaling */
2619                 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
2620                                               options.dlbScaling*acs);
2621             }
2622             if (!systemInfo.filterBondedCommunication)
2623             {
2624                 /* Without bBondComm do not go beyond the n.b. cut-off */
2625                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2626                 if (comm->cellsize_limit >= systemInfo.cutoff)
2627                 {
2628                     /* We don't loose a lot of efficieny
2629                      * when increasing it to the n.b. cut-off.
2630                      * It can even be slightly faster, because we need
2631                      * less checks for the communication setup.
2632                      */
2633                     comm->cutoff_mbody = systemInfo.cutoff;
2634                 }
2635             }
2636             /* Check if we did not end up below our original limit */
2637             comm->cutoff_mbody = std::max(comm->cutoff_mbody,
2638                                           systemInfo.minCutoffForMultiBody);
2639
2640             if (comm->cutoff_mbody > comm->cellsize_limit)
2641             {
2642                 comm->cellsize_limit = comm->cutoff_mbody;
2643             }
2644         }
2645         /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2646     }
2647
2648     if (debug)
2649     {
2650         fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
2651                 "cellsize limit %f\n",
2652                 gmx::boolToString(systemInfo.filterBondedCommunication),
2653                 comm->cellsize_limit);
2654     }
2655
2656     if (MASTER(cr))
2657     {
2658         check_dd_restrictions(dd, ir, mdlog);
2659     }
2660 }
2661
2662 static char *init_bLocalCG(const gmx_mtop_t *mtop)
2663 {
2664     int   ncg, cg;
2665     char *bLocalCG;
2666
2667     ncg = ncg_mtop(mtop);
2668     snew(bLocalCG, ncg);
2669     for (cg = 0; cg < ncg; cg++)
2670     {
2671         bLocalCG[cg] = FALSE;
2672     }
2673
2674     return bLocalCG;
2675 }
2676
2677 void dd_init_bondeds(FILE *fplog,
2678                      gmx_domdec_t *dd,
2679                      const gmx_mtop_t *mtop,
2680                      const gmx_vsite_t *vsite,
2681                      const t_inputrec *ir,
2682                      gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
2683 {
2684     gmx_domdec_comm_t *comm;
2685
2686     dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
2687
2688     comm = dd->comm;
2689
2690     if (comm->systemInfo.filterBondedCommunication)
2691     {
2692         /* Communicate atoms beyond the cut-off for bonded interactions */
2693         comm = dd->comm;
2694
2695         comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
2696
2697         comm->bLocalCG = init_bLocalCG(mtop);
2698     }
2699     else
2700     {
2701         /* Only communicate atoms based on cut-off */
2702         comm->cglink   = nullptr;
2703         comm->bLocalCG = nullptr;
2704     }
2705 }
2706
2707 static void writeSettings(gmx::TextWriter       *log,
2708                           gmx_domdec_t          *dd,
2709                           const gmx_mtop_t      *mtop,
2710                           const t_inputrec      *ir,
2711                           gmx_bool               bDynLoadBal,
2712                           real                   dlb_scale,
2713                           const gmx_ddbox_t     *ddbox)
2714 {
2715     gmx_domdec_comm_t *comm;
2716     int                d;
2717     ivec               np;
2718     real               limit, shrink;
2719
2720     comm = dd->comm;
2721
2722     if (bDynLoadBal)
2723     {
2724         log->writeString("The maximum number of communication pulses is:");
2725         for (d = 0; d < dd->ndim; d++)
2726         {
2727             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2728         }
2729         log->ensureLineBreak();
2730         log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm", comm->cellsize_limit);
2731         log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2732         log->writeString("The allowed shrink of domain decomposition cells is:");
2733         for (d = 0; d < DIM; d++)
2734         {
2735             if (dd->nc[d] > 1)
2736             {
2737                 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
2738                 {
2739                     shrink = 0;
2740                 }
2741                 else
2742                 {
2743                     shrink =
2744                         comm->cellsize_min_dlb[d]/
2745                         (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
2746                 }
2747                 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2748             }
2749         }
2750         log->ensureLineBreak();
2751     }
2752     else
2753     {
2754         set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2755         log->writeString("The initial number of communication pulses is:");
2756         for (d = 0; d < dd->ndim; d++)
2757         {
2758             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2759         }
2760         log->ensureLineBreak();
2761         log->writeString("The initial domain decomposition cell size is:");
2762         for (d = 0; d < DIM; d++)
2763         {
2764             if (dd->nc[d] > 1)
2765             {
2766                 log->writeStringFormatted(" %c %.2f nm",
2767                                           dim2char(d), dd->comm->cellsize_min[d]);
2768             }
2769         }
2770         log->ensureLineBreak();
2771         log->writeLine();
2772     }
2773
2774     const bool haveInterDomainVsites =
2775         (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
2776
2777     if (comm->systemInfo.haveInterDomainBondeds ||
2778         haveInterDomainVsites ||
2779         comm->systemInfo.haveSplitConstraints ||
2780         comm->systemInfo.haveSplitSettles)
2781     {
2782         std::string decompUnits;
2783         if (comm->systemInfo.useUpdateGroups)
2784         {
2785             decompUnits = "atom groups";
2786         }
2787         else
2788         {
2789             decompUnits = "atoms";
2790         }
2791
2792         log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:", decompUnits.c_str());
2793         log->writeLineFormatted("%40s  %-7s %6.3f nm", "non-bonded interactions", "", comm->systemInfo.cutoff);
2794
2795         if (bDynLoadBal)
2796         {
2797             limit = dd->comm->cellsize_limit;
2798         }
2799         else
2800         {
2801             if (dd->unitCellInfo.ddBoxIsDynamic)
2802             {
2803                 log->writeLine("(the following are initial values, they could change due to box deformation)");
2804             }
2805             limit = dd->comm->cellsize_min[XX];
2806             for (d = 1; d < DIM; d++)
2807             {
2808                 limit = std::min(limit, dd->comm->cellsize_min[d]);
2809             }
2810         }
2811
2812         if (comm->systemInfo.haveInterDomainBondeds)
2813         {
2814             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2815                                     "two-body bonded interactions", "(-rdd)",
2816                                     std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2817             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2818                                     "multi-body bonded interactions", "(-rdd)",
2819                                     (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->systemInfo.cutoff, limit));
2820         }
2821         if (haveInterDomainVsites)
2822         {
2823             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2824                                     "virtual site constructions", "(-rcon)", limit);
2825         }
2826         if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2827         {
2828             std::string separation = gmx::formatString("atoms separated by up to %d constraints",
2829                                                        1+ir->nProjOrder);
2830             log->writeLineFormatted("%40s  %-7s %6.3f nm\n",
2831                                     separation.c_str(), "(-rcon)", limit);
2832         }
2833         log->ensureLineBreak();
2834     }
2835 }
2836
2837 static void logSettings(const gmx::MDLogger &mdlog,
2838                         gmx_domdec_t        *dd,
2839                         const gmx_mtop_t    *mtop,
2840                         const t_inputrec    *ir,
2841                         real                 dlb_scale,
2842                         const gmx_ddbox_t   *ddbox)
2843 {
2844     gmx::StringOutputStream stream;
2845     gmx::TextWriter         log(&stream);
2846     writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2847     if (dd->comm->dlbState == DlbState::offCanTurnOn)
2848     {
2849         {
2850             log.ensureEmptyLine();
2851             log.writeLine("When dynamic load balancing gets turned on, these settings will change to:");
2852         }
2853         writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2854     }
2855     GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2856 }
2857
2858 static void set_cell_limits_dlb(const gmx::MDLogger &mdlog,
2859                                 gmx_domdec_t        *dd,
2860                                 real                 dlb_scale,
2861                                 const t_inputrec    *ir,
2862                                 const gmx_ddbox_t   *ddbox)
2863 {
2864     gmx_domdec_comm_t *comm;
2865     int                d, dim, npulse, npulse_d_max, npulse_d;
2866     gmx_bool           bNoCutOff;
2867
2868     comm = dd->comm;
2869
2870     bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2871
2872     /* Determine the maximum number of comm. pulses in one dimension */
2873
2874     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2875
2876     /* Determine the maximum required number of grid pulses */
2877     if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2878     {
2879         /* Only a single pulse is required */
2880         npulse = 1;
2881     }
2882     else if (!bNoCutOff && comm->cellsize_limit > 0)
2883     {
2884         /* We round down slightly here to avoid overhead due to the latency
2885          * of extra communication calls when the cut-off
2886          * would be only slightly longer than the cell size.
2887          * Later cellsize_limit is redetermined,
2888          * so we can not miss interactions due to this rounding.
2889          */
2890         npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff/comm->cellsize_limit);
2891     }
2892     else
2893     {
2894         /* There is no cell size limit */
2895         npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
2896     }
2897
2898     if (!bNoCutOff && npulse > 1)
2899     {
2900         /* See if we can do with less pulses, based on dlb_scale */
2901         npulse_d_max = 0;
2902         for (d = 0; d < dd->ndim; d++)
2903         {
2904             dim      = dd->dim[d];
2905             npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->systemInfo.cutoff
2906                                         /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
2907             npulse_d_max = std::max(npulse_d_max, npulse_d);
2908         }
2909         npulse = std::min(npulse, npulse_d_max);
2910     }
2911
2912     /* This env var can override npulse */
2913     d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2914     if (d > 0)
2915     {
2916         npulse = d;
2917     }
2918
2919     comm->maxpulse       = 1;
2920     comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
2921     for (d = 0; d < dd->ndim; d++)
2922     {
2923         if (comm->ddSettings.request1DAnd1Pulse)
2924         {
2925             comm->cd[d].np_dlb = 1;
2926         }
2927         else
2928         {
2929             comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
2930             comm->maxpulse     = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2931         }
2932         if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
2933         {
2934             comm->bVacDLBNoLimit = FALSE;
2935         }
2936     }
2937
2938     /* cellsize_limit is set for LINCS in init_domain_decomposition */
2939     if (!comm->bVacDLBNoLimit)
2940     {
2941         comm->cellsize_limit = std::max(comm->cellsize_limit,
2942                                         comm->systemInfo.cutoff/comm->maxpulse);
2943     }
2944     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2945     /* Set the minimum cell size for each DD dimension */
2946     for (d = 0; d < dd->ndim; d++)
2947     {
2948         if (comm->bVacDLBNoLimit ||
2949             comm->cd[d].np_dlb*comm->cellsize_limit >= comm->systemInfo.cutoff)
2950         {
2951             comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2952         }
2953         else
2954         {
2955             comm->cellsize_min_dlb[dd->dim[d]] =
2956                 comm->systemInfo.cutoff/comm->cd[d].np_dlb;
2957         }
2958     }
2959     if (comm->cutoff_mbody <= 0)
2960     {
2961         comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2962     }
2963     if (isDlbOn(comm))
2964     {
2965         set_dlb_limits(dd);
2966     }
2967 }
2968
2969 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t &dd)
2970 {
2971     return dd.comm->systemInfo.moleculesAreAlwaysWhole;
2972 }
2973
2974 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
2975 {
2976     /* If each molecule is a single charge group
2977      * or we use domain decomposition for each periodic dimension,
2978      * we do not need to take pbc into account for the bonded interactions.
2979      */
2980     return (ePBC != epbcNONE && dd->comm->systemInfo.haveInterDomainBondeds &&
2981             !(dd->nc[XX] > 1 &&
2982               dd->nc[YY] > 1 &&
2983               (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
2984 }
2985
2986 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2987 static void set_ddgrid_parameters(const gmx::MDLogger &mdlog,
2988                                   gmx_domdec_t *dd, real dlb_scale,
2989                                   const gmx_mtop_t *mtop, const t_inputrec *ir,
2990                                   const gmx_ddbox_t *ddbox)
2991 {
2992     gmx_domdec_comm_t *comm        = dd->comm;
2993     DDRankSetup       &ddRankSetup = comm->ddRankSetup;
2994
2995     if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2996     {
2997         init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2998         if (ddRankSetup.npmedecompdim >= 2)
2999         {
3000             init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
3001         }
3002     }
3003     else
3004     {
3005         ddRankSetup.numRanksDoingPme = 0;
3006         if (dd->pme_nodeid >= 0)
3007         {
3008             gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
3009                                  "Can not have separate PME ranks without PME electrostatics");
3010         }
3011     }
3012
3013     if (debug)
3014     {
3015         fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
3016     }
3017     if (!isDlbDisabled(comm))
3018     {
3019         set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
3020     }
3021
3022     logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
3023
3024     real vol_frac;
3025     if (ir->ePBC == epbcNONE)
3026     {
3027         vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
3028     }
3029     else
3030     {
3031         vol_frac =
3032             (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, ddbox))/static_cast<double>(dd->nnodes);
3033     }
3034     if (debug)
3035     {
3036         fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
3037     }
3038     int natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
3039
3040     dd->ga2la      = new gmx_ga2la_t(natoms_tot,
3041                                      static_cast<int>(vol_frac*natoms_tot));
3042 }
3043
3044 /*! \brief Get some important DD parameters which can be modified by env.vars */
3045 static DDSettings
3046 getDDSettings(const gmx::MDLogger     &mdlog,
3047               const DomdecOptions     &options,
3048               const gmx::MdrunOptions &mdrunOptions,
3049               const t_inputrec        &ir)
3050 {
3051     DDSettings ddSettings;
3052
3053     ddSettings.useSendRecv2        = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
3054     ddSettings.dlb_scale_lim       = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
3055     // TODO GPU halo exchange requires a 1D single-pulse DD, and when
3056     // it is properly integrated the hack with GMX_GPU_DD_COMMS should
3057     // be removed.
3058     ddSettings.request1DAnd1Pulse  = (bool(dd_getenv(mdlog, "GMX_DD_1D_1PULSE", 0)) ||
3059                                       (bool(getenv("GMX_GPU_DD_COMMS") != nullptr &&
3060                                             GMX_THREAD_MPI &&
3061                                             (GMX_GPU == GMX_GPU_CUDA))));
3062     ddSettings.useDDOrderZYX       = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
3063     ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
3064     ddSettings.eFlop               = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
3065     const int recload              = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
3066     ddSettings.nstDDDump           = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
3067     ddSettings.nstDDDumpGrid       = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
3068     ddSettings.DD_debug            = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
3069
3070     if (ddSettings.useSendRecv2)
3071     {
3072         GMX_LOG(mdlog.info).appendText("Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication");
3073     }
3074
3075     if (ddSettings.eFlop)
3076     {
3077         GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
3078         ddSettings.recordLoad = true;
3079     }
3080     else
3081     {
3082         ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
3083     }
3084
3085     ddSettings.initialDlbState =
3086         determineInitialDlbState(mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, &ir);
3087     GMX_LOG(mdlog.info).appendTextFormatted("Dynamic load balancing: %s",
3088                                             edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);
3089
3090     return ddSettings;
3091 }
3092
3093 gmx_domdec_t::gmx_domdec_t(const t_inputrec &ir) :
3094     unitCellInfo(ir)
3095 {
3096 }
3097
3098 gmx_domdec_t *init_domain_decomposition(const gmx::MDLogger           &mdlog,
3099                                         t_commrec                     *cr,
3100                                         const DomdecOptions           &options,
3101                                         const gmx::MdrunOptions       &mdrunOptions,
3102                                         const gmx_mtop_t              *mtop,
3103                                         const t_inputrec              *ir,
3104                                         const matrix                   box,
3105                                         gmx::ArrayRef<const gmx::RVec> xGlobal,
3106                                         gmx::LocalAtomSetManager      *atomSets)
3107 {
3108     GMX_LOG(mdlog.info).appendTextFormatted(
3109             "\nInitializing Domain Decomposition on %d ranks", cr->nnodes);
3110
3111     DDSettings  ddSettings = getDDSettings(mdlog, options, mdrunOptions, *ir);
3112     if (ddSettings.eFlop > 1)
3113     {
3114         /* Ensure that we have different random flop counts on different ranks */
3115         srand(1 + cr->nodeid);
3116     }
3117
3118     DDSystemInfo systemInfo = getSystemInfo(mdlog, cr, options, mtop, ir, box, xGlobal);
3119
3120     gmx_ddbox_t  ddbox       = {0};
3121     DDGridSetup  ddGridSetup = getDDGridSetup(mdlog, cr, options, ddSettings, systemInfo,
3122                                               mtop, ir, box, xGlobal, &ddbox);
3123
3124     cr->npmenodes = ddGridSetup.numPmeOnlyRanks;
3125
3126     DDRankSetup ddRankSetup = getDDRankSetup(mdlog, cr, ddGridSetup, *ir);
3127
3128     /* Generate the group communicator, also decides the duty of each rank */
3129     ivec               ddCellIndex = { 0, 0, 0 };
3130     std::vector<int>   pmeRanks;
3131     CartesianRankSetup cartSetup   =
3132         makeGroupCommunicators(mdlog, ddSettings, options.rankOrder,
3133                                ddRankSetup, cr,
3134                                ddCellIndex, &pmeRanks);
3135
3136     gmx_domdec_t *dd = new gmx_domdec_t(*ir);
3137
3138     copy_ivec(ddCellIndex, dd->ci);
3139
3140     dd->comm = init_dd_comm();
3141
3142     dd->comm->ddRankSetup        = ddRankSetup;
3143     dd->comm->cartesianRankSetup = cartSetup;
3144
3145     set_dd_limits(mdlog, cr, dd, options,
3146                   ddSettings, systemInfo, ddGridSetup,
3147                   mtop, ir,
3148                   ddbox);
3149
3150     setupGroupCommunication(mdlog, ddSettings, pmeRanks, cr, dd);
3151
3152     if (thisRankHasDuty(cr, DUTY_PP))
3153     {
3154         set_ddgrid_parameters(mdlog, dd, options.dlbScaling, mtop, ir, &ddbox);
3155
3156         setup_neighbor_relations(dd);
3157     }
3158
3159     /* Set overallocation to avoid frequent reallocation of arrays */
3160     set_over_alloc_dd(TRUE);
3161
3162     dd->atomSets = atomSets;
3163
3164     return dd;
3165 }
3166
3167 static gmx_bool test_dd_cutoff(t_commrec                     *cr,
3168                                const matrix                   box,
3169                                gmx::ArrayRef<const gmx::RVec> x,
3170                                real                           cutoffRequested)
3171 {
3172     gmx_domdec_t *dd;
3173     gmx_ddbox_t   ddbox;
3174     int           d, dim, np;
3175     real          inv_cell_size;
3176     int           LocallyLimited;
3177
3178     dd = cr->dd;
3179
3180     set_ddbox(*dd, false, box, true, x, &ddbox);
3181
3182     LocallyLimited = 0;
3183
3184     for (d = 0; d < dd->ndim; d++)
3185     {
3186         dim = dd->dim[d];
3187
3188         inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
3189         if (dd->unitCellInfo.ddBoxIsDynamic)
3190         {
3191             inv_cell_size *= DD_PRES_SCALE_MARGIN;
3192         }
3193
3194         np = 1 + static_cast<int>(cutoffRequested*inv_cell_size*ddbox.skew_fac[dim]);
3195
3196         if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3197         {
3198             if (np > dd->comm->cd[d].np_dlb)
3199             {
3200                 return FALSE;
3201             }
3202
3203             /* If a current local cell size is smaller than the requested
3204              * cut-off, we could still fix it, but this gets very complicated.
3205              * Without fixing here, we might actually need more checks.
3206              */
3207             real cellSizeAlongDim = (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim];
3208             if (cellSizeAlongDim*dd->comm->cd[d].np_dlb < cutoffRequested)
3209             {
3210                 LocallyLimited = 1;
3211             }
3212         }
3213     }
3214
3215     if (!isDlbDisabled(dd->comm))
3216     {
3217         /* If DLB is not active yet, we don't need to check the grid jumps.
3218          * Actually we shouldn't, because then the grid jump data is not set.
3219          */
3220         if (isDlbOn(dd->comm) &&
3221             check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3222         {
3223             LocallyLimited = 1;
3224         }
3225
3226         gmx_sumi(1, &LocallyLimited, cr);
3227
3228         if (LocallyLimited > 0)
3229         {
3230             return FALSE;
3231         }
3232     }
3233
3234     return TRUE;
3235 }
3236
3237 gmx_bool change_dd_cutoff(t_commrec                     *cr,
3238                           const matrix                   box,
3239                           gmx::ArrayRef<const gmx::RVec> x,
3240                           real                           cutoffRequested)
3241 {
3242     gmx_bool bCutoffAllowed;
3243
3244     bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3245
3246     if (bCutoffAllowed)
3247     {
3248         cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3249     }
3250
3251     return bCutoffAllowed;
3252 }