src/gromacs/domdec/domdec.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #include "gmxpre.h"
  37
  38 #include "domdec.h"
  39
  40 #include "config.h"
  41
  42 #include <cassert>
  43 #include <cinttypes>
  44 #include <climits>
  45 #include <cmath>
  46 #include <cstdio>
  47 #include <cstdlib>
  48 #include <cstring>
  49
  50 #include <algorithm>
  51 #include <memory>
  52
  53 #include "gromacs/domdec/collect.h"
  54 #include "gromacs/domdec/dlb.h"
  55 #include "gromacs/domdec/dlbtiming.h"
  56 #include "gromacs/domdec/domdec_network.h"
  57 #include "gromacs/domdec/ga2la.h"
  58 #include "gromacs/domdec/gpuhaloexchange.h"
  59 #include "gromacs/domdec/options.h"
  60 #include "gromacs/domdec/partition.h"
  61 #include "gromacs/gmxlib/network.h"
  62 #include "gromacs/gmxlib/nrnb.h"
  63 #include "gromacs/gpu_utils/gpu_utils.h"
  64 #include "gromacs/hardware/hw_info.h"
  65 #include "gromacs/listed_forces/manage_threading.h"
  66 #include "gromacs/math/vec.h"
  67 #include "gromacs/math/vectypes.h"
  68 #include "gromacs/mdlib/calc_verletbuf.h"
  69 #include "gromacs/mdlib/constr.h"
  70 #include "gromacs/mdlib/constraintrange.h"
  71 #include "gromacs/mdlib/updategroups.h"
  72 #include "gromacs/mdlib/vsite.h"
  73 #include "gromacs/mdtypes/commrec.h"
  74 #include "gromacs/mdtypes/forceoutput.h"
  75 #include "gromacs/mdtypes/inputrec.h"
  76 #include "gromacs/mdtypes/mdrunoptions.h"
  77 #include "gromacs/mdtypes/state.h"
  78 #include "gromacs/pbcutil/ishift.h"
  79 #include "gromacs/pbcutil/pbc.h"
  80 #include "gromacs/pulling/pull.h"
  81 #include "gromacs/timing/wallcycle.h"
  82 #include "gromacs/topology/block.h"
  83 #include "gromacs/topology/idef.h"
  84 #include "gromacs/topology/ifunc.h"
  85 #include "gromacs/topology/mtop_lookup.h"
  86 #include "gromacs/topology/mtop_util.h"
  87 #include "gromacs/topology/topology.h"
  88 #include "gromacs/utility/basedefinitions.h"
  89 #include "gromacs/utility/basenetwork.h"
  90 #include "gromacs/utility/cstringutil.h"
  91 #include "gromacs/utility/exceptions.h"
  92 #include "gromacs/utility/fatalerror.h"
  93 #include "gromacs/utility/gmxmpi.h"
  94 #include "gromacs/utility/logger.h"
  95 #include "gromacs/utility/real.h"
  96 #include "gromacs/utility/smalloc.h"
  97 #include "gromacs/utility/strconvert.h"
  98 #include "gromacs/utility/stringstream.h"
  99 #include "gromacs/utility/stringutil.h"
 100 #include "gromacs/utility/textwriter.h"
 101
 102 #include "atomdistribution.h"
 103 #include "box.h"
 104 #include "cellsizes.h"
 105 #include "distribute.h"
 106 #include "domdec_constraints.h"
 107 #include "domdec_internal.h"
 108 #include "domdec_setup.h"
 109 #include "domdec_vsite.h"
 110 #include "redistribute.h"
 111 #include "utility.h"
 112
 113 // TODO remove this when moving domdec into gmx namespace
 114 using gmx::DdRankOrder;
 115 using gmx::DlbOption;
 116 using gmx::DomdecOptions;
 117
 118 static const char *edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
 119
 120 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
 121 #define DD_CGIBS 2
 122
 123 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
 124 #define DD_FLAG_NRCG  65535
 125 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
 126 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
 127
 128 /* The DD zone order */
 129 static const ivec dd_zo[DD_MAXZONE] =
 130 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
 131
 132 /* The non-bonded zone-pair setup for domain decomposition
 133  * The first number is the i-zone, the second number the first j-zone seen by
 134  * this i-zone, the third number the last+1 j-zone seen by this i-zone.
 135  * As is, this is for 3D decomposition, where there are 4 i-zones.
 136  * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
 137  * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
 138  */
 139 static const int
 140     ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
 141                                                  {1, 3, 6},
 142                                                  {2, 5, 6},
 143                                                  {3, 5, 7}};
 144
 145
 146 /*
 147    #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
 148
 149    static void index2xyz(ivec nc,int ind,ivec xyz)
 150    {
 151    xyz[XX] = ind % nc[XX];
 152    xyz[YY] = (ind / nc[XX]) % nc[YY];
 153    xyz[ZZ] = ind / (nc[YY]*nc[XX]);
 154    }
 155  */
 156
 157 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
 158 {
 159     xyz[XX] = ind / (nc[YY]*nc[ZZ]);
 160     xyz[YY] = (ind / nc[ZZ]) % nc[YY];
 161     xyz[ZZ] = ind % nc[ZZ];
 162 }
 163
 164 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
 165 {
 166     int                       ddnodeid = -1;
 167
 168     const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
 169     const int                 ddindex   = dd_index(dd->nc, c);
 170     if (cartSetup.bCartesianPP_PME)
 171     {
 172         ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
 173     }
 174     else if (cartSetup.bCartesianPP)
 175     {
 176 #if GMX_MPI
 177         MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
 178 #endif
 179     }
 180     else
 181     {
 182         ddnodeid = ddindex;
 183     }
 184
 185     return ddnodeid;
 186 }
 187
 188 int ddglatnr(const gmx_domdec_t *dd, int i)
 189 {
 190     int atnr;
 191
 192     if (dd == nullptr)
 193     {
 194         atnr = i + 1;
 195     }
 196     else
 197     {
 198         if (i >= dd->comm->atomRanges.numAtomsTotal())
 199         {
 200             gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
 201         }
 202         atnr = dd->globalAtomIndices[i] + 1;
 203     }
 204
 205     return atnr;
 206 }
 207
 208 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
 209 {
 210     return &dd->comm->cgs_gl;
 211 }
 212
 213 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t &dd)
 214 {
 215     GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
 216     return dd.comm->systemInfo.updateGroupingPerMoleculetype;
 217 }
 218
 219 void dd_store_state(gmx_domdec_t *dd, t_state *state)
 220 {
 221     int i;
 222
 223     if (state->ddp_count != dd->ddp_count)
 224     {
 225         gmx_incons("The MD state does not match the domain decomposition state");
 226     }
 227
 228     state->cg_gl.resize(dd->ncg_home);
 229     for (i = 0; i < dd->ncg_home; i++)
 230     {
 231         state->cg_gl[i] = dd->globalAtomGroupIndices[i];
 232     }
 233
 234     state->ddp_count_cg_gl = dd->ddp_count;
 235 }
 236
 237 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
 238 {
 239     return &dd->comm->zones;
 240 }
 241
 242 void dd_get_ns_ranges(const gmx_domdec_t *dd, int icg,
 243                       int *jcg0, int *jcg1, ivec shift0, ivec shift1)
 244 {
 245     gmx_domdec_zones_t *zones;
 246     int                 izone, d, dim;
 247
 248     zones = &dd->comm->zones;
 249
 250     izone = 0;
 251     while (icg >= zones->izone[izone].cg1)
 252     {
 253         izone++;
 254     }
 255
 256     if (izone == 0)
 257     {
 258         *jcg0 = icg;
 259     }
 260     else if (izone < zones->nizone)
 261     {
 262         *jcg0 = zones->izone[izone].jcg0;
 263     }
 264     else
 265     {
 266         gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
 267                   icg, izone, zones->nizone);
 268     }
 269
 270     *jcg1 = zones->izone[izone].jcg1;
 271
 272     for (d = 0; d < dd->ndim; d++)
 273     {
 274         dim         = dd->dim[d];
 275         shift0[dim] = zones->izone[izone].shift0[dim];
 276         shift1[dim] = zones->izone[izone].shift1[dim];
 277         if (dd->comm->tric_dir[dim] || (isDlbOn(dd->comm) && d > 0))
 278         {
 279             /* A conservative approach, this can be optimized */
 280             shift0[dim] -= 1;
 281             shift1[dim] += 1;
 282         }
 283     }
 284 }
 285
 286 int dd_numHomeAtoms(const gmx_domdec_t &dd)
 287 {
 288     return dd.comm->atomRanges.numHomeAtoms();
 289 }
 290
 291 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
 292 {
 293     /* We currently set mdatoms entries for all atoms:
 294      * local + non-local + communicated for vsite + constraints
 295      */
 296
 297     return dd->comm->atomRanges.numAtomsTotal();
 298 }
 299
 300 int dd_natoms_vsite(const gmx_domdec_t *dd)
 301 {
 302     return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
 303 }
 304
 305 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
 306 {
 307     *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
 308     *at_end   = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
 309 }
 310
 311 void dd_move_x(gmx_domdec_t             *dd,
 312                const matrix              box,
 313                gmx::ArrayRef<gmx::RVec>  x,
 314                gmx_wallcycle            *wcycle)
 315 {
 316     wallcycle_start(wcycle, ewcMOVEX);
 317
 318     int                    nzone, nat_tot;
 319     gmx_domdec_comm_t     *comm;
 320     gmx_domdec_comm_dim_t *cd;
 321     rvec                   shift = {0, 0, 0};
 322     gmx_bool               bPBC, bScrew;
 323
 324     comm = dd->comm;
 325
 326     nzone   = 1;
 327     nat_tot = comm->atomRanges.numHomeAtoms();
 328     for (int d = 0; d < dd->ndim; d++)
 329     {
 330         bPBC   = (dd->ci[dd->dim[d]] == 0);
 331         bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
 332         if (bPBC)
 333         {
 334             copy_rvec(box[dd->dim[d]], shift);
 335         }
 336         cd = &comm->cd[d];
 337         for (const gmx_domdec_ind_t &ind : cd->ind)
 338         {
 339             DDBufferAccess<gmx::RVec>  sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 340             gmx::ArrayRef<gmx::RVec>  &sendBuffer = sendBufferAccess.buffer;
 341             int                        n          = 0;
 342             if (!bPBC)
 343             {
 344                 for (int j : ind.index)
 345                 {
 346                     sendBuffer[n] = x[j];
 347                     n++;
 348                 }
 349             }
 350             else if (!bScrew)
 351             {
 352                 for (int j : ind.index)
 353                 {
 354                     /* We need to shift the coordinates */
 355                     for (int d = 0; d < DIM; d++)
 356                     {
 357                         sendBuffer[n][d] = x[j][d] + shift[d];
 358                     }
 359                     n++;
 360                 }
 361             }
 362             else
 363             {
 364                 for (int j : ind.index)
 365                 {
 366                     /* Shift x */
 367                     sendBuffer[n][XX] = x[j][XX] + shift[XX];
 368                     /* Rotate y and z.
 369                      * This operation requires a special shift force
 370                      * treatment, which is performed in calc_vir.
 371                      */
 372                     sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
 373                     sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
 374                     n++;
 375                 }
 376             }
 377
 378             DDBufferAccess<gmx::RVec>  receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 379
 380             gmx::ArrayRef<gmx::RVec>   receiveBuffer;
 381             if (cd->receiveInPlace)
 382             {
 383                 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
 384             }
 385             else
 386             {
 387                 receiveBuffer = receiveBufferAccess.buffer;
 388             }
 389             /* Send and receive the coordinates */
 390             ddSendrecv(dd, d, dddirBackward,
 391                        sendBuffer, receiveBuffer);
 392
 393             if (!cd->receiveInPlace)
 394             {
 395                 int j = 0;
 396                 for (int zone = 0; zone < nzone; zone++)
 397                 {
 398                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 399                     {
 400                         x[i] = receiveBuffer[j++];
 401                     }
 402                 }
 403             }
 404             nat_tot += ind.nrecv[nzone+1];
 405         }
 406         nzone += nzone;
 407     }
 408
 409     wallcycle_stop(wcycle, ewcMOVEX);
 410 }
 411
 412 void dd_move_f(gmx_domdec_t              *dd,
 413                gmx::ForceWithShiftForces *forceWithShiftForces,
 414                gmx_wallcycle             *wcycle)
 415 {
 416     wallcycle_start(wcycle, ewcMOVEF);
 417
 418     gmx::ArrayRef<gmx::RVec> f       = forceWithShiftForces->force();
 419     gmx::ArrayRef<gmx::RVec> fshift  = forceWithShiftForces->shiftForces();
 420
 421     gmx_domdec_comm_t       &comm    = *dd->comm;
 422     int                      nzone   = comm.zones.n/2;
 423     int                      nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
 424     for (int d = dd->ndim-1; d >= 0; d--)
 425     {
 426         /* Only forces in domains near the PBC boundaries need to
 427            consider PBC in the treatment of fshift */
 428         const bool shiftForcesNeedPbc = (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
 429         const bool applyScrewPbc      = (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
 430         /* Determine which shift vector we need */
 431         ivec       vis   = { 0, 0, 0 };
 432         vis[dd->dim[d]]  = 1;
 433         const int  is    = IVEC2IS(vis);
 434
 435         /* Loop over the pulses */
 436         const gmx_domdec_comm_dim_t &cd = comm.cd[d];
 437         for (int p = cd.numPulses() - 1; p >= 0; p--)
 438         {
 439             const gmx_domdec_ind_t    &ind  = cd.ind[p];
 440             DDBufferAccess<gmx::RVec>  receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
 441             gmx::ArrayRef<gmx::RVec>  &receiveBuffer = receiveBufferAccess.buffer;
 442
 443             nat_tot                                 -= ind.nrecv[nzone+1];
 444
 445             DDBufferAccess<gmx::RVec>  sendBufferAccess(comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 446
 447             gmx::ArrayRef<gmx::RVec>   sendBuffer;
 448             if (cd.receiveInPlace)
 449             {
 450                 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
 451             }
 452             else
 453             {
 454                 sendBuffer = sendBufferAccess.buffer;
 455                 int j = 0;
 456                 for (int zone = 0; zone < nzone; zone++)
 457                 {
 458                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 459                     {
 460                         sendBuffer[j++] = f[i];
 461                     }
 462                 }
 463             }
 464             /* Communicate the forces */
 465             ddSendrecv(dd, d, dddirForward,
 466                        sendBuffer, receiveBuffer);
 467             /* Add the received forces */
 468             int n = 0;
 469             if (!shiftForcesNeedPbc)
 470             {
 471                 for (int j : ind.index)
 472                 {
 473                     for (int d = 0; d < DIM; d++)
 474                     {
 475                         f[j][d] += receiveBuffer[n][d];
 476                     }
 477                     n++;
 478                 }
 479             }
 480             else if (!applyScrewPbc)
 481             {
 482                 for (int j : ind.index)
 483                 {
 484                     for (int d = 0; d < DIM; d++)
 485                     {
 486                         f[j][d] += receiveBuffer[n][d];
 487                     }
 488                     /* Add this force to the shift force */
 489                     for (int d = 0; d < DIM; d++)
 490                     {
 491                         fshift[is][d] += receiveBuffer[n][d];
 492                     }
 493                     n++;
 494                 }
 495             }
 496             else
 497             {
 498                 for (int j : ind.index)
 499                 {
 500                     /* Rotate the force */
 501                     f[j][XX] += receiveBuffer[n][XX];
 502                     f[j][YY] -= receiveBuffer[n][YY];
 503                     f[j][ZZ] -= receiveBuffer[n][ZZ];
 504                     if (shiftForcesNeedPbc)
 505                     {
 506                         /* Add this force to the shift force */
 507                         for (int d = 0; d < DIM; d++)
 508                         {
 509                             fshift[is][d] += receiveBuffer[n][d];
 510                         }
 511                     }
 512                     n++;
 513                 }
 514             }
 515         }
 516         nzone /= 2;
 517     }
 518     wallcycle_stop(wcycle, ewcMOVEF);
 519 }
 520
 521 /* Convenience function for extracting a real buffer from an rvec buffer
 522  *
 523  * To reduce the number of temporary communication buffers and avoid
 524  * cache polution, we reuse gmx::RVec buffers for storing reals.
 525  * This functions return a real buffer reference with the same number
 526  * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
 527  */
 528 static gmx::ArrayRef<real>
 529 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
 530 {
 531     return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
 532                                   arrayRef.size());
 533 }
 534
 535 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
 536 {
 537     int                    nzone, nat_tot;
 538     gmx_domdec_comm_t     *comm;
 539     gmx_domdec_comm_dim_t *cd;
 540
 541     comm = dd->comm;
 542
 543     nzone   = 1;
 544     nat_tot = comm->atomRanges.numHomeAtoms();
 545     for (int d = 0; d < dd->ndim; d++)
 546     {
 547         cd = &comm->cd[d];
 548         for (const gmx_domdec_ind_t &ind : cd->ind)
 549         {
 550             /* Note: We provision for RVec instead of real, so a factor of 3
 551              * more than needed. The buffer actually already has this size
 552              * and we pass a plain pointer below, so this does not matter.
 553              */
 554             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 555             gmx::ArrayRef<real>       sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 556             int                       n          = 0;
 557             for (int j : ind.index)
 558             {
 559                 sendBuffer[n++] = v[j];
 560             }
 561
 562             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 563
 564             gmx::ArrayRef<real>       receiveBuffer;
 565             if (cd->receiveInPlace)
 566             {
 567                 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 568             }
 569             else
 570             {
 571                 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 572             }
 573             /* Send and receive the data */
 574             ddSendrecv(dd, d, dddirBackward,
 575                        sendBuffer, receiveBuffer);
 576             if (!cd->receiveInPlace)
 577             {
 578                 int j = 0;
 579                 for (int zone = 0; zone < nzone; zone++)
 580                 {
 581                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 582                     {
 583                         v[i] = receiveBuffer[j++];
 584                     }
 585                 }
 586             }
 587             nat_tot += ind.nrecv[nzone+1];
 588         }
 589         nzone += nzone;
 590     }
 591 }
 592
 593 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
 594 {
 595     int                    nzone, nat_tot;
 596     gmx_domdec_comm_t     *comm;
 597     gmx_domdec_comm_dim_t *cd;
 598
 599     comm = dd->comm;
 600
 601     nzone   = comm->zones.n/2;
 602     nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
 603     for (int d = dd->ndim-1; d >= 0; d--)
 604     {
 605         cd = &comm->cd[d];
 606         for (int p = cd->numPulses() - 1; p >= 0; p--)
 607         {
 608             const gmx_domdec_ind_t &ind = cd->ind[p];
 609
 610             /* Note: We provision for RVec instead of real, so a factor of 3
 611              * more than needed. The buffer actually already has this size
 612              * and we typecast, so this works as intended.
 613              */
 614             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 615             gmx::ArrayRef<real>       receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 616             nat_tot -= ind.nrecv[nzone + 1];
 617
 618             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 619
 620             gmx::ArrayRef<real>       sendBuffer;
 621             if (cd->receiveInPlace)
 622             {
 623                 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 624             }
 625             else
 626             {
 627                 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 628                 int j = 0;
 629                 for (int zone = 0; zone < nzone; zone++)
 630                 {
 631                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 632                     {
 633                         sendBuffer[j++] = v[i];
 634                     }
 635                 }
 636             }
 637             /* Communicate the forces */
 638             ddSendrecv(dd, d, dddirForward,
 639                        sendBuffer, receiveBuffer);
 640             /* Add the received forces */
 641             int n = 0;
 642             for (int j : ind.index)
 643             {
 644                 v[j] += receiveBuffer[n];
 645                 n++;
 646             }
 647         }
 648         nzone /= 2;
 649     }
 650 }
 651
 652 real dd_cutoff_multibody(const gmx_domdec_t *dd)
 653 {
 654     const gmx_domdec_comm_t &comm       = *dd->comm;
 655     const DDSystemInfo      &systemInfo = comm.systemInfo;
 656
 657     real                     r = -1;
 658     if (systemInfo.haveInterDomainMultiBodyBondeds)
 659     {
 660         if (comm.cutoff_mbody > 0)
 661         {
 662             r = comm.cutoff_mbody;
 663         }
 664         else
 665         {
 666             /* cutoff_mbody=0 means we do not have DLB */
 667             r = comm.cellsize_min[dd->dim[0]];
 668             for (int di = 1; di < dd->ndim; di++)
 669             {
 670                 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
 671             }
 672             if (comm.systemInfo.filterBondedCommunication)
 673             {
 674                 r = std::max(r, comm.cutoff_mbody);
 675             }
 676             else
 677             {
 678                 r = std::min(r, systemInfo.cutoff);
 679             }
 680         }
 681     }
 682
 683     return r;
 684 }
 685
 686 real dd_cutoff_twobody(const gmx_domdec_t *dd)
 687 {
 688     real r_mb;
 689
 690     r_mb = dd_cutoff_multibody(dd);
 691
 692     return std::max(dd->comm->systemInfo.cutoff, r_mb);
 693 }
 694
 695
 696 static void dd_cart_coord2pmecoord(const DDRankSetup        &ddRankSetup,
 697                                    const CartesianRankSetup &cartSetup,
 698                                    const ivec                coord,
 699                                    ivec                      coord_pme)
 700 {
 701     const int nc   = ddRankSetup.numPPCells[cartSetup.cartpmedim];
 702     const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
 703     copy_ivec(coord, coord_pme);
 704     coord_pme[cartSetup.cartpmedim] =
 705         nc + (coord[cartSetup.cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
 706 }
 707
 708 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
 709 static int ddindex2pmeindex(const DDRankSetup &ddRankSetup,
 710                             const int          ddCellIndex)
 711 {
 712     const int npp  = ddRankSetup.numPPRanks;
 713     const int npme = ddRankSetup.numRanksDoingPme;
 714
 715     /* Here we assign a PME node to communicate with this DD node
 716      * by assuming that the major index of both is x.
 717      * We add npme/2 to obtain an even distribution.
 718      */
 719     return (ddCellIndex*npme + npme/2)/npp;
 720 }
 721
 722 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup &ddRankSetup)
 723 {
 724     std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
 725
 726     int              n = 0;
 727     for (int i = 0; i < ddRankSetup.numPPRanks; i++)
 728     {
 729         const int p0 = ddindex2pmeindex(ddRankSetup, i);
 730         const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
 731         if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
 732         {
 733             if (debug)
 734             {
 735                 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
 736             }
 737             pmeRanks[n] = i + 1 + n;
 738             n++;
 739         }
 740     }
 741
 742     return pmeRanks;
 743 }
 744
 745 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
 746 {
 747     gmx_domdec_t *dd;
 748     ivec          coords;
 749     int           slab;
 750
 751     dd = cr->dd;
 752     /*
 753        if (dd->comm->bCartesian) {
 754        gmx_ddindex2xyz(dd->nc,ddindex,coords);
 755        dd_coords2pmecoords(dd,coords,coords_pme);
 756        copy_ivec(dd->ntot,nc);
 757        nc[dd->cartpmedim]         -= dd->nc[dd->cartpmedim];
 758        coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
 759
 760        slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
 761        } else {
 762        slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
 763        }
 764      */
 765     coords[XX] = x;
 766     coords[YY] = y;
 767     coords[ZZ] = z;
 768     slab       = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->nc, coords));
 769
 770     return slab;
 771 }
 772
 773 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
 774 {
 775     const CartesianRankSetup &cartSetup = cr->dd->comm->cartesianRankSetup;
 776     ivec                      coords    = { x, y, z };
 777     int                       nodeid    = -1;
 778
 779     if (cartSetup.bCartesianPP_PME)
 780     {
 781 #if GMX_MPI
 782         MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
 783 #endif
 784     }
 785     else
 786     {
 787         int ddindex = dd_index(cr->dd->nc, coords);
 788         if (cartSetup.bCartesianPP)
 789         {
 790             nodeid = cartSetup.ddindex2simnodeid[ddindex];
 791         }
 792         else
 793         {
 794             if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
 795             {
 796                 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
 797             }
 798             else
 799             {
 800                 nodeid = ddindex;
 801             }
 802         }
 803     }
 804
 805     return nodeid;
 806 }
 807
 808 static int dd_simnode2pmenode(const DDRankSetup          &ddRankSetup,
 809                               const CartesianRankSetup   &cartSetup,
 810                               gmx::ArrayRef<const int>    pmeRanks,
 811                               const t_commrec gmx_unused *cr,
 812                               const int                   sim_nodeid)
 813 {
 814     int                pmenode = -1;
 815
 816     /* This assumes a uniform x domain decomposition grid cell size */
 817     if (cartSetup.bCartesianPP_PME)
 818     {
 819 #if GMX_MPI
 820         ivec coord, coord_pme;
 821         MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
 822         if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
 823         {
 824             /* This is a PP rank */
 825             dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
 826             MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
 827         }
 828 #endif
 829     }
 830     else if (cartSetup.bCartesianPP)
 831     {
 832         if (sim_nodeid < ddRankSetup.numPPRanks)
 833         {
 834             pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
 835         }
 836     }
 837     else
 838     {
 839         /* This assumes DD cells with identical x coordinates
 840          * are numbered sequentially.
 841          */
 842         if (pmeRanks.empty())
 843         {
 844             if (sim_nodeid < ddRankSetup.numPPRanks)
 845             {
 846                 /* The DD index equals the nodeid */
 847                 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
 848             }
 849         }
 850         else
 851         {
 852             int i = 0;
 853             while (sim_nodeid > pmeRanks[i])
 854             {
 855                 i++;
 856             }
 857             if (sim_nodeid < pmeRanks[i])
 858             {
 859                 pmenode = pmeRanks[i];
 860             }
 861         }
 862     }
 863
 864     return pmenode;
 865 }
 866
 867 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
 868 {
 869     if (dd != nullptr)
 870     {
 871         return {
 872                    dd->comm->ddRankSetup.npmenodes_x,
 873                    dd->comm->ddRankSetup.npmenodes_y
 874         };
 875     }
 876     else
 877     {
 878         return {
 879                    1, 1
 880         };
 881     }
 882 }
 883
 884 std::vector<int> get_pme_ddranks(const t_commrec *cr,
 885                                  const int        pmenodeid)
 886 {
 887     const DDRankSetup        &ddRankSetup = cr->dd->comm->ddRankSetup;
 888     const CartesianRankSetup &cartSetup   = cr->dd->comm->cartesianRankSetup;
 889     GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks, "This function should only be called when PME-only ranks are in use");
 890     std::vector<int>          ddranks;
 891     ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1)/ddRankSetup.numRanksDoingPme);
 892
 893     for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
 894     {
 895         for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
 896         {
 897             for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
 898             {
 899                 if (cartSetup.bCartesianPP_PME)
 900                 {
 901                     ivec coord = { x, y, z };
 902                     ivec coord_pme;
 903                     dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
 904                     if (cr->dd->ci[XX] == coord_pme[XX] &&
 905                         cr->dd->ci[YY] == coord_pme[YY] &&
 906                         cr->dd->ci[ZZ] == coord_pme[ZZ])
 907                     {
 908                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 909                     }
 910                 }
 911                 else
 912                 {
 913                     /* The slab corresponds to the nodeid in the PME group */
 914                     if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
 915                     {
 916                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 917                     }
 918                 }
 919             }
 920         }
 921     }
 922     return ddranks;
 923 }
 924
 925 static gmx_bool receive_vir_ener(const gmx_domdec_t       *dd,
 926                                  gmx::ArrayRef<const int>  pmeRanks,
 927                                  const t_commrec          *cr)
 928 {
 929     gmx_bool           bReceive    = TRUE;
 930
 931     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
 932     if (ddRankSetup.usePmeOnlyRanks)
 933     {
 934         const CartesianRankSetup &cartSetup   = dd->comm->cartesianRankSetup;
 935         if (cartSetup.bCartesianPP_PME)
 936         {
 937 #if GMX_MPI
 938             int  pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
 939             ivec coords;
 940             MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
 941             coords[cartSetup.cartpmedim]++;
 942             if (coords[cartSetup.cartpmedim] < dd->nc[cartSetup.cartpmedim])
 943             {
 944                 int rank;
 945                 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
 946                 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
 947                 {
 948                     /* This is not the last PP node for pmenode */
 949                     bReceive = FALSE;
 950                 }
 951             }
 952 #else
 953             GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
 954 #endif
 955         }
 956         else
 957         {
 958             int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
 959             if (cr->sim_nodeid+1 < cr->nnodes &&
 960                 dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
 961             {
 962                 /* This is not the last PP node for pmenode */
 963                 bReceive = FALSE;
 964             }
 965         }
 966     }
 967
 968     return bReceive;
 969 }
 970
 971 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
 972 {
 973     gmx_domdec_comm_t *comm;
 974     int                i;
 975
 976     comm = dd->comm;
 977
 978     snew(*dim_f, dd->nc[dim]+1);
 979     (*dim_f)[0] = 0;
 980     for (i = 1; i < dd->nc[dim]; i++)
 981     {
 982         if (comm->slb_frac[dim])
 983         {
 984             (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
 985         }
 986         else
 987         {
 988             (*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
 989         }
 990     }
 991     (*dim_f)[dd->nc[dim]] = 1;
 992 }
 993
 994 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
 995 {
 996     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
 997
 998     if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
 999     {
1000         ddpme->dim = YY;
1001     }
1002     else
1003     {
1004         ddpme->dim = dimind;
1005     }
1006     ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
1007
1008     ddpme->nslab = (ddpme->dim == 0 ?
1009                     ddRankSetup.npmenodes_x :
1010                     ddRankSetup.npmenodes_y);
1011
1012     if (ddpme->nslab <= 1)
1013     {
1014         return;
1015     }
1016
1017     const int nso = ddRankSetup.numRanksDoingPme/ddpme->nslab;
1018     /* Determine for each PME slab the PP location range for dimension dim */
1019     snew(ddpme->pp_min, ddpme->nslab);
1020     snew(ddpme->pp_max, ddpme->nslab);
1021     for (int slab = 0; slab < ddpme->nslab; slab++)
1022     {
1023         ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
1024         ddpme->pp_max[slab] = 0;
1025     }
1026     for (int i = 0; i < dd->nnodes; i++)
1027     {
1028         ivec xyz;
1029         ddindex2xyz(dd->nc, i, xyz);
1030         /* For y only use our y/z slab.
1031          * This assumes that the PME x grid size matches the DD grid size.
1032          */
1033         if (dimind == 0 || xyz[XX] == dd->ci[XX])
1034         {
1035             const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
1036             int       slab;
1037             if (dimind == 0)
1038             {
1039                 slab = pmeindex/nso;
1040             }
1041             else
1042             {
1043                 slab = pmeindex % ddpme->nslab;
1044             }
1045             ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
1046             ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1047         }
1048     }
1049
1050     set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1051 }
1052
1053 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1054 {
1055     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1056
1057     if (ddRankSetup.ddpme[0].dim == XX)
1058     {
1059         return ddRankSetup.ddpme[0].maxshift;
1060     }
1061     else
1062     {
1063         return 0;
1064     }
1065 }
1066
1067 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1068 {
1069     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1070
1071     if (ddRankSetup.ddpme[0].dim == YY)
1072     {
1073         return ddRankSetup.ddpme[0].maxshift;
1074     }
1075     else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
1076     {
1077         return ddRankSetup.ddpme[1].maxshift;
1078     }
1079     else
1080     {
1081         return 0;
1082     }
1083 }
1084
1085 bool ddHaveSplitConstraints(const gmx_domdec_t &dd)
1086 {
1087     return dd.comm->systemInfo.haveSplitConstraints;
1088 }
1089
1090 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
1091 {
1092     /* Note that the cycles value can be incorrect, either 0 or some
1093      * extremely large value, when our thread migrated to another core
1094      * with an unsynchronized cycle counter. If this happens less often
1095      * that once per nstlist steps, this will not cause issues, since
1096      * we later subtract the maximum value from the sum over nstlist steps.
1097      * A zero count will slightly lower the total, but that's a small effect.
1098      * Note that the main purpose of the subtraction of the maximum value
1099      * is to avoid throwing off the load balancing when stalls occur due
1100      * e.g. system activity or network congestion.
1101      */
1102     dd->comm->cycl[ddCycl] += cycles;
1103     dd->comm->cycl_n[ddCycl]++;
1104     if (cycles > dd->comm->cycl_max[ddCycl])
1105     {
1106         dd->comm->cycl_max[ddCycl] = cycles;
1107     }
1108 }
1109
1110 #if GMX_MPI
1111 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
1112 {
1113     MPI_Comm           c_row;
1114     int                dim, i, rank;
1115     ivec               loc_c;
1116     gmx_bool           bPartOfGroup = FALSE;
1117
1118     dim = dd->dim[dim_ind];
1119     copy_ivec(loc, loc_c);
1120     for (i = 0; i < dd->nc[dim]; i++)
1121     {
1122         loc_c[dim] = i;
1123         rank       = dd_index(dd->nc, loc_c);
1124         if (rank == dd->rank)
1125         {
1126             /* This process is part of the group */
1127             bPartOfGroup = TRUE;
1128         }
1129     }
1130     MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
1131                    &c_row);
1132     if (bPartOfGroup)
1133     {
1134         dd->comm->mpi_comm_load[dim_ind] = c_row;
1135         if (!isDlbDisabled(dd->comm))
1136         {
1137             DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1138
1139             if (dd->ci[dim] == dd->master_ci[dim])
1140             {
1141                 /* This is the root process of this row */
1142                 cellsizes.rowMaster  = std::make_unique<RowMaster>();
1143
1144                 RowMaster &rowMaster = *cellsizes.rowMaster;
1145                 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1146                 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
1147                 rowMaster.isCellMin.resize(dd->nc[dim]);
1148                 if (dim_ind > 0)
1149                 {
1150                     rowMaster.bounds.resize(dd->nc[dim]);
1151                 }
1152                 rowMaster.buf_ncd.resize(dd->nc[dim]);
1153             }
1154             else
1155             {
1156                 /* This is not a root process, we only need to receive cell_f */
1157                 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1158             }
1159         }
1160         if (dd->ci[dim] == dd->master_ci[dim])
1161         {
1162             snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
1163         }
1164     }
1165 }
1166 #endif
1167
1168 void dd_setup_dlb_resource_sharing(t_commrec            *cr,
1169                                    int                   gpu_id)
1170 {
1171 #if GMX_MPI
1172     int           physicalnode_id_hash;
1173     gmx_domdec_t *dd;
1174     MPI_Comm      mpi_comm_pp_physicalnode;
1175
1176     if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1177     {
1178         /* Only ranks with short-ranged tasks (currently) use GPUs.
1179          * If we don't have GPUs assigned, there are no resources to share.
1180          */
1181         return;
1182     }
1183
1184     physicalnode_id_hash = gmx_physicalnode_id_hash();
1185
1186     dd = cr->dd;
1187
1188     if (debug)
1189     {
1190         fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1191         fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
1192                 dd->rank, physicalnode_id_hash, gpu_id);
1193     }
1194     /* Split the PP communicator over the physical nodes */
1195     /* TODO: See if we should store this (before), as it's also used for
1196      * for the nodecomm summation.
1197      */
1198     // TODO PhysicalNodeCommunicator could be extended/used to handle
1199     // the need for per-node per-group communicators.
1200     MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
1201                    &mpi_comm_pp_physicalnode);
1202     MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
1203                    &dd->comm->mpi_comm_gpu_shared);
1204     MPI_Comm_free(&mpi_comm_pp_physicalnode);
1205     MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1206
1207     if (debug)
1208     {
1209         fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1210     }
1211
1212     /* Note that some ranks could share a GPU, while others don't */
1213
1214     if (dd->comm->nrank_gpu_shared == 1)
1215     {
1216         MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1217     }
1218 #else
1219     GMX_UNUSED_VALUE(cr);
1220     GMX_UNUSED_VALUE(gpu_id);
1221 #endif
1222 }
1223
1224 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
1225 {
1226 #if GMX_MPI
1227     int  dim0, dim1, i, j;
1228     ivec loc;
1229
1230     if (debug)
1231     {
1232         fprintf(debug, "Making load communicators\n");
1233     }
1234
1235     dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1236     snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1237
1238     if (dd->ndim == 0)
1239     {
1240         return;
1241     }
1242
1243     clear_ivec(loc);
1244     make_load_communicator(dd, 0, loc);
1245     if (dd->ndim > 1)
1246     {
1247         dim0 = dd->dim[0];
1248         for (i = 0; i < dd->nc[dim0]; i++)
1249         {
1250             loc[dim0] = i;
1251             make_load_communicator(dd, 1, loc);
1252         }
1253     }
1254     if (dd->ndim > 2)
1255     {
1256         dim0 = dd->dim[0];
1257         for (i = 0; i < dd->nc[dim0]; i++)
1258         {
1259             loc[dim0] = i;
1260             dim1      = dd->dim[1];
1261             for (j = 0; j < dd->nc[dim1]; j++)
1262             {
1263                 loc[dim1] = j;
1264                 make_load_communicator(dd, 2, loc);
1265             }
1266         }
1267     }
1268
1269     if (debug)
1270     {
1271         fprintf(debug, "Finished making load communicators\n");
1272     }
1273 #endif
1274 }
1275
1276 /*! \brief Sets up the relation between neighboring domains and zones */
1277 static void setup_neighbor_relations(gmx_domdec_t *dd)
1278 {
1279     int                     d, dim, i, j, m;
1280     ivec                    tmp, s;
1281     gmx_domdec_zones_t     *zones;
1282     gmx_domdec_ns_ranges_t *izone;
1283     GMX_ASSERT(dd->ndim >= 0, "Must have non-negative number of dimensions for DD");
1284
1285     for (d = 0; d < dd->ndim; d++)
1286     {
1287         dim = dd->dim[d];
1288         copy_ivec(dd->ci, tmp);
1289         tmp[dim]           = (tmp[dim] + 1) % dd->nc[dim];
1290         dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1291         copy_ivec(dd->ci, tmp);
1292         tmp[dim]           = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
1293         dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1294         if (debug)
1295         {
1296             fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
1297                     dd->rank, dim,
1298                     dd->neighbor[d][0],
1299                     dd->neighbor[d][1]);
1300         }
1301     }
1302
1303     int nzone  = (1 << dd->ndim);
1304     GMX_ASSERT(dd->ndim < DIM, "Invalid number of dimensions");
1305     int nizone = (1 << std::max(dd->ndim - 1, 0));
1306     assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1307
1308     zones = &dd->comm->zones;
1309
1310     for (i = 0; i < nzone; i++)
1311     {
1312         m = 0;
1313         clear_ivec(zones->shift[i]);
1314         for (d = 0; d < dd->ndim; d++)
1315         {
1316             zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1317         }
1318     }
1319
1320     zones->n = nzone;
1321     for (i = 0; i < nzone; i++)
1322     {
1323         for (d = 0; d < DIM; d++)
1324         {
1325             s[d] = dd->ci[d] - zones->shift[i][d];
1326             if (s[d] < 0)
1327             {
1328                 s[d] += dd->nc[d];
1329             }
1330             else if (s[d] >= dd->nc[d])
1331             {
1332                 s[d] -= dd->nc[d];
1333             }
1334         }
1335     }
1336     zones->nizone = nizone;
1337     for (i = 0; i < zones->nizone; i++)
1338     {
1339         assert(ddNonbondedZonePairRanges[i][0] == i);
1340
1341         izone     = &zones->izone[i];
1342         /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
1343          * j-zones up to nzone.
1344          */
1345         izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
1346         izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
1347         for (dim = 0; dim < DIM; dim++)
1348         {
1349             if (dd->nc[dim] == 1)
1350             {
1351                 /* All shifts should be allowed */
1352                 izone->shift0[dim] = -1;
1353                 izone->shift1[dim] = 1;
1354             }
1355             else
1356             {
1357                 /* Determine the min/max j-zone shift wrt the i-zone */
1358                 izone->shift0[dim] = 1;
1359                 izone->shift1[dim] = -1;
1360                 for (j = izone->j0; j < izone->j1; j++)
1361                 {
1362                     int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
1363                     if (shift_diff < izone->shift0[dim])
1364                     {
1365                         izone->shift0[dim] = shift_diff;
1366                     }
1367                     if (shift_diff > izone->shift1[dim])
1368                     {
1369                         izone->shift1[dim] = shift_diff;
1370                     }
1371                 }
1372             }
1373         }
1374     }
1375
1376     if (!isDlbDisabled(dd->comm))
1377     {
1378         dd->comm->cellsizesWithDlb.resize(dd->ndim);
1379     }
1380
1381     if (dd->comm->ddSettings.recordLoad)
1382     {
1383         make_load_communicators(dd);
1384     }
1385 }
1386
1387 static void make_pp_communicator(const gmx::MDLogger  &mdlog,
1388                                  gmx_domdec_t         *dd,
1389                                  t_commrec gmx_unused *cr,
1390                                  bool gmx_unused       reorder)
1391 {
1392 #if GMX_MPI
1393     gmx_domdec_comm_t  *comm      = dd->comm;
1394     CartesianRankSetup &cartSetup = comm->cartesianRankSetup;
1395
1396     if (cartSetup.bCartesianPP)
1397     {
1398         /* Set up cartesian communication for the particle-particle part */
1399         GMX_LOG(mdlog.info).appendTextFormatted(
1400                 "Will use a Cartesian communicator: %d x %d x %d",
1401                 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
1402
1403         ivec periods;
1404         for (int i = 0; i < DIM; i++)
1405         {
1406             periods[i] = TRUE;
1407         }
1408         MPI_Comm comm_cart;
1409         MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
1410                         &comm_cart);
1411         /* We overwrite the old communicator with the new cartesian one */
1412         cr->mpi_comm_mygroup = comm_cart;
1413     }
1414
1415     dd->mpi_comm_all = cr->mpi_comm_mygroup;
1416     MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1417
1418     if (cartSetup.bCartesianPP_PME)
1419     {
1420         /* Since we want to use the original cartesian setup for sim,
1421          * and not the one after split, we need to make an index.
1422          */
1423         cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1424         cartSetup.ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
1425         gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1426         /* Get the rank of the DD master,
1427          * above we made sure that the master node is a PP node.
1428          */
1429         int rank;
1430         if (MASTER(cr))
1431         {
1432             rank = dd->rank;
1433         }
1434         else
1435         {
1436             rank = 0;
1437         }
1438         MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1439     }
1440     else if (cartSetup.bCartesianPP)
1441     {
1442         if (!comm->ddRankSetup.usePmeOnlyRanks)
1443         {
1444             /* The PP communicator is also
1445              * the communicator for this simulation
1446              */
1447             cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1448         }
1449         cr->nodeid = dd->rank;
1450
1451         MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1452
1453         /* We need to make an index to go from the coordinates
1454          * to the nodeid of this simulation.
1455          */
1456         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1457         std::vector<int> buf(dd->nnodes);
1458         if (thisRankHasDuty(cr, DUTY_PP))
1459         {
1460             buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1461         }
1462         /* Communicate the ddindex to simulation nodeid index */
1463         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1464                       cr->mpi_comm_mysim);
1465
1466         /* Determine the master coordinates and rank.
1467          * The DD master should be the same node as the master of this sim.
1468          */
1469         for (int i = 0; i < dd->nnodes; i++)
1470         {
1471             if (cartSetup.ddindex2simnodeid[i] == 0)
1472             {
1473                 ddindex2xyz(dd->nc, i, dd->master_ci);
1474                 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1475             }
1476         }
1477         if (debug)
1478         {
1479             fprintf(debug, "The master rank is %d\n", dd->masterrank);
1480         }
1481     }
1482     else
1483     {
1484         /* No Cartesian communicators */
1485         /* We use the rank in dd->comm->all as DD index */
1486         ddindex2xyz(dd->nc, dd->rank, dd->ci);
1487         /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1488         dd->masterrank = 0;
1489         clear_ivec(dd->master_ci);
1490     }
1491 #endif
1492
1493     GMX_LOG(mdlog.info).appendTextFormatted(
1494             "Domain decomposition rank %d, coordinates %d %d %d\n",
1495             dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1496     if (debug)
1497     {
1498         fprintf(debug,
1499                 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
1500                 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1501     }
1502 }
1503
1504 static void receive_ddindex2simnodeid(gmx_domdec_t         *dd,
1505                                       t_commrec            *cr)
1506 {
1507 #if GMX_MPI
1508     CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
1509
1510     if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1511     {
1512         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1513         std::vector<int> buf(dd->nnodes);
1514         if (thisRankHasDuty(cr, DUTY_PP))
1515         {
1516             buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1517         }
1518         /* Communicate the ddindex to simulation nodeid index */
1519         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1520                       cr->mpi_comm_mysim);
1521     }
1522 #else
1523     GMX_UNUSED_VALUE(dd);
1524     GMX_UNUSED_VALUE(cr);
1525 #endif
1526 }
1527
1528 static CartesianRankSetup
1529 split_communicator(const gmx::MDLogger    &mdlog,
1530                    t_commrec              *cr,
1531                    const DdRankOrder       ddRankOrder,
1532                    bool gmx_unused         reorder,
1533                    const DDRankSetup      &ddRankSetup,
1534                    ivec                    ddCellIndex,
1535                    std::vector<int>       *pmeRanks)
1536 {
1537     CartesianRankSetup cartSetup;
1538
1539     cartSetup.bCartesianPP     = (ddRankOrder == DdRankOrder::cartesian);
1540     cartSetup.bCartesianPP_PME = false;
1541
1542     const ivec &numDDCells     = ddRankSetup.numPPCells;
1543     /* Initially we set ntot to the number of PP cells */
1544     copy_ivec(numDDCells, cartSetup.ntot);
1545
1546     if (cartSetup.bCartesianPP)
1547     {
1548         const int numDDCellsTot = ddRankSetup.numPPRanks;
1549         bool      bDiv[DIM];
1550         for (int i = 1; i < DIM; i++)
1551         {
1552             bDiv[i] = ((ddRankSetup.numRanksDoingPme*numDDCells[i]) % numDDCellsTot == 0);
1553         }
1554         if (bDiv[YY] || bDiv[ZZ])
1555         {
1556             cartSetup.bCartesianPP_PME = TRUE;
1557             /* If we have 2D PME decomposition, which is always in x+y,
1558              * we stack the PME only nodes in z.
1559              * Otherwise we choose the direction that provides the thinnest slab
1560              * of PME only nodes as this will have the least effect
1561              * on the PP communication.
1562              * But for the PME communication the opposite might be better.
1563              */
1564             if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 ||
1565                              !bDiv[YY] ||
1566                              numDDCells[YY] > numDDCells[ZZ]))
1567             {
1568                 cartSetup.cartpmedim = ZZ;
1569             }
1570             else
1571             {
1572                 cartSetup.cartpmedim = YY;
1573             }
1574             cartSetup.ntot[cartSetup.cartpmedim]
1575                 += (ddRankSetup.numRanksDoingPme*numDDCells[cartSetup.cartpmedim])/numDDCellsTot;
1576         }
1577         else
1578         {
1579             GMX_LOG(mdlog.info).appendTextFormatted(
1580                     "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)",
1581                     ddRankSetup.numRanksDoingPme,
1582                     numDDCells[XX], numDDCells[YY],
1583                     numDDCells[XX], numDDCells[ZZ]);
1584             GMX_LOG(mdlog.info).appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1585         }
1586     }
1587
1588     if (cartSetup.bCartesianPP_PME)
1589     {
1590 #if GMX_MPI
1591         int  rank;
1592         ivec periods;
1593
1594         GMX_LOG(mdlog.info).appendTextFormatted(
1595                 "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1596                 cartSetup.ntot[XX], cartSetup.ntot[YY], cartSetup.ntot[ZZ]);
1597
1598         for (int i = 0; i < DIM; i++)
1599         {
1600             periods[i] = TRUE;
1601         }
1602         MPI_Comm comm_cart;
1603         MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder),
1604                         &comm_cart);
1605         MPI_Comm_rank(comm_cart, &rank);
1606         if (MASTER(cr) && rank != 0)
1607         {
1608             gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1609         }
1610
1611         /* With this assigment we loose the link to the original communicator
1612          * which will usually be MPI_COMM_WORLD, unless have multisim.
1613          */
1614         cr->mpi_comm_mysim = comm_cart;
1615         cr->sim_nodeid     = rank;
1616
1617         MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1618
1619         GMX_LOG(mdlog.info).appendTextFormatted(
1620                 "Cartesian rank %d, coordinates %d %d %d\n",
1621                 cr->sim_nodeid, ddCellIndex[XX], ddCellIndex[YY], ddCellIndex[ZZ]);
1622
1623         if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1624         {
1625             cr->duty = DUTY_PP;
1626         }
1627         if (!ddRankSetup.usePmeOnlyRanks ||
1628             ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1629         {
1630             cr->duty = DUTY_PME;
1631         }
1632
1633         /* Split the sim communicator into PP and PME only nodes */
1634         MPI_Comm_split(cr->mpi_comm_mysim,
1635                        getThisRankDuties(cr),
1636                        dd_index(cartSetup.ntot, ddCellIndex),
1637                        &cr->mpi_comm_mygroup);
1638 #else
1639         GMX_UNUSED_VALUE(ddCellIndex);
1640 #endif
1641     }
1642     else
1643     {
1644         switch (ddRankOrder)
1645         {
1646             case DdRankOrder::pp_pme:
1647                 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1648                 break;
1649             case DdRankOrder::interleave:
1650                 /* Interleave the PP-only and PME-only ranks */
1651                 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1652                 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1653                 break;
1654             case DdRankOrder::cartesian:
1655                 break;
1656             default:
1657                 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1658         }
1659
1660         if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1661         {
1662             cr->duty = DUTY_PME;
1663         }
1664         else
1665         {
1666             cr->duty = DUTY_PP;
1667         }
1668 #if GMX_MPI
1669         /* Split the sim communicator into PP and PME only nodes */
1670         MPI_Comm_split(cr->mpi_comm_mysim,
1671                        getThisRankDuties(cr),
1672                        cr->nodeid,
1673                        &cr->mpi_comm_mygroup);
1674         MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1675 #endif
1676     }
1677
1678     GMX_LOG(mdlog.info).appendTextFormatted(
1679             "This rank does only %s work.\n",
1680             thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1681
1682     return cartSetup;
1683 }
1684
1685 /*! \brief Makes the PP communicator and the PME communicator, when needed
1686  *
1687  * Returns the Cartesian rank setup.
1688  * Sets \p cr->mpi_comm_mygroup
1689  * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1690  * With separate PME ranks in interleaved order, set the PME ranks in \p pmeRanks.
1691  */
1692 static CartesianRankSetup
1693 makeGroupCommunicators(const gmx::MDLogger &mdlog,
1694                        const DDSettings    &ddSettings,
1695                        const DdRankOrder    ddRankOrder,
1696                        const DDRankSetup   &ddRankSetup,
1697                        t_commrec           *cr,
1698                        ivec                 ddCellIndex,
1699                        std::vector<int>    *pmeRanks)
1700 {
1701     CartesianRankSetup cartSetup;
1702
1703     if (ddRankSetup.usePmeOnlyRanks)
1704     {
1705         /* Split the communicator into a PP and PME part */
1706         cartSetup =
1707             split_communicator(mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder,
1708                                ddRankSetup, ddCellIndex, pmeRanks);
1709     }
1710     else
1711     {
1712         /* All nodes do PP and PME */
1713         /* We do not require separate communicators */
1714         cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
1715
1716         cartSetup.bCartesianPP     = false;
1717         cartSetup.bCartesianPP_PME = false;
1718     }
1719
1720     return cartSetup;
1721 }
1722
1723 /*! \brief For PP ranks, sets or makes the communicator
1724  *
1725  * For PME ranks get the rank id.
1726  * For PP only ranks, sets the PME-only rank.
1727  */
1728 static void setupGroupCommunication(const gmx::MDLogger      &mdlog,
1729                                     const DDSettings         &ddSettings,
1730                                     gmx::ArrayRef<const int>  pmeRanks,
1731                                     t_commrec                *cr,
1732                                     gmx_domdec_t             *dd)
1733 {
1734     const DDRankSetup        &ddRankSetup = dd->comm->ddRankSetup;
1735     const CartesianRankSetup &cartSetup   = dd->comm->cartesianRankSetup;
1736
1737     if (thisRankHasDuty(cr, DUTY_PP))
1738     {
1739         /* Copy or make a new PP communicator */
1740
1741         /* We (possibly) reordered the nodes in split_communicator,
1742          * so it is no longer required in make_pp_communicator.
1743          */
1744         const bool useCartesianReorder =
1745             (ddSettings.useCartesianReorder &&
1746              !cartSetup.bCartesianPP_PME);
1747
1748         make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1749     }
1750     else
1751     {
1752         receive_ddindex2simnodeid(dd, cr);
1753     }
1754
1755     if (!thisRankHasDuty(cr, DUTY_PME))
1756     {
1757         /* Set up the commnuication to our PME node */
1758         dd->pme_nodeid           = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1759         dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1760         if (debug)
1761         {
1762             fprintf(debug, "My pme_nodeid %d receive ener %s\n",
1763                     dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
1764         }
1765     }
1766     else
1767     {
1768         dd->pme_nodeid = -1;
1769     }
1770
1771     /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1772     if (MASTER(cr))
1773     {
1774         dd->ma = std::make_unique<AtomDistribution>(dd->nc,
1775                                                     dd->comm->cgs_gl.nr,
1776                                                     dd->comm->cgs_gl.index[dd->comm->cgs_gl.nr]);
1777     }
1778 }
1779
1780 static real *get_slb_frac(const gmx::MDLogger &mdlog,
1781                           const char *dir, int nc, const char *size_string)
1782 {
1783     real  *slb_frac, tot;
1784     int    i, n;
1785     double dbl;
1786
1787     slb_frac = nullptr;
1788     if (nc > 1 && size_string != nullptr)
1789     {
1790         GMX_LOG(mdlog.info).appendTextFormatted(
1791                 "Using static load balancing for the %s direction", dir);
1792         snew(slb_frac, nc);
1793         tot = 0;
1794         for (i = 0; i < nc; i++)
1795         {
1796             dbl = 0;
1797             sscanf(size_string, "%20lf%n", &dbl, &n);
1798             if (dbl == 0)
1799             {
1800                 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
1801             }
1802             slb_frac[i]  = dbl;
1803             size_string += n;
1804             tot         += slb_frac[i];
1805         }
1806         /* Normalize */
1807         std::string relativeCellSizes = "Relative cell sizes:";
1808         for (i = 0; i < nc; i++)
1809         {
1810             slb_frac[i]       /= tot;
1811             relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1812         }
1813         GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1814     }
1815
1816     return slb_frac;
1817 }
1818
1819 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
1820 {
1821     int                  n     = 0;
1822     gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1823     int                  nmol;
1824     while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1825     {
1826         for (auto &ilist : extractILists(*ilists, IF_BOND))
1827         {
1828             if (NRAL(ilist.functionType) >  2)
1829             {
1830                 n += nmol*(ilist.iatoms.size()/ilistStride(ilist));
1831             }
1832         }
1833     }
1834
1835     return n;
1836 }
1837
1838 static int dd_getenv(const gmx::MDLogger &mdlog,
1839                      const char *env_var, int def)
1840 {
1841     char *val;
1842     int   nst;
1843
1844     nst = def;
1845     val = getenv(env_var);
1846     if (val)
1847     {
1848         if (sscanf(val, "%20d", &nst) <= 0)
1849         {
1850             nst = 1;
1851         }
1852         GMX_LOG(mdlog.info).appendTextFormatted(
1853                 "Found env.var. %s = %s, using value %d",
1854                 env_var, val, nst);
1855     }
1856
1857     return nst;
1858 }
1859
1860 static void check_dd_restrictions(const gmx_domdec_t  *dd,
1861                                   const t_inputrec    *ir,
1862                                   const gmx::MDLogger &mdlog)
1863 {
1864     if (ir->ePBC == epbcSCREW &&
1865         (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
1866     {
1867         gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
1868     }
1869
1870     if (ir->ns_type == ensSIMPLE)
1871     {
1872         gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
1873     }
1874
1875     if (ir->nstlist == 0)
1876     {
1877         gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1878     }
1879
1880     if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
1881     {
1882         GMX_LOG(mdlog.warning).appendText("comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
1883     }
1884 }
1885
1886 static real average_cellsize_min(const gmx_ddbox_t &ddbox,
1887                                  const ivec         numDomains)
1888 {
1889     real r = ddbox.box_size[XX];
1890     for (int d = 0; d < DIM; d++)
1891     {
1892         if (numDomains[d] > 1)
1893         {
1894             /* Check using the initial average cell size */
1895             r = std::min(r, ddbox.box_size[d]*ddbox.skew_fac[d]/numDomains[d]);
1896         }
1897     }
1898
1899     return r;
1900 }
1901
1902 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
1903  */
1904 static DlbState forceDlbOffOrBail(DlbState             cmdlineDlbState,
1905                                   const std::string   &reasonStr,
1906                                   const gmx::MDLogger &mdlog)
1907 {
1908     std::string dlbNotSupportedErr  = "Dynamic load balancing requested, but ";
1909     std::string dlbDisableNote      = "NOTE: disabling dynamic load balancing as ";
1910
1911     if (cmdlineDlbState == DlbState::onUser)
1912     {
1913         gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1914     }
1915     else if (cmdlineDlbState == DlbState::offCanTurnOn)
1916     {
1917         GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1918     }
1919     return DlbState::offForever;
1920 }
1921
1922 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1923  *
1924  * This function parses the parameters of "-dlb" command line option setting
1925  * corresponding state values. Then it checks the consistency of the determined
1926  * state with other run parameters and settings. As a result, the initial state
1927  * may be altered or an error may be thrown if incompatibility of options is detected.
1928  *
1929  * \param [in] mdlog       Logger.
1930  * \param [in] dlbOption   Enum value for the DLB option.
1931  * \param [in] bRecordLoad True if the load balancer is recording load information.
1932  * \param [in] mdrunOptions  Options for mdrun.
1933  * \param [in] ir          Pointer mdrun to input parameters.
1934  * \returns                DLB initial/startup state.
1935  */
1936 static DlbState determineInitialDlbState(const gmx::MDLogger &mdlog,
1937                                          DlbOption dlbOption, gmx_bool bRecordLoad,
1938                                          const gmx::MdrunOptions &mdrunOptions,
1939                                          const t_inputrec *ir)
1940 {
1941     DlbState dlbState = DlbState::offCanTurnOn;
1942
1943     switch (dlbOption)
1944     {
1945         case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1946         case DlbOption::no:               dlbState = DlbState::offUser;      break;
1947         case DlbOption::yes:              dlbState = DlbState::onUser;       break;
1948         default: gmx_incons("Invalid dlbOption enum value");
1949     }
1950
1951     /* Reruns don't support DLB: bail or override auto mode */
1952     if (mdrunOptions.rerun)
1953     {
1954         std::string reasonStr = "it is not supported in reruns.";
1955         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1956     }
1957
1958     /* Unsupported integrators */
1959     if (!EI_DYNAMICS(ir->eI))
1960     {
1961         auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1962         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1963     }
1964
1965     /* Without cycle counters we can't time work to balance on */
1966     if (!bRecordLoad)
1967     {
1968         std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
1969         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1970     }
1971
1972     if (mdrunOptions.reproducible)
1973     {
1974         std::string reasonStr = "you started a reproducible run.";
1975         switch (dlbState)
1976         {
1977             case DlbState::offUser:
1978                 break;
1979             case DlbState::offForever:
1980                 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1981                 break;
1982             case DlbState::offCanTurnOn:
1983                 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1984             case DlbState::onCanTurnOff:
1985                 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1986                 break;
1987             case DlbState::onUser:
1988                 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", mdlog);
1989             default:
1990                 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", static_cast<int>(dlbState));
1991         }
1992     }
1993
1994     return dlbState;
1995 }
1996
1997 /* Sets the order of the DD dimensions, returns the number of DD dimensions */
1998 static int set_dd_dim(const ivec        numDDCells,
1999                       const DDSettings &ddSettings,
2000                       ivec              dims)
2001 {
2002     int ndim = 0;
2003     if (ddSettings.useDDOrderZYX)
2004     {
2005         /* Decomposition order z,y,x */
2006         for (int dim = DIM - 1; dim >= 0; dim--)
2007         {
2008             if (numDDCells[dim] > 1)
2009             {
2010                 dims[ndim++] = dim;
2011             }
2012         }
2013     }
2014     else
2015     {
2016         /* Decomposition order x,y,z */
2017         for (int dim = 0; dim < DIM; dim++)
2018         {
2019             if (numDDCells[dim] > 1)
2020             {
2021                 dims[ndim++] = dim;
2022             }
2023         }
2024     }
2025
2026     if (ndim == 0)
2027     {
2028         /* Set dim[0] to avoid extra checks on ndim in several places */
2029         dims[0] = XX;
2030     }
2031
2032     return ndim;
2033 }
2034
2035 static gmx_domdec_comm_t *init_dd_comm()
2036 {
2037     gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
2038
2039     comm->n_load_have      = 0;
2040     comm->n_load_collect   = 0;
2041
2042     comm->haveTurnedOffDlb = false;
2043
2044     for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
2045     {
2046         comm->sum_nat[i] = 0;
2047     }
2048     comm->ndecomp   = 0;
2049     comm->nload     = 0;
2050     comm->load_step = 0;
2051     comm->load_sum  = 0;
2052     comm->load_max  = 0;
2053     clear_ivec(comm->load_lim);
2054     comm->load_mdf  = 0;
2055     comm->load_pme  = 0;
2056
2057     /* This should be replaced by a unique pointer */
2058     comm->balanceRegion = ddBalanceRegionAllocate();
2059
2060     return comm;
2061 }
2062
2063 /* Returns whether mtop contains constraints and/or vsites */
2064 static bool systemHasConstraintsOrVsites(const gmx_mtop_t &mtop)
2065 {
2066     auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
2067     int  nmol;
2068     while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
2069     {
2070         if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
2071         {
2072             return true;
2073         }
2074     }
2075
2076     return false;
2077 }
2078
2079 static void setupUpdateGroups(const gmx::MDLogger &mdlog,
2080                               const gmx_mtop_t    &mtop,
2081                               const t_inputrec    &inputrec,
2082                               const real           cutoffMargin,
2083                               DDSystemInfo        *systemInfo)
2084 {
2085     /* When we have constraints and/or vsites, it is beneficial to use
2086      * update groups (when possible) to allow independent update of groups.
2087      */
2088     if (!systemHasConstraintsOrVsites(mtop))
2089     {
2090         /* No constraints or vsites, atoms can be updated independently */
2091         return;
2092     }
2093
2094     systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
2095     systemInfo->useUpdateGroups               =
2096         (!systemInfo->updateGroupingPerMoleculetype.empty() &&
2097          getenv("GMX_NO_UPDATEGROUPS") == nullptr);
2098
2099     if (systemInfo->useUpdateGroups)
2100     {
2101         int numUpdateGroups = 0;
2102         for (const auto &molblock : mtop.molblock)
2103         {
2104             numUpdateGroups += molblock.nmol*systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
2105         }
2106
2107         systemInfo->maxUpdateGroupRadius =
2108             computeMaxUpdateGroupRadius(mtop,
2109                                         systemInfo->updateGroupingPerMoleculetype,
2110                                         maxReferenceTemperature(inputrec));
2111
2112         /* To use update groups, the large domain-to-domain cutoff distance
2113          * should be compatible with the box size.
2114          */
2115         systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
2116
2117         if (systemInfo->useUpdateGroups)
2118         {
2119             GMX_LOG(mdlog.info).appendTextFormatted(
2120                     "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f nm\n",
2121                     numUpdateGroups,
2122                     mtop.natoms/static_cast<double>(numUpdateGroups),
2123                     systemInfo->maxUpdateGroupRadius);
2124         }
2125         else
2126         {
2127             GMX_LOG(mdlog.info).appendTextFormatted("The combination of rlist and box size prohibits the use of update groups\n");
2128             systemInfo->updateGroupingPerMoleculetype.clear();
2129         }
2130     }
2131 }
2132
2133 UnitCellInfo::UnitCellInfo(const t_inputrec &ir) :
2134     npbcdim(ePBC2npbcdim(ir.ePBC)),
2135     numBoundedDimensions(inputrec2nboundeddim(&ir)),
2136     ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
2137     haveScrewPBC(ir.ePBC == epbcSCREW)
2138 {
2139 }
2140
2141 /*! \brief Generate the simulation system information */
2142 static DDSystemInfo
2143 getSystemInfo(const gmx::MDLogger           &mdlog,
2144               t_commrec                     *cr,
2145               const DomdecOptions           &options,
2146               const gmx_mtop_t              *mtop,
2147               const t_inputrec              *ir,
2148               const matrix                   box,
2149               gmx::ArrayRef<const gmx::RVec> xGlobal)
2150 {
2151     const real         tenPercentMargin = 1.1;
2152
2153     DDSystemInfo       systemInfo;
2154
2155     /* We need to decide on update groups early, as this affects communication distances */
2156     systemInfo.useUpdateGroups = false;
2157     if (ir->cutoff_scheme == ecutsVERLET)
2158     {
2159         real cutoffMargin = std::sqrt(max_cutoff2(ir->ePBC, box)) - ir->rlist;
2160         setupUpdateGroups(mdlog, *mtop, *ir, cutoffMargin, &systemInfo);
2161     }
2162
2163     // TODO: Check whether all bondeds are within update groups
2164     systemInfo.haveInterDomainBondeds          = (mtop->natoms > gmx_mtop_num_molecules(*mtop) ||
2165                                                   mtop->bIntermolecularInteractions);
2166     systemInfo.haveInterDomainMultiBodyBondeds = (multi_body_bondeds_count(mtop) > 0);
2167
2168     if (systemInfo.useUpdateGroups)
2169     {
2170         systemInfo.haveSplitConstraints = false;
2171         systemInfo.haveSplitSettles     = false;
2172     }
2173     else
2174     {
2175         systemInfo.haveSplitConstraints = gmx::inter_charge_group_constraints(*mtop);
2176         systemInfo.haveSplitSettles     = gmx::inter_charge_group_settles(*mtop);
2177     }
2178
2179     if (ir->rlist == 0)
2180     {
2181         /* Set the cut-off to some very large value,
2182          * so we don't need if statements everywhere in the code.
2183          * We use sqrt, since the cut-off is squared in some places.
2184          */
2185         systemInfo.cutoff = GMX_CUTOFF_INF;
2186     }
2187     else
2188     {
2189         systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir->rlist);
2190     }
2191     systemInfo.minCutoffForMultiBody = 0;
2192
2193     /* Determine the minimum cell size limit, affected by many factors */
2194     systemInfo.cellsizeLimit             = 0;
2195     systemInfo.filterBondedCommunication = false;
2196
2197     /* We do not allow home atoms to move beyond the neighboring domain
2198      * between domain decomposition steps, which limits the cell size.
2199      * Get an estimate of cell size limit due to atom displacement.
2200      * In most cases this is a large overestimate, because it assumes
2201      * non-interaction atoms.
2202      * We set the chance to 1 in a trillion steps.
2203      */
2204     constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2205     const real     limitForAtomDisplacement          =
2206         minCellSizeForAtomDisplacement(*mtop, *ir,
2207                                        systemInfo.updateGroupingPerMoleculetype,
2208                                        c_chanceThatAtomMovesBeyondDomain);
2209     GMX_LOG(mdlog.info).appendTextFormatted(
2210             "Minimum cell size due to atom displacement: %.3f nm",
2211             limitForAtomDisplacement);
2212
2213     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2214                                         limitForAtomDisplacement);
2215
2216     /* TODO: PME decomposition currently requires atoms not to be more than
2217      *       2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2218      *       In nearly all cases, limitForAtomDisplacement will be smaller
2219      *       than 2/3*rlist, so the PME requirement is satisfied.
2220      *       But it would be better for both correctness and performance
2221      *       to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2222      *       Note that we would need to improve the pairlist buffer case.
2223      */
2224
2225     if (systemInfo.haveInterDomainBondeds)
2226     {
2227         if (options.minimumCommunicationRange > 0)
2228         {
2229             systemInfo.minCutoffForMultiBody =
2230                 atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2231             if (options.useBondedCommunication)
2232             {
2233                 systemInfo.filterBondedCommunication = (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2234             }
2235             else
2236             {
2237                 systemInfo.cutoff = std::max(systemInfo.cutoff,
2238                                              systemInfo.minCutoffForMultiBody);
2239             }
2240         }
2241         else if (ir->bPeriodicMols)
2242         {
2243             /* Can not easily determine the required cut-off */
2244             GMX_LOG(mdlog.warning).appendText("NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.");
2245             systemInfo.minCutoffForMultiBody = systemInfo.cutoff/2;
2246         }
2247         else
2248         {
2249             real r_2b, r_mb;
2250
2251             if (MASTER(cr))
2252             {
2253                 dd_bonded_cg_distance(mdlog, mtop, ir, as_rvec_array(xGlobal.data()), box,
2254                                       options.checkBondedInteractions,
2255                                       &r_2b, &r_mb);
2256             }
2257             gmx_bcast(sizeof(r_2b), &r_2b, cr);
2258             gmx_bcast(sizeof(r_mb), &r_mb, cr);
2259
2260             /* We use an initial margin of 10% for the minimum cell size,
2261              * except when we are just below the non-bonded cut-off.
2262              */
2263             if (options.useBondedCommunication)
2264             {
2265                 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2266                 {
2267                     const real r_bonded              = std::max(r_2b, r_mb);
2268                     systemInfo.minCutoffForMultiBody = tenPercentMargin*r_bonded;
2269                     /* This is the (only) place where we turn on the filtering */
2270                     systemInfo.filterBondedCommunication = true;
2271                 }
2272                 else
2273                 {
2274                     const real r_bonded              = r_mb;
2275                     systemInfo.minCutoffForMultiBody = std::min(tenPercentMargin*r_bonded,
2276                                                                 systemInfo.cutoff);
2277                 }
2278                 /* We determine cutoff_mbody later */
2279                 systemInfo.increaseMultiBodyCutoff = true;
2280             }
2281             else
2282             {
2283                 /* No special bonded communication,
2284                  * simply increase the DD cut-off.
2285                  */
2286                 systemInfo.minCutoffForMultiBody = tenPercentMargin*std::max(r_2b, r_mb);
2287                 systemInfo.cutoff                = std::max(systemInfo.cutoff,
2288                                                             systemInfo.minCutoffForMultiBody);
2289             }
2290         }
2291         GMX_LOG(mdlog.info).appendTextFormatted(
2292                 "Minimum cell size due to bonded interactions: %.3f nm",
2293                 systemInfo.minCutoffForMultiBody);
2294
2295         systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2296                                             systemInfo.minCutoffForMultiBody);
2297     }
2298
2299     systemInfo.constraintCommunicationRange = 0;
2300     if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
2301     {
2302         /* There is a cell size limit due to the constraints (P-LINCS) */
2303         systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, mtop, ir);
2304         GMX_LOG(mdlog.info).appendTextFormatted(
2305                 "Estimated maximum distance required for P-LINCS: %.3f nm",
2306                 systemInfo.constraintCommunicationRange);
2307         if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2308         {
2309             GMX_LOG(mdlog.info).appendText("This distance will limit the DD cell size, you can override this with -rcon");
2310         }
2311     }
2312     else if (options.constraintCommunicationRange > 0)
2313     {
2314         /* Here we do not check for dd->splitConstraints.
2315          * because one can also set a cell size limit for virtual sites only
2316          * and at this point we don't know yet if there are intercg v-sites.
2317          */
2318         GMX_LOG(mdlog.info).appendTextFormatted(
2319                 "User supplied maximum distance required for P-LINCS: %.3f nm",
2320                 options.constraintCommunicationRange);
2321         systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2322     }
2323     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2324                                         systemInfo.constraintCommunicationRange);
2325
2326     return systemInfo;
2327 }
2328
2329 /*! \brief Set the cell size and interaction limits, as well as the DD grid
2330  *
2331  * Also computes the initial ddbox.
2332  */
2333 static DDGridSetup
2334 getDDGridSetup(const gmx::MDLogger           &mdlog,
2335                t_commrec                     *cr,
2336                const DomdecOptions           &options,
2337                const DDSettings              &ddSettings,
2338                const DDSystemInfo            &systemInfo,
2339                const gmx_mtop_t              *mtop,
2340                const t_inputrec              *ir,
2341                const matrix                   box,
2342                gmx::ArrayRef<const gmx::RVec> xGlobal,
2343                gmx_ddbox_t                   *ddbox)
2344 {
2345     DDGridSetup ddGridSetup;
2346
2347     if (options.numCells[XX] > 0)
2348     {
2349         copy_ivec(options.numCells, ddGridSetup.numDomains);
2350         set_ddbox_cr(*cr, &ddGridSetup.numDomains, *ir, box, xGlobal, ddbox);
2351
2352         if (options.numPmeRanks >= 0)
2353         {
2354             ddGridSetup.numPmeOnlyRanks = options.numPmeRanks;
2355         }
2356         else
2357         {
2358             /* When the DD grid is set explicitly and -npme is set to auto,
2359              * don't use PME ranks. We check later if the DD grid is
2360              * compatible with the total number of ranks.
2361              */
2362             ddGridSetup.numPmeOnlyRanks = 0;
2363         }
2364     }
2365     else
2366     {
2367         set_ddbox_cr(*cr, nullptr, *ir, box, xGlobal, ddbox);
2368
2369         /* We need to choose the optimal DD grid and possibly PME nodes */
2370         ddGridSetup =
2371             dd_choose_grid(mdlog, cr, ir, mtop, box, ddbox,
2372                            options.numPmeRanks,
2373                            ddSettings.request1DAnd1Pulse,
2374                            !isDlbDisabled(ddSettings.initialDlbState),
2375                            options.dlbScaling,
2376                            systemInfo);
2377
2378         if (ddGridSetup.numDomains[XX] == 0)
2379         {
2380             char     buf[STRLEN];
2381             gmx_bool bC = (systemInfo.haveSplitConstraints &&
2382                            systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2383             sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
2384                     !bC ? "-rdd" : "-rcon",
2385                     ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2386                     bC ? " or your LINCS settings" : "");
2387
2388             gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2389                                  "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
2390                                  "%s\n"
2391                                  "Look in the log file for details on the domain decomposition",
2392                                  cr->nnodes - ddGridSetup.numPmeOnlyRanks,
2393                                  ddGridSetup.cellsizeLimit,
2394                                  buf);
2395         }
2396     }
2397
2398     const real acs = average_cellsize_min(*ddbox, ddGridSetup.numDomains);
2399     if (acs < systemInfo.cellsizeLimit)
2400     {
2401         if (options.numCells[XX] <= 0)
2402         {
2403             GMX_RELEASE_ASSERT(false, "dd_choose_grid() should return a grid that satisfies the cell size limits");
2404         }
2405         else
2406         {
2407             gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2408                                  "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
2409                                  acs, systemInfo.cellsizeLimit);
2410         }
2411     }
2412
2413     const int numPPRanks = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2414     if (cr->nnodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2415     {
2416         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2417                              "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
2418                              numPPRanks, cr->nnodes - ddGridSetup.numPmeOnlyRanks, cr->nnodes);
2419     }
2420     if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2421     {
2422         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2423                              "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", ddGridSetup.numPmeOnlyRanks, numPPRanks);
2424     }
2425
2426     ddGridSetup.numDDDimensions = set_dd_dim(ddGridSetup.numDomains, ddSettings,
2427                                              ddGridSetup.ddDimensions);
2428
2429     return ddGridSetup;
2430 }
2431
2432 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2433 static DDRankSetup
2434 getDDRankSetup(const gmx::MDLogger &mdlog,
2435                t_commrec           *cr,
2436                const DDGridSetup   &ddGridSetup,
2437                const t_inputrec    &ir)
2438 {
2439     GMX_LOG(mdlog.info).appendTextFormatted(
2440             "Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2441             ddGridSetup.numDomains[XX], ddGridSetup.numDomains[YY], ddGridSetup.numDomains[ZZ],
2442             ddGridSetup.numPmeOnlyRanks);
2443
2444     DDRankSetup ddRankSetup;
2445
2446     ddRankSetup.numPPRanks = cr->nnodes - ddGridSetup.numPmeOnlyRanks;
2447     copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2448
2449     ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2450     if (ddRankSetup.usePmeOnlyRanks)
2451     {
2452         ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2453     }
2454     else
2455     {
2456         ddRankSetup.numRanksDoingPme = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2457     }
2458
2459     if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2460     {
2461         /* The following choices should match those
2462          * in comm_cost_est in domdec_setup.c.
2463          * Note that here the checks have to take into account
2464          * that the decomposition might occur in a different order than xyz
2465          * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2466          * in which case they will not match those in comm_cost_est,
2467          * but since that is mainly for testing purposes that's fine.
2468          */
2469         if (ddGridSetup.numDDDimensions >= 2 &&
2470             ddGridSetup.ddDimensions[0] == XX &&
2471             ddGridSetup.ddDimensions[1] == YY &&
2472             ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX] &&
2473             ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0 &&
2474             getenv("GMX_PMEONEDD") == nullptr)
2475         {
2476             ddRankSetup.npmedecompdim = 2;
2477             ddRankSetup.npmenodes_x   = ddGridSetup.numDomains[XX];
2478             ddRankSetup.npmenodes_y   = ddRankSetup.numRanksDoingPme/ddRankSetup.npmenodes_x;
2479         }
2480         else
2481         {
2482             /* In case nc is 1 in both x and y we could still choose to
2483              * decompose pme in y instead of x, but we use x for simplicity.
2484              */
2485             ddRankSetup.npmedecompdim = 1;
2486             if (ddGridSetup.ddDimensions[0] == YY)
2487             {
2488                 ddRankSetup.npmenodes_x = 1;
2489                 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2490             }
2491             else
2492             {
2493                 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2494                 ddRankSetup.npmenodes_y = 1;
2495             }
2496         }
2497         GMX_LOG(mdlog.info).appendTextFormatted(
2498                 "PME domain decomposition: %d x %d x %d",
2499                 ddRankSetup.npmenodes_x, ddRankSetup.npmenodes_y, 1);
2500     }
2501     else
2502     {
2503         ddRankSetup.npmedecompdim = 0;
2504         ddRankSetup.npmenodes_x   = 0;
2505         ddRankSetup.npmenodes_y   = 0;
2506     }
2507
2508     return ddRankSetup;
2509 }
2510
2511 /*! \brief Set the cell size and interaction limits */
2512 static void set_dd_limits(const gmx::MDLogger &mdlog,
2513                           t_commrec *cr, gmx_domdec_t *dd,
2514                           const DomdecOptions &options,
2515                           const DDSettings &ddSettings,
2516                           const DDSystemInfo &systemInfo,
2517                           const DDGridSetup &ddGridSetup,
2518                           const gmx_mtop_t *mtop,
2519                           const t_inputrec *ir,
2520                           const gmx_ddbox_t &ddbox)
2521 {
2522     gmx_domdec_comm_t *comm = dd->comm;
2523     comm->ddSettings        = ddSettings;
2524
2525     /* Initialize to GPU share count to 0, might change later */
2526     comm->nrank_gpu_shared = 0;
2527
2528     comm->dlbState         = comm->ddSettings.initialDlbState;
2529     dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2530     /* To consider turning DLB on after 2*nstlist steps we need to check
2531      * at partitioning count 3. Thus we need to increase the first count by 2.
2532      */
2533     comm->ddPartioningCountFirstDlbOff += 2;
2534
2535     comm->bPMELoadBalDLBLimits          = FALSE;
2536
2537     /* Allocate the charge group/atom sorting struct */
2538     comm->sort = std::make_unique<gmx_domdec_sort_t>();
2539
2540     comm->systemInfo = systemInfo;
2541
2542     if (systemInfo.useUpdateGroups)
2543     {
2544         /* Note: We would like to use dd->nnodes for the atom count estimate,
2545          *       but that is not yet available here. But this anyhow only
2546          *       affect performance up to the second dd_partition_system call.
2547          */
2548         const int homeAtomCountEstimate =  mtop->natoms/cr->nnodes;
2549         comm->updateGroupsCog =
2550             std::make_unique<gmx::UpdateGroupsCog>(*mtop,
2551                                                    systemInfo.updateGroupingPerMoleculetype,
2552                                                    maxReferenceTemperature(*ir),
2553                                                    homeAtomCountEstimate);
2554     }
2555
2556     comm->cgs_gl = gmx_mtop_global_cgs(mtop);
2557
2558     /* Set the DD setup given by ddGridSetup */
2559     copy_ivec(ddGridSetup.numDomains, dd->nc);
2560     dd->ndim = ddGridSetup.numDDDimensions;
2561     copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2562
2563     dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
2564
2565     snew(comm->slb_frac, DIM);
2566     if (isDlbDisabled(comm))
2567     {
2568         comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
2569         comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
2570         comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
2571     }
2572
2573     /* Set the multi-body cut-off and cellsize limit for DLB */
2574     comm->cutoff_mbody   = systemInfo.minCutoffForMultiBody;
2575     comm->cellsize_limit = systemInfo.cellsizeLimit;
2576     if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2577     {
2578         if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2579         {
2580             /* Set the bonded communication distance to halfway
2581              * the minimum and the maximum,
2582              * since the extra communication cost is nearly zero.
2583              */
2584             real acs           = average_cellsize_min(ddbox, dd->nc);
2585             comm->cutoff_mbody = 0.5*(systemInfo.minCutoffForMultiBody + acs);
2586             if (!isDlbDisabled(comm))
2587             {
2588                 /* Check if this does not limit the scaling */
2589                 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
2590                                               options.dlbScaling*acs);
2591             }
2592             if (!systemInfo.filterBondedCommunication)
2593             {
2594                 /* Without bBondComm do not go beyond the n.b. cut-off */
2595                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2596                 if (comm->cellsize_limit >= systemInfo.cutoff)
2597                 {
2598                     /* We don't loose a lot of efficieny
2599                      * when increasing it to the n.b. cut-off.
2600                      * It can even be slightly faster, because we need
2601                      * less checks for the communication setup.
2602                      */
2603                     comm->cutoff_mbody = systemInfo.cutoff;
2604                 }
2605             }
2606             /* Check if we did not end up below our original limit */
2607             comm->cutoff_mbody = std::max(comm->cutoff_mbody,
2608                                           systemInfo.minCutoffForMultiBody);
2609
2610             if (comm->cutoff_mbody > comm->cellsize_limit)
2611             {
2612                 comm->cellsize_limit = comm->cutoff_mbody;
2613             }
2614         }
2615         /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2616     }
2617
2618     if (debug)
2619     {
2620         fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
2621                 "cellsize limit %f\n",
2622                 gmx::boolToString(systemInfo.filterBondedCommunication),
2623                 comm->cellsize_limit);
2624     }
2625
2626     if (MASTER(cr))
2627     {
2628         check_dd_restrictions(dd, ir, mdlog);
2629     }
2630 }
2631
2632 static char *init_bLocalCG(const gmx_mtop_t *mtop)
2633 {
2634     int   ncg, cg;
2635     char *bLocalCG;
2636
2637     ncg = ncg_mtop(mtop);
2638     snew(bLocalCG, ncg);
2639     for (cg = 0; cg < ncg; cg++)
2640     {
2641         bLocalCG[cg] = FALSE;
2642     }
2643
2644     return bLocalCG;
2645 }
2646
2647 void dd_init_bondeds(FILE *fplog,
2648                      gmx_domdec_t *dd,
2649                      const gmx_mtop_t *mtop,
2650                      const gmx_vsite_t *vsite,
2651                      const t_inputrec *ir,
2652                      gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
2653 {
2654     gmx_domdec_comm_t *comm;
2655
2656     dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
2657
2658     comm = dd->comm;
2659
2660     if (comm->systemInfo.filterBondedCommunication)
2661     {
2662         /* Communicate atoms beyond the cut-off for bonded interactions */
2663         comm = dd->comm;
2664
2665         comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
2666
2667         comm->bLocalCG = init_bLocalCG(mtop);
2668     }
2669     else
2670     {
2671         /* Only communicate atoms based on cut-off */
2672         comm->cglink   = nullptr;
2673         comm->bLocalCG = nullptr;
2674     }
2675 }
2676
2677 static void writeSettings(gmx::TextWriter       *log,
2678                           gmx_domdec_t          *dd,
2679                           const gmx_mtop_t      *mtop,
2680                           const t_inputrec      *ir,
2681                           gmx_bool               bDynLoadBal,
2682                           real                   dlb_scale,
2683                           const gmx_ddbox_t     *ddbox)
2684 {
2685     gmx_domdec_comm_t *comm;
2686     int                d;
2687     ivec               np;
2688     real               limit, shrink;
2689
2690     comm = dd->comm;
2691
2692     if (bDynLoadBal)
2693     {
2694         log->writeString("The maximum number of communication pulses is:");
2695         for (d = 0; d < dd->ndim; d++)
2696         {
2697             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2698         }
2699         log->ensureLineBreak();
2700         log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm", comm->cellsize_limit);
2701         log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2702         log->writeString("The allowed shrink of domain decomposition cells is:");
2703         for (d = 0; d < DIM; d++)
2704         {
2705             if (dd->nc[d] > 1)
2706             {
2707                 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
2708                 {
2709                     shrink = 0;
2710                 }
2711                 else
2712                 {
2713                     shrink =
2714                         comm->cellsize_min_dlb[d]/
2715                         (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
2716                 }
2717                 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2718             }
2719         }
2720         log->ensureLineBreak();
2721     }
2722     else
2723     {
2724         set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2725         log->writeString("The initial number of communication pulses is:");
2726         for (d = 0; d < dd->ndim; d++)
2727         {
2728             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2729         }
2730         log->ensureLineBreak();
2731         log->writeString("The initial domain decomposition cell size is:");
2732         for (d = 0; d < DIM; d++)
2733         {
2734             if (dd->nc[d] > 1)
2735             {
2736                 log->writeStringFormatted(" %c %.2f nm",
2737                                           dim2char(d), dd->comm->cellsize_min[d]);
2738             }
2739         }
2740         log->ensureLineBreak();
2741         log->writeLine();
2742     }
2743
2744     const bool haveInterDomainVsites =
2745         (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
2746
2747     if (comm->systemInfo.haveInterDomainBondeds ||
2748         haveInterDomainVsites ||
2749         comm->systemInfo.haveSplitConstraints ||
2750         comm->systemInfo.haveSplitSettles)
2751     {
2752         std::string decompUnits;
2753         if (comm->systemInfo.useUpdateGroups)
2754         {
2755             decompUnits = "atom groups";
2756         }
2757         else
2758         {
2759             decompUnits = "atoms";
2760         }
2761
2762         log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:", decompUnits.c_str());
2763         log->writeLineFormatted("%40s  %-7s %6.3f nm", "non-bonded interactions", "", comm->systemInfo.cutoff);
2764
2765         if (bDynLoadBal)
2766         {
2767             limit = dd->comm->cellsize_limit;
2768         }
2769         else
2770         {
2771             if (dd->unitCellInfo.ddBoxIsDynamic)
2772             {
2773                 log->writeLine("(the following are initial values, they could change due to box deformation)");
2774             }
2775             limit = dd->comm->cellsize_min[XX];
2776             for (d = 1; d < DIM; d++)
2777             {
2778                 limit = std::min(limit, dd->comm->cellsize_min[d]);
2779             }
2780         }
2781
2782         if (comm->systemInfo.haveInterDomainBondeds)
2783         {
2784             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2785                                     "two-body bonded interactions", "(-rdd)",
2786                                     std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2787             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2788                                     "multi-body bonded interactions", "(-rdd)",
2789                                     (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->systemInfo.cutoff, limit));
2790         }
2791         if (haveInterDomainVsites)
2792         {
2793             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2794                                     "virtual site constructions", "(-rcon)", limit);
2795         }
2796         if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2797         {
2798             std::string separation = gmx::formatString("atoms separated by up to %d constraints",
2799                                                        1+ir->nProjOrder);
2800             log->writeLineFormatted("%40s  %-7s %6.3f nm\n",
2801                                     separation.c_str(), "(-rcon)", limit);
2802         }
2803         log->ensureLineBreak();
2804     }
2805 }
2806
2807 static void logSettings(const gmx::MDLogger &mdlog,
2808                         gmx_domdec_t        *dd,
2809                         const gmx_mtop_t    *mtop,
2810                         const t_inputrec    *ir,
2811                         real                 dlb_scale,
2812                         const gmx_ddbox_t   *ddbox)
2813 {
2814     gmx::StringOutputStream stream;
2815     gmx::TextWriter         log(&stream);
2816     writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2817     if (dd->comm->dlbState == DlbState::offCanTurnOn)
2818     {
2819         {
2820             log.ensureEmptyLine();
2821             log.writeLine("When dynamic load balancing gets turned on, these settings will change to:");
2822         }
2823         writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2824     }
2825     GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2826 }
2827
2828 static void set_cell_limits_dlb(const gmx::MDLogger &mdlog,
2829                                 gmx_domdec_t        *dd,
2830                                 real                 dlb_scale,
2831                                 const t_inputrec    *ir,
2832                                 const gmx_ddbox_t   *ddbox)
2833 {
2834     gmx_domdec_comm_t *comm;
2835     int                d, dim, npulse, npulse_d_max, npulse_d;
2836     gmx_bool           bNoCutOff;
2837
2838     comm = dd->comm;
2839
2840     bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2841
2842     /* Determine the maximum number of comm. pulses in one dimension */
2843
2844     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2845
2846     /* Determine the maximum required number of grid pulses */
2847     if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2848     {
2849         /* Only a single pulse is required */
2850         npulse = 1;
2851     }
2852     else if (!bNoCutOff && comm->cellsize_limit > 0)
2853     {
2854         /* We round down slightly here to avoid overhead due to the latency
2855          * of extra communication calls when the cut-off
2856          * would be only slightly longer than the cell size.
2857          * Later cellsize_limit is redetermined,
2858          * so we can not miss interactions due to this rounding.
2859          */
2860         npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff/comm->cellsize_limit);
2861     }
2862     else
2863     {
2864         /* There is no cell size limit */
2865         npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
2866     }
2867
2868     if (!bNoCutOff && npulse > 1)
2869     {
2870         /* See if we can do with less pulses, based on dlb_scale */
2871         npulse_d_max = 0;
2872         for (d = 0; d < dd->ndim; d++)
2873         {
2874             dim      = dd->dim[d];
2875             npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->systemInfo.cutoff
2876                                         /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
2877             npulse_d_max = std::max(npulse_d_max, npulse_d);
2878         }
2879         npulse = std::min(npulse, npulse_d_max);
2880     }
2881
2882     /* This env var can override npulse */
2883     d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2884     if (d > 0)
2885     {
2886         npulse = d;
2887     }
2888
2889     comm->maxpulse       = 1;
2890     comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
2891     for (d = 0; d < dd->ndim; d++)
2892     {
2893         if (comm->ddSettings.request1DAnd1Pulse)
2894         {
2895             comm->cd[d].np_dlb = 1;
2896         }
2897         else
2898         {
2899             comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
2900             comm->maxpulse     = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2901         }
2902         if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
2903         {
2904             comm->bVacDLBNoLimit = FALSE;
2905         }
2906     }
2907
2908     /* cellsize_limit is set for LINCS in init_domain_decomposition */
2909     if (!comm->bVacDLBNoLimit)
2910     {
2911         comm->cellsize_limit = std::max(comm->cellsize_limit,
2912                                         comm->systemInfo.cutoff/comm->maxpulse);
2913     }
2914     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2915     /* Set the minimum cell size for each DD dimension */
2916     for (d = 0; d < dd->ndim; d++)
2917     {
2918         if (comm->bVacDLBNoLimit ||
2919             comm->cd[d].np_dlb*comm->cellsize_limit >= comm->systemInfo.cutoff)
2920         {
2921             comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2922         }
2923         else
2924         {
2925             comm->cellsize_min_dlb[dd->dim[d]] =
2926                 comm->systemInfo.cutoff/comm->cd[d].np_dlb;
2927         }
2928     }
2929     if (comm->cutoff_mbody <= 0)
2930     {
2931         comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2932     }
2933     if (isDlbOn(comm))
2934     {
2935         set_dlb_limits(dd);
2936     }
2937 }
2938
2939 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
2940 {
2941     /* If each molecule is a single charge group
2942      * or we use domain decomposition for each periodic dimension,
2943      * we do not need to take pbc into account for the bonded interactions.
2944      */
2945     return (ePBC != epbcNONE && dd->comm->systemInfo.haveInterDomainBondeds &&
2946             !(dd->nc[XX] > 1 &&
2947               dd->nc[YY] > 1 &&
2948               (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
2949 }
2950
2951 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2952 static void set_ddgrid_parameters(const gmx::MDLogger &mdlog,
2953                                   gmx_domdec_t *dd, real dlb_scale,
2954                                   const gmx_mtop_t *mtop, const t_inputrec *ir,
2955                                   const gmx_ddbox_t *ddbox)
2956 {
2957     gmx_domdec_comm_t *comm        = dd->comm;
2958     DDRankSetup       &ddRankSetup = comm->ddRankSetup;
2959
2960     if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2961     {
2962         init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2963         if (ddRankSetup.npmedecompdim >= 2)
2964         {
2965             init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2966         }
2967     }
2968     else
2969     {
2970         ddRankSetup.numRanksDoingPme = 0;
2971         if (dd->pme_nodeid >= 0)
2972         {
2973             gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2974                                  "Can not have separate PME ranks without PME electrostatics");
2975         }
2976     }
2977
2978     if (debug)
2979     {
2980         fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2981     }
2982     if (!isDlbDisabled(comm))
2983     {
2984         set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2985     }
2986
2987     logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
2988
2989     real vol_frac;
2990     if (ir->ePBC == epbcNONE)
2991     {
2992         vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
2993     }
2994     else
2995     {
2996         vol_frac =
2997             (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, ddbox))/static_cast<double>(dd->nnodes);
2998     }
2999     if (debug)
3000     {
3001         fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
3002     }
3003     int natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
3004
3005     dd->ga2la      = new gmx_ga2la_t(natoms_tot,
3006                                      static_cast<int>(vol_frac*natoms_tot));
3007 }
3008
3009 /*! \brief Get some important DD parameters which can be modified by env.vars */
3010 static DDSettings
3011 getDDSettings(const gmx::MDLogger     &mdlog,
3012               const DomdecOptions     &options,
3013               const gmx::MdrunOptions &mdrunOptions,
3014               const t_inputrec        &ir)
3015 {
3016     DDSettings ddSettings;
3017
3018     ddSettings.useSendRecv2        = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
3019     ddSettings.dlb_scale_lim       = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
3020     // TODO GPU halo exchange requires a 1D single-pulse DD, and when
3021     // it is properly integrated the hack with GMX_GPU_DD_COMMS should
3022     // be removed.
3023     ddSettings.request1DAnd1Pulse  = (bool(dd_getenv(mdlog, "GMX_DD_1D_1PULSE", 0)) ||
3024                                       (bool(getenv("GMX_GPU_DD_COMMS") != nullptr &&
3025                                             GMX_THREAD_MPI &&
3026                                             (GMX_GPU == GMX_GPU_CUDA))));
3027     ddSettings.useDDOrderZYX       = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
3028     ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
3029     ddSettings.eFlop               = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
3030     const int recload              = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
3031     ddSettings.nstDDDump           = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
3032     ddSettings.nstDDDumpGrid       = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
3033     ddSettings.DD_debug            = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
3034
3035     if (ddSettings.useSendRecv2)
3036     {
3037         GMX_LOG(mdlog.info).appendText("Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication");
3038     }
3039
3040     if (ddSettings.eFlop)
3041     {
3042         GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
3043         ddSettings.recordLoad = true;
3044     }
3045     else
3046     {
3047         ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
3048     }
3049
3050     ddSettings.initialDlbState =
3051         determineInitialDlbState(mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, &ir);
3052     GMX_LOG(mdlog.info).appendTextFormatted("Dynamic load balancing: %s",
3053                                             edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);
3054
3055     return ddSettings;
3056 }
3057
3058 gmx_domdec_t::gmx_domdec_t(const t_inputrec &ir) :
3059     unitCellInfo(ir)
3060 {
3061 }
3062
3063 gmx_domdec_t *init_domain_decomposition(const gmx::MDLogger           &mdlog,
3064                                         t_commrec                     *cr,
3065                                         const DomdecOptions           &options,
3066                                         const gmx::MdrunOptions       &mdrunOptions,
3067                                         const gmx_mtop_t              *mtop,
3068                                         const t_inputrec              *ir,
3069                                         const matrix                   box,
3070                                         gmx::ArrayRef<const gmx::RVec> xGlobal,
3071                                         gmx::LocalAtomSetManager      *atomSets)
3072 {
3073     GMX_LOG(mdlog.info).appendTextFormatted(
3074             "\nInitializing Domain Decomposition on %d ranks", cr->nnodes);
3075
3076     DDSettings  ddSettings = getDDSettings(mdlog, options, mdrunOptions, *ir);
3077     if (ddSettings.eFlop > 1)
3078     {
3079         /* Ensure that we have different random flop counts on different ranks */
3080         srand(1 + cr->nodeid);
3081     }
3082
3083     DDSystemInfo systemInfo = getSystemInfo(mdlog, cr, options, mtop, ir, box, xGlobal);
3084
3085     gmx_ddbox_t  ddbox       = {0};
3086     DDGridSetup  ddGridSetup = getDDGridSetup(mdlog, cr, options, ddSettings, systemInfo,
3087                                               mtop, ir, box, xGlobal, &ddbox);
3088
3089     cr->npmenodes = ddGridSetup.numPmeOnlyRanks;
3090
3091     DDRankSetup ddRankSetup = getDDRankSetup(mdlog, cr, ddGridSetup, *ir);
3092
3093     /* Generate the group communicator, also decides the duty of each rank */
3094     ivec               ddCellIndex = { 0, 0, 0 };
3095     std::vector<int>   pmeRanks;
3096     CartesianRankSetup cartSetup   =
3097         makeGroupCommunicators(mdlog, ddSettings, options.rankOrder,
3098                                ddRankSetup, cr,
3099                                ddCellIndex, &pmeRanks);
3100
3101     gmx_domdec_t *dd = new gmx_domdec_t(*ir);
3102
3103     copy_ivec(ddCellIndex, dd->ci);
3104
3105     dd->comm = init_dd_comm();
3106
3107     dd->comm->ddRankSetup        = ddRankSetup;
3108     dd->comm->cartesianRankSetup = cartSetup;
3109
3110     set_dd_limits(mdlog, cr, dd, options,
3111                   ddSettings, systemInfo, ddGridSetup,
3112                   mtop, ir,
3113                   ddbox);
3114
3115     setupGroupCommunication(mdlog, ddSettings, pmeRanks, cr, dd);
3116
3117     if (thisRankHasDuty(cr, DUTY_PP))
3118     {
3119         set_ddgrid_parameters(mdlog, dd, options.dlbScaling, mtop, ir, &ddbox);
3120
3121         setup_neighbor_relations(dd);
3122     }
3123
3124     /* Set overallocation to avoid frequent reallocation of arrays */
3125     set_over_alloc_dd(TRUE);
3126
3127     dd->atomSets = atomSets;
3128
3129     return dd;
3130 }
3131
3132 static gmx_bool test_dd_cutoff(t_commrec                     *cr,
3133                                const matrix                   box,
3134                                gmx::ArrayRef<const gmx::RVec> x,
3135                                real                           cutoffRequested)
3136 {
3137     gmx_domdec_t *dd;
3138     gmx_ddbox_t   ddbox;
3139     int           d, dim, np;
3140     real          inv_cell_size;
3141     int           LocallyLimited;
3142
3143     dd = cr->dd;
3144
3145     set_ddbox(*dd, false, box, true, x, &ddbox);
3146
3147     LocallyLimited = 0;
3148
3149     for (d = 0; d < dd->ndim; d++)
3150     {
3151         dim = dd->dim[d];
3152
3153         inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
3154         if (dd->unitCellInfo.ddBoxIsDynamic)
3155         {
3156             inv_cell_size *= DD_PRES_SCALE_MARGIN;
3157         }
3158
3159         np = 1 + static_cast<int>(cutoffRequested*inv_cell_size*ddbox.skew_fac[dim]);
3160
3161         if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3162         {
3163             if (np > dd->comm->cd[d].np_dlb)
3164             {
3165                 return FALSE;
3166             }
3167
3168             /* If a current local cell size is smaller than the requested
3169              * cut-off, we could still fix it, but this gets very complicated.
3170              * Without fixing here, we might actually need more checks.
3171              */
3172             real cellSizeAlongDim = (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim];
3173             if (cellSizeAlongDim*dd->comm->cd[d].np_dlb < cutoffRequested)
3174             {
3175                 LocallyLimited = 1;
3176             }
3177         }
3178     }
3179
3180     if (!isDlbDisabled(dd->comm))
3181     {
3182         /* If DLB is not active yet, we don't need to check the grid jumps.
3183          * Actually we shouldn't, because then the grid jump data is not set.
3184          */
3185         if (isDlbOn(dd->comm) &&
3186             check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3187         {
3188             LocallyLimited = 1;
3189         }
3190
3191         gmx_sumi(1, &LocallyLimited, cr);
3192
3193         if (LocallyLimited > 0)
3194         {
3195             return FALSE;
3196         }
3197     }
3198
3199     return TRUE;
3200 }
3201
3202 gmx_bool change_dd_cutoff(t_commrec                     *cr,
3203                           const matrix                   box,
3204                           gmx::ArrayRef<const gmx::RVec> x,
3205                           real                           cutoffRequested)
3206 {
3207     gmx_bool bCutoffAllowed;
3208
3209     bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3210
3211     if (bCutoffAllowed)
3212     {
3213         cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3214     }
3215
3216     return bCutoffAllowed;
3217 }