src/gromacs/domdec/domdec.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #include "gmxpre.h"
  37
  38 #include "domdec.h"
  39
  40 #include "config.h"
  41
  42 #include <cassert>
  43 #include <cinttypes>
  44 #include <climits>
  45 #include <cmath>
  46 #include <cstdio>
  47 #include <cstdlib>
  48 #include <cstring>
  49
  50 #include <algorithm>
  51 #include <memory>
  52
  53 #include "gromacs/domdec/collect.h"
  54 #include "gromacs/domdec/dlb.h"
  55 #include "gromacs/domdec/dlbtiming.h"
  56 #include "gromacs/domdec/domdec_network.h"
  57 #include "gromacs/domdec/ga2la.h"
  58 #include "gromacs/domdec/gpuhaloexchange.h"
  59 #include "gromacs/domdec/options.h"
  60 #include "gromacs/domdec/partition.h"
  61 #include "gromacs/gmxlib/network.h"
  62 #include "gromacs/gmxlib/nrnb.h"
  63 #include "gromacs/gpu_utils/gpu_utils.h"
  64 #include "gromacs/hardware/hw_info.h"
  65 #include "gromacs/listed_forces/manage_threading.h"
  66 #include "gromacs/math/vec.h"
  67 #include "gromacs/math/vectypes.h"
  68 #include "gromacs/mdlib/calc_verletbuf.h"
  69 #include "gromacs/mdlib/constr.h"
  70 #include "gromacs/mdlib/constraintrange.h"
  71 #include "gromacs/mdlib/updategroups.h"
  72 #include "gromacs/mdlib/vsite.h"
  73 #include "gromacs/mdtypes/commrec.h"
  74 #include "gromacs/mdtypes/forceoutput.h"
  75 #include "gromacs/mdtypes/inputrec.h"
  76 #include "gromacs/mdtypes/mdrunoptions.h"
  77 #include "gromacs/mdtypes/state.h"
  78 #include "gromacs/pbcutil/ishift.h"
  79 #include "gromacs/pbcutil/pbc.h"
  80 #include "gromacs/pulling/pull.h"
  81 #include "gromacs/timing/wallcycle.h"
  82 #include "gromacs/topology/block.h"
  83 #include "gromacs/topology/idef.h"
  84 #include "gromacs/topology/ifunc.h"
  85 #include "gromacs/topology/mtop_lookup.h"
  86 #include "gromacs/topology/mtop_util.h"
  87 #include "gromacs/topology/topology.h"
  88 #include "gromacs/utility/basedefinitions.h"
  89 #include "gromacs/utility/basenetwork.h"
  90 #include "gromacs/utility/cstringutil.h"
  91 #include "gromacs/utility/exceptions.h"
  92 #include "gromacs/utility/fatalerror.h"
  93 #include "gromacs/utility/gmxmpi.h"
  94 #include "gromacs/utility/logger.h"
  95 #include "gromacs/utility/real.h"
  96 #include "gromacs/utility/smalloc.h"
  97 #include "gromacs/utility/strconvert.h"
  98 #include "gromacs/utility/stringstream.h"
  99 #include "gromacs/utility/stringutil.h"
 100 #include "gromacs/utility/textwriter.h"
 101
 102 #include "atomdistribution.h"
 103 #include "box.h"
 104 #include "cellsizes.h"
 105 #include "distribute.h"
 106 #include "domdec_constraints.h"
 107 #include "domdec_internal.h"
 108 #include "domdec_setup.h"
 109 #include "domdec_vsite.h"
 110 #include "redistribute.h"
 111 #include "utility.h"
 112
 113 // TODO remove this when moving domdec into gmx namespace
 114 using gmx::DdRankOrder;
 115 using gmx::DlbOption;
 116 using gmx::DomdecOptions;
 117
 118 static const char *edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
 119
 120 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
 121 #define DD_CGIBS 2
 122
 123 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
 124 #define DD_FLAG_NRCG  65535
 125 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
 126 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
 127
 128 /* The DD zone order */
 129 static const ivec dd_zo[DD_MAXZONE] =
 130 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
 131
 132 /* The non-bonded zone-pair setup for domain decomposition
 133  * The first number is the i-zone, the second number the first j-zone seen by
 134  * this i-zone, the third number the last+1 j-zone seen by this i-zone.
 135  * As is, this is for 3D decomposition, where there are 4 i-zones.
 136  * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
 137  * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
 138  */
 139 static const int
 140     ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
 141                                                  {1, 3, 6},
 142                                                  {2, 5, 6},
 143                                                  {3, 5, 7}};
 144
 145
 146 /*
 147    #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
 148
 149    static void index2xyz(ivec nc,int ind,ivec xyz)
 150    {
 151    xyz[XX] = ind % nc[XX];
 152    xyz[YY] = (ind / nc[XX]) % nc[YY];
 153    xyz[ZZ] = ind / (nc[YY]*nc[XX]);
 154    }
 155  */
 156
 157 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
 158 {
 159     xyz[XX] = ind / (nc[YY]*nc[ZZ]);
 160     xyz[YY] = (ind / nc[ZZ]) % nc[YY];
 161     xyz[ZZ] = ind % nc[ZZ];
 162 }
 163
 164 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
 165 {
 166     int                       ddnodeid = -1;
 167
 168     const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
 169     const int                 ddindex   = dd_index(dd->nc, c);
 170     if (cartSetup.bCartesianPP_PME)
 171     {
 172         ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
 173     }
 174     else if (cartSetup.bCartesianPP)
 175     {
 176 #if GMX_MPI
 177         MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
 178 #endif
 179     }
 180     else
 181     {
 182         ddnodeid = ddindex;
 183     }
 184
 185     return ddnodeid;
 186 }
 187
 188 int ddglatnr(const gmx_domdec_t *dd, int i)
 189 {
 190     int atnr;
 191
 192     if (dd == nullptr)
 193     {
 194         atnr = i + 1;
 195     }
 196     else
 197     {
 198         if (i >= dd->comm->atomRanges.numAtomsTotal())
 199         {
 200             gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
 201         }
 202         atnr = dd->globalAtomIndices[i] + 1;
 203     }
 204
 205     return atnr;
 206 }
 207
 208 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
 209 {
 210     return &dd->comm->cgs_gl;
 211 }
 212
 213 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t &dd)
 214 {
 215     GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
 216     return dd.comm->systemInfo.updateGroupingPerMoleculetype;
 217 }
 218
 219 void dd_store_state(gmx_domdec_t *dd, t_state *state)
 220 {
 221     int i;
 222
 223     if (state->ddp_count != dd->ddp_count)
 224     {
 225         gmx_incons("The MD state does not match the domain decomposition state");
 226     }
 227
 228     state->cg_gl.resize(dd->ncg_home);
 229     for (i = 0; i < dd->ncg_home; i++)
 230     {
 231         state->cg_gl[i] = dd->globalAtomGroupIndices[i];
 232     }
 233
 234     state->ddp_count_cg_gl = dd->ddp_count;
 235 }
 236
 237 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
 238 {
 239     return &dd->comm->zones;
 240 }
 241
 242 void dd_get_ns_ranges(const gmx_domdec_t *dd, int icg,
 243                       int *jcg0, int *jcg1, ivec shift0, ivec shift1)
 244 {
 245     gmx_domdec_zones_t *zones;
 246     int                 izone, d, dim;
 247
 248     zones = &dd->comm->zones;
 249
 250     izone = 0;
 251     while (icg >= zones->izone[izone].cg1)
 252     {
 253         izone++;
 254     }
 255
 256     if (izone == 0)
 257     {
 258         *jcg0 = icg;
 259     }
 260     else if (izone < zones->nizone)
 261     {
 262         *jcg0 = zones->izone[izone].jcg0;
 263     }
 264     else
 265     {
 266         gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
 267                   icg, izone, zones->nizone);
 268     }
 269
 270     *jcg1 = zones->izone[izone].jcg1;
 271
 272     for (d = 0; d < dd->ndim; d++)
 273     {
 274         dim         = dd->dim[d];
 275         shift0[dim] = zones->izone[izone].shift0[dim];
 276         shift1[dim] = zones->izone[izone].shift1[dim];
 277         if (dd->comm->tric_dir[dim] || (isDlbOn(dd->comm) && d > 0))
 278         {
 279             /* A conservative approach, this can be optimized */
 280             shift0[dim] -= 1;
 281             shift1[dim] += 1;
 282         }
 283     }
 284 }
 285
 286 int dd_numHomeAtoms(const gmx_domdec_t &dd)
 287 {
 288     return dd.comm->atomRanges.numHomeAtoms();
 289 }
 290
 291 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
 292 {
 293     /* We currently set mdatoms entries for all atoms:
 294      * local + non-local + communicated for vsite + constraints
 295      */
 296
 297     return dd->comm->atomRanges.numAtomsTotal();
 298 }
 299
 300 int dd_natoms_vsite(const gmx_domdec_t *dd)
 301 {
 302     return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
 303 }
 304
 305 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
 306 {
 307     *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
 308     *at_end   = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
 309 }
 310
 311 void dd_move_x(gmx_domdec_t             *dd,
 312                const matrix              box,
 313                gmx::ArrayRef<gmx::RVec>  x,
 314                gmx_wallcycle            *wcycle)
 315 {
 316     wallcycle_start(wcycle, ewcMOVEX);
 317
 318     int                    nzone, nat_tot;
 319     gmx_domdec_comm_t     *comm;
 320     gmx_domdec_comm_dim_t *cd;
 321     rvec                   shift = {0, 0, 0};
 322     gmx_bool               bPBC, bScrew;
 323
 324     comm = dd->comm;
 325
 326     nzone   = 1;
 327     nat_tot = comm->atomRanges.numHomeAtoms();
 328     for (int d = 0; d < dd->ndim; d++)
 329     {
 330         bPBC   = (dd->ci[dd->dim[d]] == 0);
 331         bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
 332         if (bPBC)
 333         {
 334             copy_rvec(box[dd->dim[d]], shift);
 335         }
 336         cd = &comm->cd[d];
 337         for (const gmx_domdec_ind_t &ind : cd->ind)
 338         {
 339             DDBufferAccess<gmx::RVec>  sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 340             gmx::ArrayRef<gmx::RVec>  &sendBuffer = sendBufferAccess.buffer;
 341             int                        n          = 0;
 342             if (!bPBC)
 343             {
 344                 for (int j : ind.index)
 345                 {
 346                     sendBuffer[n] = x[j];
 347                     n++;
 348                 }
 349             }
 350             else if (!bScrew)
 351             {
 352                 for (int j : ind.index)
 353                 {
 354                     /* We need to shift the coordinates */
 355                     for (int d = 0; d < DIM; d++)
 356                     {
 357                         sendBuffer[n][d] = x[j][d] + shift[d];
 358                     }
 359                     n++;
 360                 }
 361             }
 362             else
 363             {
 364                 for (int j : ind.index)
 365                 {
 366                     /* Shift x */
 367                     sendBuffer[n][XX] = x[j][XX] + shift[XX];
 368                     /* Rotate y and z.
 369                      * This operation requires a special shift force
 370                      * treatment, which is performed in calc_vir.
 371                      */
 372                     sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
 373                     sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
 374                     n++;
 375                 }
 376             }
 377
 378             DDBufferAccess<gmx::RVec>  receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 379
 380             gmx::ArrayRef<gmx::RVec>   receiveBuffer;
 381             if (cd->receiveInPlace)
 382             {
 383                 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
 384             }
 385             else
 386             {
 387                 receiveBuffer = receiveBufferAccess.buffer;
 388             }
 389             /* Send and receive the coordinates */
 390             ddSendrecv(dd, d, dddirBackward,
 391                        sendBuffer, receiveBuffer);
 392
 393             if (!cd->receiveInPlace)
 394             {
 395                 int j = 0;
 396                 for (int zone = 0; zone < nzone; zone++)
 397                 {
 398                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 399                     {
 400                         x[i] = receiveBuffer[j++];
 401                     }
 402                 }
 403             }
 404             nat_tot += ind.nrecv[nzone+1];
 405         }
 406         nzone += nzone;
 407     }
 408
 409     wallcycle_stop(wcycle, ewcMOVEX);
 410 }
 411
 412 void dd_move_f(gmx_domdec_t              *dd,
 413                gmx::ForceWithShiftForces *forceWithShiftForces,
 414                gmx_wallcycle             *wcycle)
 415 {
 416     wallcycle_start(wcycle, ewcMOVEF);
 417
 418     gmx::ArrayRef<gmx::RVec> f       = forceWithShiftForces->force();
 419     gmx::ArrayRef<gmx::RVec> fshift  = forceWithShiftForces->shiftForces();
 420
 421     gmx_domdec_comm_t       &comm    = *dd->comm;
 422     int                      nzone   = comm.zones.n/2;
 423     int                      nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
 424     for (int d = dd->ndim-1; d >= 0; d--)
 425     {
 426         /* Only forces in domains near the PBC boundaries need to
 427            consider PBC in the treatment of fshift */
 428         const bool shiftForcesNeedPbc = (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
 429         const bool applyScrewPbc      = (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
 430         /* Determine which shift vector we need */
 431         ivec       vis   = { 0, 0, 0 };
 432         vis[dd->dim[d]]  = 1;
 433         const int  is    = IVEC2IS(vis);
 434
 435         /* Loop over the pulses */
 436         const gmx_domdec_comm_dim_t &cd = comm.cd[d];
 437         for (int p = cd.numPulses() - 1; p >= 0; p--)
 438         {
 439             const gmx_domdec_ind_t    &ind  = cd.ind[p];
 440             DDBufferAccess<gmx::RVec>  receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
 441             gmx::ArrayRef<gmx::RVec>  &receiveBuffer = receiveBufferAccess.buffer;
 442
 443             nat_tot                                 -= ind.nrecv[nzone+1];
 444
 445             DDBufferAccess<gmx::RVec>  sendBufferAccess(comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 446
 447             gmx::ArrayRef<gmx::RVec>   sendBuffer;
 448             if (cd.receiveInPlace)
 449             {
 450                 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
 451             }
 452             else
 453             {
 454                 sendBuffer = sendBufferAccess.buffer;
 455                 int j = 0;
 456                 for (int zone = 0; zone < nzone; zone++)
 457                 {
 458                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 459                     {
 460                         sendBuffer[j++] = f[i];
 461                     }
 462                 }
 463             }
 464             /* Communicate the forces */
 465             ddSendrecv(dd, d, dddirForward,
 466                        sendBuffer, receiveBuffer);
 467             /* Add the received forces */
 468             int n = 0;
 469             if (!shiftForcesNeedPbc)
 470             {
 471                 for (int j : ind.index)
 472                 {
 473                     for (int d = 0; d < DIM; d++)
 474                     {
 475                         f[j][d] += receiveBuffer[n][d];
 476                     }
 477                     n++;
 478                 }
 479             }
 480             else if (!applyScrewPbc)
 481             {
 482                 for (int j : ind.index)
 483                 {
 484                     for (int d = 0; d < DIM; d++)
 485                     {
 486                         f[j][d] += receiveBuffer[n][d];
 487                     }
 488                     /* Add this force to the shift force */
 489                     for (int d = 0; d < DIM; d++)
 490                     {
 491                         fshift[is][d] += receiveBuffer[n][d];
 492                     }
 493                     n++;
 494                 }
 495             }
 496             else
 497             {
 498                 for (int j : ind.index)
 499                 {
 500                     /* Rotate the force */
 501                     f[j][XX] += receiveBuffer[n][XX];
 502                     f[j][YY] -= receiveBuffer[n][YY];
 503                     f[j][ZZ] -= receiveBuffer[n][ZZ];
 504                     if (shiftForcesNeedPbc)
 505                     {
 506                         /* Add this force to the shift force */
 507                         for (int d = 0; d < DIM; d++)
 508                         {
 509                             fshift[is][d] += receiveBuffer[n][d];
 510                         }
 511                     }
 512                     n++;
 513                 }
 514             }
 515         }
 516         nzone /= 2;
 517     }
 518     wallcycle_stop(wcycle, ewcMOVEF);
 519 }
 520
 521 /* Convenience function for extracting a real buffer from an rvec buffer
 522  *
 523  * To reduce the number of temporary communication buffers and avoid
 524  * cache polution, we reuse gmx::RVec buffers for storing reals.
 525  * This functions return a real buffer reference with the same number
 526  * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
 527  */
 528 static gmx::ArrayRef<real>
 529 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
 530 {
 531     return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
 532                                   arrayRef.size());
 533 }
 534
 535 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
 536 {
 537     int                    nzone, nat_tot;
 538     gmx_domdec_comm_t     *comm;
 539     gmx_domdec_comm_dim_t *cd;
 540
 541     comm = dd->comm;
 542
 543     nzone   = 1;
 544     nat_tot = comm->atomRanges.numHomeAtoms();
 545     for (int d = 0; d < dd->ndim; d++)
 546     {
 547         cd = &comm->cd[d];
 548         for (const gmx_domdec_ind_t &ind : cd->ind)
 549         {
 550             /* Note: We provision for RVec instead of real, so a factor of 3
 551              * more than needed. The buffer actually already has this size
 552              * and we pass a plain pointer below, so this does not matter.
 553              */
 554             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 555             gmx::ArrayRef<real>       sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 556             int                       n          = 0;
 557             for (int j : ind.index)
 558             {
 559                 sendBuffer[n++] = v[j];
 560             }
 561
 562             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 563
 564             gmx::ArrayRef<real>       receiveBuffer;
 565             if (cd->receiveInPlace)
 566             {
 567                 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 568             }
 569             else
 570             {
 571                 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 572             }
 573             /* Send and receive the data */
 574             ddSendrecv(dd, d, dddirBackward,
 575                        sendBuffer, receiveBuffer);
 576             if (!cd->receiveInPlace)
 577             {
 578                 int j = 0;
 579                 for (int zone = 0; zone < nzone; zone++)
 580                 {
 581                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 582                     {
 583                         v[i] = receiveBuffer[j++];
 584                     }
 585                 }
 586             }
 587             nat_tot += ind.nrecv[nzone+1];
 588         }
 589         nzone += nzone;
 590     }
 591 }
 592
 593 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
 594 {
 595     int                    nzone, nat_tot;
 596     gmx_domdec_comm_t     *comm;
 597     gmx_domdec_comm_dim_t *cd;
 598
 599     comm = dd->comm;
 600
 601     nzone   = comm->zones.n/2;
 602     nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
 603     for (int d = dd->ndim-1; d >= 0; d--)
 604     {
 605         cd = &comm->cd[d];
 606         for (int p = cd->numPulses() - 1; p >= 0; p--)
 607         {
 608             const gmx_domdec_ind_t &ind = cd->ind[p];
 609
 610             /* Note: We provision for RVec instead of real, so a factor of 3
 611              * more than needed. The buffer actually already has this size
 612              * and we typecast, so this works as intended.
 613              */
 614             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 615             gmx::ArrayRef<real>       receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 616             nat_tot -= ind.nrecv[nzone + 1];
 617
 618             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 619
 620             gmx::ArrayRef<real>       sendBuffer;
 621             if (cd->receiveInPlace)
 622             {
 623                 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 624             }
 625             else
 626             {
 627                 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 628                 int j = 0;
 629                 for (int zone = 0; zone < nzone; zone++)
 630                 {
 631                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 632                     {
 633                         sendBuffer[j++] = v[i];
 634                     }
 635                 }
 636             }
 637             /* Communicate the forces */
 638             ddSendrecv(dd, d, dddirForward,
 639                        sendBuffer, receiveBuffer);
 640             /* Add the received forces */
 641             int n = 0;
 642             for (int j : ind.index)
 643             {
 644                 v[j] += receiveBuffer[n];
 645                 n++;
 646             }
 647         }
 648         nzone /= 2;
 649     }
 650 }
 651
 652 real dd_cutoff_multibody(const gmx_domdec_t *dd)
 653 {
 654     const gmx_domdec_comm_t &comm       = *dd->comm;
 655     const DDSystemInfo      &systemInfo = comm.systemInfo;
 656
 657     real                     r = -1;
 658     if (systemInfo.haveInterDomainMultiBodyBondeds)
 659     {
 660         if (comm.cutoff_mbody > 0)
 661         {
 662             r = comm.cutoff_mbody;
 663         }
 664         else
 665         {
 666             /* cutoff_mbody=0 means we do not have DLB */
 667             r = comm.cellsize_min[dd->dim[0]];
 668             for (int di = 1; di < dd->ndim; di++)
 669             {
 670                 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
 671             }
 672             if (comm.systemInfo.filterBondedCommunication)
 673             {
 674                 r = std::max(r, comm.cutoff_mbody);
 675             }
 676             else
 677             {
 678                 r = std::min(r, systemInfo.cutoff);
 679             }
 680         }
 681     }
 682
 683     return r;
 684 }
 685
 686 real dd_cutoff_twobody(const gmx_domdec_t *dd)
 687 {
 688     real r_mb;
 689
 690     r_mb = dd_cutoff_multibody(dd);
 691
 692     return std::max(dd->comm->systemInfo.cutoff, r_mb);
 693 }
 694
 695
 696 static void dd_cart_coord2pmecoord(const DDRankSetup        &ddRankSetup,
 697                                    const CartesianRankSetup &cartSetup,
 698                                    const ivec                coord,
 699                                    ivec                      coord_pme)
 700 {
 701     const int nc   = ddRankSetup.numPPCells[cartSetup.cartpmedim];
 702     const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
 703     copy_ivec(coord, coord_pme);
 704     coord_pme[cartSetup.cartpmedim] =
 705         nc + (coord[cartSetup.cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
 706 }
 707
 708 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
 709 static int ddindex2pmeindex(const DDRankSetup &ddRankSetup,
 710                             const int          ddCellIndex)
 711 {
 712     const int npp  = ddRankSetup.numPPRanks;
 713     const int npme = ddRankSetup.numRanksDoingPme;
 714
 715     /* Here we assign a PME node to communicate with this DD node
 716      * by assuming that the major index of both is x.
 717      * We add npme/2 to obtain an even distribution.
 718      */
 719     return (ddCellIndex*npme + npme/2)/npp;
 720 }
 721
 722 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup &ddRankSetup)
 723 {
 724     std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
 725
 726     int              n = 0;
 727     for (int i = 0; i < ddRankSetup.numPPRanks; i++)
 728     {
 729         const int p0 = ddindex2pmeindex(ddRankSetup, i);
 730         const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
 731         if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
 732         {
 733             if (debug)
 734             {
 735                 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
 736             }
 737             pmeRanks[n] = i + 1 + n;
 738             n++;
 739         }
 740     }
 741
 742     return pmeRanks;
 743 }
 744
 745 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
 746 {
 747     gmx_domdec_t *dd;
 748     ivec          coords;
 749     int           slab;
 750
 751     dd = cr->dd;
 752     /*
 753        if (dd->comm->bCartesian) {
 754        gmx_ddindex2xyz(dd->nc,ddindex,coords);
 755        dd_coords2pmecoords(dd,coords,coords_pme);
 756        copy_ivec(dd->ntot,nc);
 757        nc[dd->cartpmedim]         -= dd->nc[dd->cartpmedim];
 758        coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
 759
 760        slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
 761        } else {
 762        slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
 763        }
 764      */
 765     coords[XX] = x;
 766     coords[YY] = y;
 767     coords[ZZ] = z;
 768     slab       = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->nc, coords));
 769
 770     return slab;
 771 }
 772
 773 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
 774 {
 775     const CartesianRankSetup &cartSetup = cr->dd->comm->cartesianRankSetup;
 776     ivec                      coords    = { x, y, z };
 777     int                       nodeid    = -1;
 778
 779     if (cartSetup.bCartesianPP_PME)
 780     {
 781 #if GMX_MPI
 782         MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
 783 #endif
 784     }
 785     else
 786     {
 787         int ddindex = dd_index(cr->dd->nc, coords);
 788         if (cartSetup.bCartesianPP)
 789         {
 790             nodeid = cartSetup.ddindex2simnodeid[ddindex];
 791         }
 792         else
 793         {
 794             if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
 795             {
 796                 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
 797             }
 798             else
 799             {
 800                 nodeid = ddindex;
 801             }
 802         }
 803     }
 804
 805     return nodeid;
 806 }
 807
 808 static int dd_simnode2pmenode(const DDRankSetup          &ddRankSetup,
 809                               const CartesianRankSetup   &cartSetup,
 810                               gmx::ArrayRef<const int>    pmeRanks,
 811                               const t_commrec gmx_unused *cr,
 812                               const int                   sim_nodeid)
 813 {
 814     int                pmenode = -1;
 815
 816     /* This assumes a uniform x domain decomposition grid cell size */
 817     if (cartSetup.bCartesianPP_PME)
 818     {
 819 #if GMX_MPI
 820         ivec coord, coord_pme;
 821         MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
 822         if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
 823         {
 824             /* This is a PP rank */
 825             dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
 826             MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
 827         }
 828 #endif
 829     }
 830     else if (cartSetup.bCartesianPP)
 831     {
 832         if (sim_nodeid < ddRankSetup.numPPRanks)
 833         {
 834             pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
 835         }
 836     }
 837     else
 838     {
 839         /* This assumes DD cells with identical x coordinates
 840          * are numbered sequentially.
 841          */
 842         if (pmeRanks.empty())
 843         {
 844             if (sim_nodeid < ddRankSetup.numPPRanks)
 845             {
 846                 /* The DD index equals the nodeid */
 847                 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
 848             }
 849         }
 850         else
 851         {
 852             int i = 0;
 853             while (sim_nodeid > pmeRanks[i])
 854             {
 855                 i++;
 856             }
 857             if (sim_nodeid < pmeRanks[i])
 858             {
 859                 pmenode = pmeRanks[i];
 860             }
 861         }
 862     }
 863
 864     return pmenode;
 865 }
 866
 867 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
 868 {
 869     if (dd != nullptr)
 870     {
 871         return {
 872                    dd->comm->ddRankSetup.npmenodes_x,
 873                    dd->comm->ddRankSetup.npmenodes_y
 874         };
 875     }
 876     else
 877     {
 878         return {
 879                    1, 1
 880         };
 881     }
 882 }
 883
 884 std::vector<int> get_pme_ddranks(const t_commrec *cr,
 885                                  const int        pmenodeid)
 886 {
 887     const DDRankSetup        &ddRankSetup = cr->dd->comm->ddRankSetup;
 888     const CartesianRankSetup &cartSetup   = cr->dd->comm->cartesianRankSetup;
 889     GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks, "This function should only be called when PME-only ranks are in use");
 890     std::vector<int>          ddranks;
 891     ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1)/ddRankSetup.numRanksDoingPme);
 892
 893     for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
 894     {
 895         for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
 896         {
 897             for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
 898             {
 899                 if (cartSetup.bCartesianPP_PME)
 900                 {
 901                     ivec coord = { x, y, z };
 902                     ivec coord_pme;
 903                     dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
 904                     if (cr->dd->ci[XX] == coord_pme[XX] &&
 905                         cr->dd->ci[YY] == coord_pme[YY] &&
 906                         cr->dd->ci[ZZ] == coord_pme[ZZ])
 907                     {
 908                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 909                     }
 910                 }
 911                 else
 912                 {
 913                     /* The slab corresponds to the nodeid in the PME group */
 914                     if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
 915                     {
 916                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 917                     }
 918                 }
 919             }
 920         }
 921     }
 922     return ddranks;
 923 }
 924
 925 static gmx_bool receive_vir_ener(const gmx_domdec_t       *dd,
 926                                  gmx::ArrayRef<const int>  pmeRanks,
 927                                  const t_commrec          *cr)
 928 {
 929     gmx_bool           bReceive    = TRUE;
 930
 931     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
 932     if (ddRankSetup.usePmeOnlyRanks)
 933     {
 934         const CartesianRankSetup &cartSetup   = dd->comm->cartesianRankSetup;
 935         if (cartSetup.bCartesianPP_PME)
 936         {
 937 #if GMX_MPI
 938             int  pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
 939             ivec coords;
 940             MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
 941             coords[cartSetup.cartpmedim]++;
 942             if (coords[cartSetup.cartpmedim] < dd->nc[cartSetup.cartpmedim])
 943             {
 944                 int rank;
 945                 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
 946                 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
 947                 {
 948                     /* This is not the last PP node for pmenode */
 949                     bReceive = FALSE;
 950                 }
 951             }
 952 #else
 953             GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
 954 #endif
 955         }
 956         else
 957         {
 958             int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
 959             if (cr->sim_nodeid+1 < cr->nnodes &&
 960                 dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
 961             {
 962                 /* This is not the last PP node for pmenode */
 963                 bReceive = FALSE;
 964             }
 965         }
 966     }
 967
 968     return bReceive;
 969 }
 970
 971 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
 972 {
 973     gmx_domdec_comm_t *comm;
 974     int                i;
 975
 976     comm = dd->comm;
 977
 978     snew(*dim_f, dd->nc[dim]+1);
 979     (*dim_f)[0] = 0;
 980     for (i = 1; i < dd->nc[dim]; i++)
 981     {
 982         if (comm->slb_frac[dim])
 983         {
 984             (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
 985         }
 986         else
 987         {
 988             (*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
 989         }
 990     }
 991     (*dim_f)[dd->nc[dim]] = 1;
 992 }
 993
 994 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
 995 {
 996     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
 997
 998     if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
 999     {
1000         ddpme->dim = YY;
1001     }
1002     else
1003     {
1004         ddpme->dim = dimind;
1005     }
1006     ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
1007
1008     ddpme->nslab = (ddpme->dim == 0 ?
1009                     ddRankSetup.npmenodes_x :
1010                     ddRankSetup.npmenodes_y);
1011
1012     if (ddpme->nslab <= 1)
1013     {
1014         return;
1015     }
1016
1017     const int nso = ddRankSetup.numRanksDoingPme/ddpme->nslab;
1018     /* Determine for each PME slab the PP location range for dimension dim */
1019     snew(ddpme->pp_min, ddpme->nslab);
1020     snew(ddpme->pp_max, ddpme->nslab);
1021     for (int slab = 0; slab < ddpme->nslab; slab++)
1022     {
1023         ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
1024         ddpme->pp_max[slab] = 0;
1025     }
1026     for (int i = 0; i < dd->nnodes; i++)
1027     {
1028         ivec xyz;
1029         ddindex2xyz(dd->nc, i, xyz);
1030         /* For y only use our y/z slab.
1031          * This assumes that the PME x grid size matches the DD grid size.
1032          */
1033         if (dimind == 0 || xyz[XX] == dd->ci[XX])
1034         {
1035             const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
1036             int       slab;
1037             if (dimind == 0)
1038             {
1039                 slab = pmeindex/nso;
1040             }
1041             else
1042             {
1043                 slab = pmeindex % ddpme->nslab;
1044             }
1045             ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
1046             ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1047         }
1048     }
1049
1050     set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1051 }
1052
1053 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1054 {
1055     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1056
1057     if (ddRankSetup.ddpme[0].dim == XX)
1058     {
1059         return ddRankSetup.ddpme[0].maxshift;
1060     }
1061     else
1062     {
1063         return 0;
1064     }
1065 }
1066
1067 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1068 {
1069     const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1070
1071     if (ddRankSetup.ddpme[0].dim == YY)
1072     {
1073         return ddRankSetup.ddpme[0].maxshift;
1074     }
1075     else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
1076     {
1077         return ddRankSetup.ddpme[1].maxshift;
1078     }
1079     else
1080     {
1081         return 0;
1082     }
1083 }
1084
1085 bool ddHaveSplitConstraints(const gmx_domdec_t &dd)
1086 {
1087     return dd.comm->systemInfo.haveSplitConstraints;
1088 }
1089
1090 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
1091 {
1092     /* Note that the cycles value can be incorrect, either 0 or some
1093      * extremely large value, when our thread migrated to another core
1094      * with an unsynchronized cycle counter. If this happens less often
1095      * that once per nstlist steps, this will not cause issues, since
1096      * we later subtract the maximum value from the sum over nstlist steps.
1097      * A zero count will slightly lower the total, but that's a small effect.
1098      * Note that the main purpose of the subtraction of the maximum value
1099      * is to avoid throwing off the load balancing when stalls occur due
1100      * e.g. system activity or network congestion.
1101      */
1102     dd->comm->cycl[ddCycl] += cycles;
1103     dd->comm->cycl_n[ddCycl]++;
1104     if (cycles > dd->comm->cycl_max[ddCycl])
1105     {
1106         dd->comm->cycl_max[ddCycl] = cycles;
1107     }
1108 }
1109
1110 #if GMX_MPI
1111 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
1112 {
1113     MPI_Comm           c_row;
1114     int                dim, i, rank;
1115     ivec               loc_c;
1116     gmx_bool           bPartOfGroup = FALSE;
1117
1118     dim = dd->dim[dim_ind];
1119     copy_ivec(loc, loc_c);
1120     for (i = 0; i < dd->nc[dim]; i++)
1121     {
1122         loc_c[dim] = i;
1123         rank       = dd_index(dd->nc, loc_c);
1124         if (rank == dd->rank)
1125         {
1126             /* This process is part of the group */
1127             bPartOfGroup = TRUE;
1128         }
1129     }
1130     MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
1131                    &c_row);
1132     if (bPartOfGroup)
1133     {
1134         dd->comm->mpi_comm_load[dim_ind] = c_row;
1135         if (!isDlbDisabled(dd->comm))
1136         {
1137             DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1138
1139             if (dd->ci[dim] == dd->master_ci[dim])
1140             {
1141                 /* This is the root process of this row */
1142                 cellsizes.rowMaster  = std::make_unique<RowMaster>();
1143
1144                 RowMaster &rowMaster = *cellsizes.rowMaster;
1145                 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1146                 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
1147                 rowMaster.isCellMin.resize(dd->nc[dim]);
1148                 if (dim_ind > 0)
1149                 {
1150                     rowMaster.bounds.resize(dd->nc[dim]);
1151                 }
1152                 rowMaster.buf_ncd.resize(dd->nc[dim]);
1153             }
1154             else
1155             {
1156                 /* This is not a root process, we only need to receive cell_f */
1157                 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1158             }
1159         }
1160         if (dd->ci[dim] == dd->master_ci[dim])
1161         {
1162             snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
1163         }
1164     }
1165 }
1166 #endif
1167
1168 void dd_setup_dlb_resource_sharing(t_commrec            *cr,
1169                                    int                   gpu_id)
1170 {
1171 #if GMX_MPI
1172     int           physicalnode_id_hash;
1173     gmx_domdec_t *dd;
1174     MPI_Comm      mpi_comm_pp_physicalnode;
1175
1176     if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1177     {
1178         /* Only ranks with short-ranged tasks (currently) use GPUs.
1179          * If we don't have GPUs assigned, there are no resources to share.
1180          */
1181         return;
1182     }
1183
1184     physicalnode_id_hash = gmx_physicalnode_id_hash();
1185
1186     dd = cr->dd;
1187
1188     if (debug)
1189     {
1190         fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1191         fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
1192                 dd->rank, physicalnode_id_hash, gpu_id);
1193     }
1194     /* Split the PP communicator over the physical nodes */
1195     /* TODO: See if we should store this (before), as it's also used for
1196      * for the nodecomm summation.
1197      */
1198     // TODO PhysicalNodeCommunicator could be extended/used to handle
1199     // the need for per-node per-group communicators.
1200     MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
1201                    &mpi_comm_pp_physicalnode);
1202     MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
1203                    &dd->comm->mpi_comm_gpu_shared);
1204     MPI_Comm_free(&mpi_comm_pp_physicalnode);
1205     MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1206
1207     if (debug)
1208     {
1209         fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1210     }
1211
1212     /* Note that some ranks could share a GPU, while others don't */
1213
1214     if (dd->comm->nrank_gpu_shared == 1)
1215     {
1216         MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1217     }
1218 #else
1219     GMX_UNUSED_VALUE(cr);
1220     GMX_UNUSED_VALUE(gpu_id);
1221 #endif
1222 }
1223
1224 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
1225 {
1226 #if GMX_MPI
1227     int  dim0, dim1, i, j;
1228     ivec loc;
1229
1230     if (debug)
1231     {
1232         fprintf(debug, "Making load communicators\n");
1233     }
1234
1235     dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1236     snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1237
1238     if (dd->ndim == 0)
1239     {
1240         return;
1241     }
1242
1243     clear_ivec(loc);
1244     make_load_communicator(dd, 0, loc);
1245     if (dd->ndim > 1)
1246     {
1247         dim0 = dd->dim[0];
1248         for (i = 0; i < dd->nc[dim0]; i++)
1249         {
1250             loc[dim0] = i;
1251             make_load_communicator(dd, 1, loc);
1252         }
1253     }
1254     if (dd->ndim > 2)
1255     {
1256         dim0 = dd->dim[0];
1257         for (i = 0; i < dd->nc[dim0]; i++)
1258         {
1259             loc[dim0] = i;
1260             dim1      = dd->dim[1];
1261             for (j = 0; j < dd->nc[dim1]; j++)
1262             {
1263                 loc[dim1] = j;
1264                 make_load_communicator(dd, 2, loc);
1265             }
1266         }
1267     }
1268
1269     if (debug)
1270     {
1271         fprintf(debug, "Finished making load communicators\n");
1272     }
1273 #endif
1274 }
1275
1276 /*! \brief Sets up the relation between neighboring domains and zones */
1277 static void setup_neighbor_relations(gmx_domdec_t *dd)
1278 {
1279     int                     d, dim, i, j, m;
1280     ivec                    tmp, s;
1281     gmx_domdec_zones_t     *zones;
1282     gmx_domdec_ns_ranges_t *izone;
1283     GMX_ASSERT(dd->ndim >= 0, "Must have non-negative number of dimensions for DD");
1284
1285     for (d = 0; d < dd->ndim; d++)
1286     {
1287         dim = dd->dim[d];
1288         copy_ivec(dd->ci, tmp);
1289         tmp[dim]           = (tmp[dim] + 1) % dd->nc[dim];
1290         dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1291         copy_ivec(dd->ci, tmp);
1292         tmp[dim]           = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
1293         dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1294         if (debug)
1295         {
1296             fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
1297                     dd->rank, dim,
1298                     dd->neighbor[d][0],
1299                     dd->neighbor[d][1]);
1300         }
1301     }
1302
1303     int nzone  = (1 << dd->ndim);
1304     GMX_ASSERT(dd->ndim < DIM, "Invalid number of dimensions");
1305     int nizone = (1 << std::max(dd->ndim - 1, 0));
1306     assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1307
1308     zones = &dd->comm->zones;
1309
1310     for (i = 0; i < nzone; i++)
1311     {
1312         m = 0;
1313         clear_ivec(zones->shift[i]);
1314         for (d = 0; d < dd->ndim; d++)
1315         {
1316             zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1317         }
1318     }
1319
1320     zones->n = nzone;
1321     for (i = 0; i < nzone; i++)
1322     {
1323         for (d = 0; d < DIM; d++)
1324         {
1325             s[d] = dd->ci[d] - zones->shift[i][d];
1326             if (s[d] < 0)
1327             {
1328                 s[d] += dd->nc[d];
1329             }
1330             else if (s[d] >= dd->nc[d])
1331             {
1332                 s[d] -= dd->nc[d];
1333             }
1334         }
1335     }
1336     zones->nizone = nizone;
1337     for (i = 0; i < zones->nizone; i++)
1338     {
1339         assert(ddNonbondedZonePairRanges[i][0] == i);
1340
1341         izone     = &zones->izone[i];
1342         /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
1343          * j-zones up to nzone.
1344          */
1345         izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
1346         izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
1347         for (dim = 0; dim < DIM; dim++)
1348         {
1349             if (dd->nc[dim] == 1)
1350             {
1351                 /* All shifts should be allowed */
1352                 izone->shift0[dim] = -1;
1353                 izone->shift1[dim] = 1;
1354             }
1355             else
1356             {
1357                 /* Determine the min/max j-zone shift wrt the i-zone */
1358                 izone->shift0[dim] = 1;
1359                 izone->shift1[dim] = -1;
1360                 for (j = izone->j0; j < izone->j1; j++)
1361                 {
1362                     int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
1363                     if (shift_diff < izone->shift0[dim])
1364                     {
1365                         izone->shift0[dim] = shift_diff;
1366                     }
1367                     if (shift_diff > izone->shift1[dim])
1368                     {
1369                         izone->shift1[dim] = shift_diff;
1370                     }
1371                 }
1372             }
1373         }
1374     }
1375
1376     if (!isDlbDisabled(dd->comm))
1377     {
1378         dd->comm->cellsizesWithDlb.resize(dd->ndim);
1379     }
1380
1381     if (dd->comm->ddSettings.recordLoad)
1382     {
1383         make_load_communicators(dd);
1384     }
1385 }
1386
1387 static void make_pp_communicator(const gmx::MDLogger  &mdlog,
1388                                  gmx_domdec_t         *dd,
1389                                  t_commrec gmx_unused *cr,
1390                                  bool gmx_unused       reorder)
1391 {
1392 #if GMX_MPI
1393     gmx_domdec_comm_t  *comm      = dd->comm;
1394     CartesianRankSetup &cartSetup = comm->cartesianRankSetup;
1395
1396     if (cartSetup.bCartesianPP)
1397     {
1398         /* Set up cartesian communication for the particle-particle part */
1399         GMX_LOG(mdlog.info).appendTextFormatted(
1400                 "Will use a Cartesian communicator: %d x %d x %d",
1401                 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
1402
1403         ivec periods;
1404         for (int i = 0; i < DIM; i++)
1405         {
1406             periods[i] = TRUE;
1407         }
1408         MPI_Comm comm_cart;
1409         MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
1410                         &comm_cart);
1411         /* We overwrite the old communicator with the new cartesian one */
1412         cr->mpi_comm_mygroup = comm_cart;
1413     }
1414
1415     dd->mpi_comm_all = cr->mpi_comm_mygroup;
1416     MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1417
1418     if (cartSetup.bCartesianPP_PME)
1419     {
1420         /* Since we want to use the original cartesian setup for sim,
1421          * and not the one after split, we need to make an index.
1422          */
1423         cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1424         cartSetup.ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
1425         gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1426         /* Get the rank of the DD master,
1427          * above we made sure that the master node is a PP node.
1428          */
1429         int rank;
1430         if (MASTER(cr))
1431         {
1432             rank = dd->rank;
1433         }
1434         else
1435         {
1436             rank = 0;
1437         }
1438         MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1439     }
1440     else if (cartSetup.bCartesianPP)
1441     {
1442         if (!comm->ddRankSetup.usePmeOnlyRanks)
1443         {
1444             /* The PP communicator is also
1445              * the communicator for this simulation
1446              */
1447             cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1448         }
1449         cr->nodeid = dd->rank;
1450
1451         MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1452
1453         /* We need to make an index to go from the coordinates
1454          * to the nodeid of this simulation.
1455          */
1456         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1457         std::vector<int> buf(dd->nnodes);
1458         if (thisRankHasDuty(cr, DUTY_PP))
1459         {
1460             buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1461         }
1462         /* Communicate the ddindex to simulation nodeid index */
1463         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1464                       cr->mpi_comm_mysim);
1465
1466         /* Determine the master coordinates and rank.
1467          * The DD master should be the same node as the master of this sim.
1468          */
1469         for (int i = 0; i < dd->nnodes; i++)
1470         {
1471             if (cartSetup.ddindex2simnodeid[i] == 0)
1472             {
1473                 ddindex2xyz(dd->nc, i, dd->master_ci);
1474                 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1475             }
1476         }
1477         if (debug)
1478         {
1479             fprintf(debug, "The master rank is %d\n", dd->masterrank);
1480         }
1481     }
1482     else
1483     {
1484         /* No Cartesian communicators */
1485         /* We use the rank in dd->comm->all as DD index */
1486         ddindex2xyz(dd->nc, dd->rank, dd->ci);
1487         /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1488         dd->masterrank = 0;
1489         clear_ivec(dd->master_ci);
1490     }
1491 #endif
1492
1493     GMX_LOG(mdlog.info).appendTextFormatted(
1494             "Domain decomposition rank %d, coordinates %d %d %d\n",
1495             dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1496     if (debug)
1497     {
1498         fprintf(debug,
1499                 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
1500                 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1501     }
1502 }
1503
1504 static void receive_ddindex2simnodeid(gmx_domdec_t         *dd,
1505                                       t_commrec            *cr)
1506 {
1507 #if GMX_MPI
1508     CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
1509
1510     if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1511     {
1512         cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1513         std::vector<int> buf(dd->nnodes);
1514         if (thisRankHasDuty(cr, DUTY_PP))
1515         {
1516             buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1517         }
1518         /* Communicate the ddindex to simulation nodeid index */
1519         MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1520                       cr->mpi_comm_mysim);
1521     }
1522 #else
1523     GMX_UNUSED_VALUE(dd);
1524     GMX_UNUSED_VALUE(cr);
1525 #endif
1526 }
1527
1528 static CartesianRankSetup
1529 split_communicator(const gmx::MDLogger    &mdlog,
1530                    t_commrec              *cr,
1531                    const DdRankOrder       ddRankOrder,
1532                    bool gmx_unused         reorder,
1533                    const DDRankSetup      &ddRankSetup,
1534                    ivec                    ddCellIndex,
1535                    std::vector<int>       *pmeRanks)
1536 {
1537     CartesianRankSetup cartSetup;
1538
1539     cartSetup.bCartesianPP     = (ddRankOrder == DdRankOrder::cartesian);
1540     cartSetup.bCartesianPP_PME = false;
1541
1542     const ivec &numDDCells     = ddRankSetup.numPPCells;
1543     /* Initially we set ntot to the number of PP cells */
1544     copy_ivec(numDDCells, cartSetup.ntot);
1545
1546     if (cartSetup.bCartesianPP)
1547     {
1548         const int numDDCellsTot = ddRankSetup.numPPRanks;
1549         bool      bDiv[DIM];
1550         for (int i = 1; i < DIM; i++)
1551         {
1552             bDiv[i] = ((ddRankSetup.numRanksDoingPme*numDDCells[i]) % numDDCellsTot == 0);
1553         }
1554         if (bDiv[YY] || bDiv[ZZ])
1555         {
1556             cartSetup.bCartesianPP_PME = TRUE;
1557             /* If we have 2D PME decomposition, which is always in x+y,
1558              * we stack the PME only nodes in z.
1559              * Otherwise we choose the direction that provides the thinnest slab
1560              * of PME only nodes as this will have the least effect
1561              * on the PP communication.
1562              * But for the PME communication the opposite might be better.
1563              */
1564             if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 ||
1565                              !bDiv[YY] ||
1566                              numDDCells[YY] > numDDCells[ZZ]))
1567             {
1568                 cartSetup.cartpmedim = ZZ;
1569             }
1570             else
1571             {
1572                 cartSetup.cartpmedim = YY;
1573             }
1574             cartSetup.ntot[cartSetup.cartpmedim]
1575                 += (ddRankSetup.numRanksDoingPme*numDDCells[cartSetup.cartpmedim])/numDDCellsTot;
1576         }
1577         else
1578         {
1579             GMX_LOG(mdlog.info).appendTextFormatted(
1580                     "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)",
1581                     ddRankSetup.numRanksDoingPme,
1582                     numDDCells[XX], numDDCells[YY],
1583                     numDDCells[XX], numDDCells[ZZ]);
1584             GMX_LOG(mdlog.info).appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1585         }
1586     }
1587
1588     if (cartSetup.bCartesianPP_PME)
1589     {
1590 #if GMX_MPI
1591         int  rank;
1592         ivec periods;
1593
1594         GMX_LOG(mdlog.info).appendTextFormatted(
1595                 "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1596                 cartSetup.ntot[XX], cartSetup.ntot[YY], cartSetup.ntot[ZZ]);
1597
1598         for (int i = 0; i < DIM; i++)
1599         {
1600             periods[i] = TRUE;
1601         }
1602         MPI_Comm comm_cart;
1603         MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder),
1604                         &comm_cart);
1605         MPI_Comm_rank(comm_cart, &rank);
1606         if (MASTER(cr) && rank != 0)
1607         {
1608             gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1609         }
1610
1611         /* With this assigment we loose the link to the original communicator
1612          * which will usually be MPI_COMM_WORLD, unless have multisim.
1613          */
1614         cr->mpi_comm_mysim = comm_cart;
1615         cr->sim_nodeid     = rank;
1616
1617         MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1618
1619         GMX_LOG(mdlog.info).appendTextFormatted(
1620                 "Cartesian rank %d, coordinates %d %d %d\n",
1621                 cr->sim_nodeid, ddCellIndex[XX], ddCellIndex[YY], ddCellIndex[ZZ]);
1622
1623         if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1624         {
1625             cr->duty = DUTY_PP;
1626         }
1627         if (!ddRankSetup.usePmeOnlyRanks ||
1628             ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1629         {
1630             cr->duty = DUTY_PME;
1631         }
1632
1633         /* Split the sim communicator into PP and PME only nodes */
1634         MPI_Comm_split(cr->mpi_comm_mysim,
1635                        getThisRankDuties(cr),
1636                        dd_index(cartSetup.ntot, ddCellIndex),
1637                        &cr->mpi_comm_mygroup);
1638 #else
1639         GMX_UNUSED_VALUE(ddCellIndex);
1640 #endif
1641     }
1642     else
1643     {
1644         switch (ddRankOrder)
1645         {
1646             case DdRankOrder::pp_pme:
1647                 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1648                 break;
1649             case DdRankOrder::interleave:
1650                 /* Interleave the PP-only and PME-only ranks */
1651                 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1652                 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1653                 break;
1654             case DdRankOrder::cartesian:
1655                 break;
1656             default:
1657                 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1658         }
1659
1660         if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1661         {
1662             cr->duty = DUTY_PME;
1663         }
1664         else
1665         {
1666             cr->duty = DUTY_PP;
1667         }
1668 #if GMX_MPI
1669         /* Split the sim communicator into PP and PME only nodes */
1670         MPI_Comm_split(cr->mpi_comm_mysim,
1671                        getThisRankDuties(cr),
1672                        cr->nodeid,
1673                        &cr->mpi_comm_mygroup);
1674         MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1675 #endif
1676     }
1677
1678     GMX_LOG(mdlog.info).appendTextFormatted(
1679             "This rank does only %s work.\n",
1680             thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1681
1682     return cartSetup;
1683 }
1684
1685 /*! \brief Makes the PP communicator and the PME communicator, when needed
1686  *
1687  * Returns the Cartesian rank setup.
1688  * Sets \p cr->mpi_comm_mygroup
1689  * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1690  * With separate PME ranks in interleaved order, set the PME ranks in \p pmeRanks.
1691  */
1692 static CartesianRankSetup
1693 makeGroupCommunicators(const gmx::MDLogger &mdlog,
1694                        const DDSettings    &ddSettings,
1695                        const DdRankOrder    ddRankOrder,
1696                        const DDRankSetup   &ddRankSetup,
1697                        t_commrec           *cr,
1698                        ivec                 ddCellIndex,
1699                        std::vector<int>    *pmeRanks)
1700 {
1701     CartesianRankSetup cartSetup;
1702
1703     if (ddRankSetup.usePmeOnlyRanks)
1704     {
1705         /* Split the communicator into a PP and PME part */
1706         cartSetup =
1707             split_communicator(mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder,
1708                                ddRankSetup, ddCellIndex, pmeRanks);
1709     }
1710     else
1711     {
1712         /* All nodes do PP and PME */
1713         /* We do not require separate communicators */
1714         cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
1715
1716         cartSetup.bCartesianPP     = false;
1717         cartSetup.bCartesianPP_PME = false;
1718     }
1719
1720     return cartSetup;
1721 }
1722
1723 /*! \brief For PP ranks, sets or makes the communicator
1724  *
1725  * For PME ranks get the rank id.
1726  * For PP only ranks, sets the PME-only rank.
1727  */
1728 static void setupGroupCommunication(const gmx::MDLogger      &mdlog,
1729                                     const DDSettings         &ddSettings,
1730                                     gmx::ArrayRef<const int>  pmeRanks,
1731                                     t_commrec                *cr,
1732                                     gmx_domdec_t             *dd)
1733 {
1734     const DDRankSetup        &ddRankSetup = dd->comm->ddRankSetup;
1735     const CartesianRankSetup &cartSetup   = dd->comm->cartesianRankSetup;
1736
1737     if (thisRankHasDuty(cr, DUTY_PP))
1738     {
1739         /* Copy or make a new PP communicator */
1740
1741         /* We (possibly) reordered the nodes in split_communicator,
1742          * so it is no longer required in make_pp_communicator.
1743          */
1744         const bool useCartesianReorder =
1745             (ddSettings.useCartesianReorder &&
1746              !cartSetup.bCartesianPP_PME);
1747
1748         make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1749     }
1750     else
1751     {
1752         receive_ddindex2simnodeid(dd, cr);
1753     }
1754
1755     if (!thisRankHasDuty(cr, DUTY_PME))
1756     {
1757         /* Set up the commnuication to our PME node */
1758         dd->pme_nodeid           = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1759         dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1760         if (debug)
1761         {
1762             fprintf(debug, "My pme_nodeid %d receive ener %s\n",
1763                     dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
1764         }
1765     }
1766     else
1767     {
1768         dd->pme_nodeid = -1;
1769     }
1770
1771     /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1772     if (MASTER(cr))
1773     {
1774         dd->ma = std::make_unique<AtomDistribution>(dd->nc,
1775                                                     dd->comm->cgs_gl.nr,
1776                                                     dd->comm->cgs_gl.index[dd->comm->cgs_gl.nr]);
1777     }
1778 }
1779
1780 static real *get_slb_frac(const gmx::MDLogger &mdlog,
1781                           const char *dir, int nc, const char *size_string)
1782 {
1783     real  *slb_frac, tot;
1784     int    i, n;
1785     double dbl;
1786
1787     slb_frac = nullptr;
1788     if (nc > 1 && size_string != nullptr)
1789     {
1790         GMX_LOG(mdlog.info).appendTextFormatted(
1791                 "Using static load balancing for the %s direction", dir);
1792         snew(slb_frac, nc);
1793         tot = 0;
1794         for (i = 0; i < nc; i++)
1795         {
1796             dbl = 0;
1797             sscanf(size_string, "%20lf%n", &dbl, &n);
1798             if (dbl == 0)
1799             {
1800                 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
1801             }
1802             slb_frac[i]  = dbl;
1803             size_string += n;
1804             tot         += slb_frac[i];
1805         }
1806         /* Normalize */
1807         std::string relativeCellSizes = "Relative cell sizes:";
1808         for (i = 0; i < nc; i++)
1809         {
1810             slb_frac[i]       /= tot;
1811             relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1812         }
1813         GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1814     }
1815
1816     return slb_frac;
1817 }
1818
1819 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
1820 {
1821     int                  n     = 0;
1822     gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1823     int                  nmol;
1824     while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1825     {
1826         for (auto &ilist : extractILists(*ilists, IF_BOND))
1827         {
1828             if (NRAL(ilist.functionType) >  2)
1829             {
1830                 n += nmol*(ilist.iatoms.size()/ilistStride(ilist));
1831             }
1832         }
1833     }
1834
1835     return n;
1836 }
1837
1838 static int dd_getenv(const gmx::MDLogger &mdlog,
1839                      const char *env_var, int def)
1840 {
1841     char *val;
1842     int   nst;
1843
1844     nst = def;
1845     val = getenv(env_var);
1846     if (val)
1847     {
1848         if (sscanf(val, "%20d", &nst) <= 0)
1849         {
1850             nst = 1;
1851         }
1852         GMX_LOG(mdlog.info).appendTextFormatted(
1853                 "Found env.var. %s = %s, using value %d",
1854                 env_var, val, nst);
1855     }
1856
1857     return nst;
1858 }
1859
1860 static void check_dd_restrictions(const gmx_domdec_t  *dd,
1861                                   const t_inputrec    *ir,
1862                                   const gmx::MDLogger &mdlog)
1863 {
1864     if (ir->ePBC == epbcSCREW &&
1865         (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
1866     {
1867         gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
1868     }
1869
1870     if (ir->nstlist == 0)
1871     {
1872         gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1873     }
1874
1875     if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
1876     {
1877         GMX_LOG(mdlog.warning).appendText("comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
1878     }
1879 }
1880
1881 static real average_cellsize_min(const gmx_ddbox_t &ddbox,
1882                                  const ivec         numDomains)
1883 {
1884     real r = ddbox.box_size[XX];
1885     for (int d = 0; d < DIM; d++)
1886     {
1887         if (numDomains[d] > 1)
1888         {
1889             /* Check using the initial average cell size */
1890             r = std::min(r, ddbox.box_size[d]*ddbox.skew_fac[d]/numDomains[d]);
1891         }
1892     }
1893
1894     return r;
1895 }
1896
1897 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
1898  */
1899 static DlbState forceDlbOffOrBail(DlbState             cmdlineDlbState,
1900                                   const std::string   &reasonStr,
1901                                   const gmx::MDLogger &mdlog)
1902 {
1903     std::string dlbNotSupportedErr  = "Dynamic load balancing requested, but ";
1904     std::string dlbDisableNote      = "NOTE: disabling dynamic load balancing as ";
1905
1906     if (cmdlineDlbState == DlbState::onUser)
1907     {
1908         gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1909     }
1910     else if (cmdlineDlbState == DlbState::offCanTurnOn)
1911     {
1912         GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1913     }
1914     return DlbState::offForever;
1915 }
1916
1917 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1918  *
1919  * This function parses the parameters of "-dlb" command line option setting
1920  * corresponding state values. Then it checks the consistency of the determined
1921  * state with other run parameters and settings. As a result, the initial state
1922  * may be altered or an error may be thrown if incompatibility of options is detected.
1923  *
1924  * \param [in] mdlog       Logger.
1925  * \param [in] dlbOption   Enum value for the DLB option.
1926  * \param [in] bRecordLoad True if the load balancer is recording load information.
1927  * \param [in] mdrunOptions  Options for mdrun.
1928  * \param [in] ir          Pointer mdrun to input parameters.
1929  * \returns                DLB initial/startup state.
1930  */
1931 static DlbState determineInitialDlbState(const gmx::MDLogger &mdlog,
1932                                          DlbOption dlbOption, gmx_bool bRecordLoad,
1933                                          const gmx::MdrunOptions &mdrunOptions,
1934                                          const t_inputrec *ir)
1935 {
1936     DlbState dlbState = DlbState::offCanTurnOn;
1937
1938     switch (dlbOption)
1939     {
1940         case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1941         case DlbOption::no:               dlbState = DlbState::offUser;      break;
1942         case DlbOption::yes:              dlbState = DlbState::onUser;       break;
1943         default: gmx_incons("Invalid dlbOption enum value");
1944     }
1945
1946     /* Reruns don't support DLB: bail or override auto mode */
1947     if (mdrunOptions.rerun)
1948     {
1949         std::string reasonStr = "it is not supported in reruns.";
1950         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1951     }
1952
1953     /* Unsupported integrators */
1954     if (!EI_DYNAMICS(ir->eI))
1955     {
1956         auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1957         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1958     }
1959
1960     /* Without cycle counters we can't time work to balance on */
1961     if (!bRecordLoad)
1962     {
1963         std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
1964         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1965     }
1966
1967     if (mdrunOptions.reproducible)
1968     {
1969         std::string reasonStr = "you started a reproducible run.";
1970         switch (dlbState)
1971         {
1972             case DlbState::offUser:
1973                 break;
1974             case DlbState::offForever:
1975                 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1976                 break;
1977             case DlbState::offCanTurnOn:
1978                 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1979             case DlbState::onCanTurnOff:
1980                 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1981                 break;
1982             case DlbState::onUser:
1983                 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", mdlog);
1984             default:
1985                 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", static_cast<int>(dlbState));
1986         }
1987     }
1988
1989     return dlbState;
1990 }
1991
1992 /* Sets the order of the DD dimensions, returns the number of DD dimensions */
1993 static int set_dd_dim(const ivec        numDDCells,
1994                       const DDSettings &ddSettings,
1995                       ivec              dims)
1996 {
1997     int ndim = 0;
1998     if (ddSettings.useDDOrderZYX)
1999     {
2000         /* Decomposition order z,y,x */
2001         for (int dim = DIM - 1; dim >= 0; dim--)
2002         {
2003             if (numDDCells[dim] > 1)
2004             {
2005                 dims[ndim++] = dim;
2006             }
2007         }
2008     }
2009     else
2010     {
2011         /* Decomposition order x,y,z */
2012         for (int dim = 0; dim < DIM; dim++)
2013         {
2014             if (numDDCells[dim] > 1)
2015             {
2016                 dims[ndim++] = dim;
2017             }
2018         }
2019     }
2020
2021     if (ndim == 0)
2022     {
2023         /* Set dim[0] to avoid extra checks on ndim in several places */
2024         dims[0] = XX;
2025     }
2026
2027     return ndim;
2028 }
2029
2030 static gmx_domdec_comm_t *init_dd_comm()
2031 {
2032     gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
2033
2034     comm->n_load_have      = 0;
2035     comm->n_load_collect   = 0;
2036
2037     comm->haveTurnedOffDlb = false;
2038
2039     for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
2040     {
2041         comm->sum_nat[i] = 0;
2042     }
2043     comm->ndecomp   = 0;
2044     comm->nload     = 0;
2045     comm->load_step = 0;
2046     comm->load_sum  = 0;
2047     comm->load_max  = 0;
2048     clear_ivec(comm->load_lim);
2049     comm->load_mdf  = 0;
2050     comm->load_pme  = 0;
2051
2052     /* This should be replaced by a unique pointer */
2053     comm->balanceRegion = ddBalanceRegionAllocate();
2054
2055     return comm;
2056 }
2057
2058 /* Returns whether mtop contains constraints and/or vsites */
2059 static bool systemHasConstraintsOrVsites(const gmx_mtop_t &mtop)
2060 {
2061     auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
2062     int  nmol;
2063     while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
2064     {
2065         if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
2066         {
2067             return true;
2068         }
2069     }
2070
2071     return false;
2072 }
2073
2074 static void setupUpdateGroups(const gmx::MDLogger &mdlog,
2075                               const gmx_mtop_t    &mtop,
2076                               const t_inputrec    &inputrec,
2077                               const real           cutoffMargin,
2078                               DDSystemInfo        *systemInfo)
2079 {
2080     /* When we have constraints and/or vsites, it is beneficial to use
2081      * update groups (when possible) to allow independent update of groups.
2082      */
2083     if (!systemHasConstraintsOrVsites(mtop))
2084     {
2085         /* No constraints or vsites, atoms can be updated independently */
2086         return;
2087     }
2088
2089     systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
2090     systemInfo->useUpdateGroups               =
2091         (!systemInfo->updateGroupingPerMoleculetype.empty() &&
2092          getenv("GMX_NO_UPDATEGROUPS") == nullptr);
2093
2094     if (systemInfo->useUpdateGroups)
2095     {
2096         int numUpdateGroups = 0;
2097         for (const auto &molblock : mtop.molblock)
2098         {
2099             numUpdateGroups += molblock.nmol*systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
2100         }
2101
2102         systemInfo->maxUpdateGroupRadius =
2103             computeMaxUpdateGroupRadius(mtop,
2104                                         systemInfo->updateGroupingPerMoleculetype,
2105                                         maxReferenceTemperature(inputrec));
2106
2107         /* To use update groups, the large domain-to-domain cutoff distance
2108          * should be compatible with the box size.
2109          */
2110         systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
2111
2112         if (systemInfo->useUpdateGroups)
2113         {
2114             GMX_LOG(mdlog.info).appendTextFormatted(
2115                     "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f nm\n",
2116                     numUpdateGroups,
2117                     mtop.natoms/static_cast<double>(numUpdateGroups),
2118                     systemInfo->maxUpdateGroupRadius);
2119         }
2120         else
2121         {
2122             GMX_LOG(mdlog.info).appendTextFormatted("The combination of rlist and box size prohibits the use of update groups\n");
2123             systemInfo->updateGroupingPerMoleculetype.clear();
2124         }
2125     }
2126 }
2127
2128 UnitCellInfo::UnitCellInfo(const t_inputrec &ir) :
2129     npbcdim(ePBC2npbcdim(ir.ePBC)),
2130     numBoundedDimensions(inputrec2nboundeddim(&ir)),
2131     ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
2132     haveScrewPBC(ir.ePBC == epbcSCREW)
2133 {
2134 }
2135
2136 /*! \brief Generate the simulation system information */
2137 static DDSystemInfo
2138 getSystemInfo(const gmx::MDLogger           &mdlog,
2139               t_commrec                     *cr,
2140               const DomdecOptions           &options,
2141               const gmx_mtop_t              *mtop,
2142               const t_inputrec              *ir,
2143               const matrix                   box,
2144               gmx::ArrayRef<const gmx::RVec> xGlobal)
2145 {
2146     const real         tenPercentMargin = 1.1;
2147
2148     DDSystemInfo       systemInfo;
2149
2150     /* We need to decide on update groups early, as this affects communication distances */
2151     systemInfo.useUpdateGroups = false;
2152     if (ir->cutoff_scheme == ecutsVERLET)
2153     {
2154         real cutoffMargin = std::sqrt(max_cutoff2(ir->ePBC, box)) - ir->rlist;
2155         setupUpdateGroups(mdlog, *mtop, *ir, cutoffMargin, &systemInfo);
2156     }
2157
2158     // TODO: Check whether all bondeds are within update groups
2159     systemInfo.haveInterDomainBondeds          = (mtop->natoms > gmx_mtop_num_molecules(*mtop) ||
2160                                                   mtop->bIntermolecularInteractions);
2161     systemInfo.haveInterDomainMultiBodyBondeds = (multi_body_bondeds_count(mtop) > 0);
2162
2163     if (systemInfo.useUpdateGroups)
2164     {
2165         systemInfo.haveSplitConstraints = false;
2166         systemInfo.haveSplitSettles     = false;
2167     }
2168     else
2169     {
2170         systemInfo.haveSplitConstraints = gmx::inter_charge_group_constraints(*mtop);
2171         systemInfo.haveSplitSettles     = gmx::inter_charge_group_settles(*mtop);
2172     }
2173
2174     if (ir->rlist == 0)
2175     {
2176         /* Set the cut-off to some very large value,
2177          * so we don't need if statements everywhere in the code.
2178          * We use sqrt, since the cut-off is squared in some places.
2179          */
2180         systemInfo.cutoff = GMX_CUTOFF_INF;
2181     }
2182     else
2183     {
2184         systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir->rlist);
2185     }
2186     systemInfo.minCutoffForMultiBody = 0;
2187
2188     /* Determine the minimum cell size limit, affected by many factors */
2189     systemInfo.cellsizeLimit             = 0;
2190     systemInfo.filterBondedCommunication = false;
2191
2192     /* We do not allow home atoms to move beyond the neighboring domain
2193      * between domain decomposition steps, which limits the cell size.
2194      * Get an estimate of cell size limit due to atom displacement.
2195      * In most cases this is a large overestimate, because it assumes
2196      * non-interaction atoms.
2197      * We set the chance to 1 in a trillion steps.
2198      */
2199     constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2200     const real     limitForAtomDisplacement          =
2201         minCellSizeForAtomDisplacement(*mtop, *ir,
2202                                        systemInfo.updateGroupingPerMoleculetype,
2203                                        c_chanceThatAtomMovesBeyondDomain);
2204     GMX_LOG(mdlog.info).appendTextFormatted(
2205             "Minimum cell size due to atom displacement: %.3f nm",
2206             limitForAtomDisplacement);
2207
2208     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2209                                         limitForAtomDisplacement);
2210
2211     /* TODO: PME decomposition currently requires atoms not to be more than
2212      *       2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2213      *       In nearly all cases, limitForAtomDisplacement will be smaller
2214      *       than 2/3*rlist, so the PME requirement is satisfied.
2215      *       But it would be better for both correctness and performance
2216      *       to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2217      *       Note that we would need to improve the pairlist buffer case.
2218      */
2219
2220     if (systemInfo.haveInterDomainBondeds)
2221     {
2222         if (options.minimumCommunicationRange > 0)
2223         {
2224             systemInfo.minCutoffForMultiBody =
2225                 atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2226             if (options.useBondedCommunication)
2227             {
2228                 systemInfo.filterBondedCommunication = (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2229             }
2230             else
2231             {
2232                 systemInfo.cutoff = std::max(systemInfo.cutoff,
2233                                              systemInfo.minCutoffForMultiBody);
2234             }
2235         }
2236         else if (ir->bPeriodicMols)
2237         {
2238             /* Can not easily determine the required cut-off */
2239             GMX_LOG(mdlog.warning).appendText("NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.");
2240             systemInfo.minCutoffForMultiBody = systemInfo.cutoff/2;
2241         }
2242         else
2243         {
2244             real r_2b, r_mb;
2245
2246             if (MASTER(cr))
2247             {
2248                 dd_bonded_cg_distance(mdlog, mtop, ir, as_rvec_array(xGlobal.data()), box,
2249                                       options.checkBondedInteractions,
2250                                       &r_2b, &r_mb);
2251             }
2252             gmx_bcast(sizeof(r_2b), &r_2b, cr);
2253             gmx_bcast(sizeof(r_mb), &r_mb, cr);
2254
2255             /* We use an initial margin of 10% for the minimum cell size,
2256              * except when we are just below the non-bonded cut-off.
2257              */
2258             if (options.useBondedCommunication)
2259             {
2260                 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2261                 {
2262                     const real r_bonded              = std::max(r_2b, r_mb);
2263                     systemInfo.minCutoffForMultiBody = tenPercentMargin*r_bonded;
2264                     /* This is the (only) place where we turn on the filtering */
2265                     systemInfo.filterBondedCommunication = true;
2266                 }
2267                 else
2268                 {
2269                     const real r_bonded              = r_mb;
2270                     systemInfo.minCutoffForMultiBody = std::min(tenPercentMargin*r_bonded,
2271                                                                 systemInfo.cutoff);
2272                 }
2273                 /* We determine cutoff_mbody later */
2274                 systemInfo.increaseMultiBodyCutoff = true;
2275             }
2276             else
2277             {
2278                 /* No special bonded communication,
2279                  * simply increase the DD cut-off.
2280                  */
2281                 systemInfo.minCutoffForMultiBody = tenPercentMargin*std::max(r_2b, r_mb);
2282                 systemInfo.cutoff                = std::max(systemInfo.cutoff,
2283                                                             systemInfo.minCutoffForMultiBody);
2284             }
2285         }
2286         GMX_LOG(mdlog.info).appendTextFormatted(
2287                 "Minimum cell size due to bonded interactions: %.3f nm",
2288                 systemInfo.minCutoffForMultiBody);
2289
2290         systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2291                                             systemInfo.minCutoffForMultiBody);
2292     }
2293
2294     systemInfo.constraintCommunicationRange = 0;
2295     if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
2296     {
2297         /* There is a cell size limit due to the constraints (P-LINCS) */
2298         systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, mtop, ir);
2299         GMX_LOG(mdlog.info).appendTextFormatted(
2300                 "Estimated maximum distance required for P-LINCS: %.3f nm",
2301                 systemInfo.constraintCommunicationRange);
2302         if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2303         {
2304             GMX_LOG(mdlog.info).appendText("This distance will limit the DD cell size, you can override this with -rcon");
2305         }
2306     }
2307     else if (options.constraintCommunicationRange > 0)
2308     {
2309         /* Here we do not check for dd->splitConstraints.
2310          * because one can also set a cell size limit for virtual sites only
2311          * and at this point we don't know yet if there are intercg v-sites.
2312          */
2313         GMX_LOG(mdlog.info).appendTextFormatted(
2314                 "User supplied maximum distance required for P-LINCS: %.3f nm",
2315                 options.constraintCommunicationRange);
2316         systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2317     }
2318     systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2319                                         systemInfo.constraintCommunicationRange);
2320
2321     return systemInfo;
2322 }
2323
2324 /*! \brief Set the cell size and interaction limits, as well as the DD grid
2325  *
2326  * Also computes the initial ddbox.
2327  */
2328 static DDGridSetup
2329 getDDGridSetup(const gmx::MDLogger           &mdlog,
2330                t_commrec                     *cr,
2331                const DomdecOptions           &options,
2332                const DDSettings              &ddSettings,
2333                const DDSystemInfo            &systemInfo,
2334                const gmx_mtop_t              *mtop,
2335                const t_inputrec              *ir,
2336                const matrix                   box,
2337                gmx::ArrayRef<const gmx::RVec> xGlobal,
2338                gmx_ddbox_t                   *ddbox)
2339 {
2340     DDGridSetup ddGridSetup;
2341
2342     if (options.numCells[XX] > 0)
2343     {
2344         copy_ivec(options.numCells, ddGridSetup.numDomains);
2345         set_ddbox_cr(*cr, &ddGridSetup.numDomains, *ir, box, xGlobal, ddbox);
2346
2347         if (options.numPmeRanks >= 0)
2348         {
2349             ddGridSetup.numPmeOnlyRanks = options.numPmeRanks;
2350         }
2351         else
2352         {
2353             /* When the DD grid is set explicitly and -npme is set to auto,
2354              * don't use PME ranks. We check later if the DD grid is
2355              * compatible with the total number of ranks.
2356              */
2357             ddGridSetup.numPmeOnlyRanks = 0;
2358         }
2359     }
2360     else
2361     {
2362         set_ddbox_cr(*cr, nullptr, *ir, box, xGlobal, ddbox);
2363
2364         /* We need to choose the optimal DD grid and possibly PME nodes */
2365         ddGridSetup =
2366             dd_choose_grid(mdlog, cr, ir, mtop, box, ddbox,
2367                            options.numPmeRanks,
2368                            ddSettings.request1DAnd1Pulse,
2369                            !isDlbDisabled(ddSettings.initialDlbState),
2370                            options.dlbScaling,
2371                            systemInfo);
2372
2373         if (ddGridSetup.numDomains[XX] == 0)
2374         {
2375             char     buf[STRLEN];
2376             gmx_bool bC = (systemInfo.haveSplitConstraints &&
2377                            systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2378             sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
2379                     !bC ? "-rdd" : "-rcon",
2380                     ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2381                     bC ? " or your LINCS settings" : "");
2382
2383             gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2384                                  "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
2385                                  "%s\n"
2386                                  "Look in the log file for details on the domain decomposition",
2387                                  cr->nnodes - ddGridSetup.numPmeOnlyRanks,
2388                                  ddGridSetup.cellsizeLimit,
2389                                  buf);
2390         }
2391     }
2392
2393     const real acs = average_cellsize_min(*ddbox, ddGridSetup.numDomains);
2394     if (acs < systemInfo.cellsizeLimit)
2395     {
2396         if (options.numCells[XX] <= 0)
2397         {
2398             GMX_RELEASE_ASSERT(false, "dd_choose_grid() should return a grid that satisfies the cell size limits");
2399         }
2400         else
2401         {
2402             gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2403                                  "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
2404                                  acs, systemInfo.cellsizeLimit);
2405         }
2406     }
2407
2408     const int numPPRanks = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2409     if (cr->nnodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2410     {
2411         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2412                              "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
2413                              numPPRanks, cr->nnodes - ddGridSetup.numPmeOnlyRanks, cr->nnodes);
2414     }
2415     if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2416     {
2417         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2418                              "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", ddGridSetup.numPmeOnlyRanks, numPPRanks);
2419     }
2420
2421     ddGridSetup.numDDDimensions = set_dd_dim(ddGridSetup.numDomains, ddSettings,
2422                                              ddGridSetup.ddDimensions);
2423
2424     return ddGridSetup;
2425 }
2426
2427 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2428 static DDRankSetup
2429 getDDRankSetup(const gmx::MDLogger &mdlog,
2430                t_commrec           *cr,
2431                const DDGridSetup   &ddGridSetup,
2432                const t_inputrec    &ir)
2433 {
2434     GMX_LOG(mdlog.info).appendTextFormatted(
2435             "Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2436             ddGridSetup.numDomains[XX], ddGridSetup.numDomains[YY], ddGridSetup.numDomains[ZZ],
2437             ddGridSetup.numPmeOnlyRanks);
2438
2439     DDRankSetup ddRankSetup;
2440
2441     ddRankSetup.numPPRanks = cr->nnodes - ddGridSetup.numPmeOnlyRanks;
2442     copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2443
2444     ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2445     if (ddRankSetup.usePmeOnlyRanks)
2446     {
2447         ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2448     }
2449     else
2450     {
2451         ddRankSetup.numRanksDoingPme = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2452     }
2453
2454     if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2455     {
2456         /* The following choices should match those
2457          * in comm_cost_est in domdec_setup.c.
2458          * Note that here the checks have to take into account
2459          * that the decomposition might occur in a different order than xyz
2460          * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2461          * in which case they will not match those in comm_cost_est,
2462          * but since that is mainly for testing purposes that's fine.
2463          */
2464         if (ddGridSetup.numDDDimensions >= 2 &&
2465             ddGridSetup.ddDimensions[0] == XX &&
2466             ddGridSetup.ddDimensions[1] == YY &&
2467             ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX] &&
2468             ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0 &&
2469             getenv("GMX_PMEONEDD") == nullptr)
2470         {
2471             ddRankSetup.npmedecompdim = 2;
2472             ddRankSetup.npmenodes_x   = ddGridSetup.numDomains[XX];
2473             ddRankSetup.npmenodes_y   = ddRankSetup.numRanksDoingPme/ddRankSetup.npmenodes_x;
2474         }
2475         else
2476         {
2477             /* In case nc is 1 in both x and y we could still choose to
2478              * decompose pme in y instead of x, but we use x for simplicity.
2479              */
2480             ddRankSetup.npmedecompdim = 1;
2481             if (ddGridSetup.ddDimensions[0] == YY)
2482             {
2483                 ddRankSetup.npmenodes_x = 1;
2484                 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2485             }
2486             else
2487             {
2488                 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2489                 ddRankSetup.npmenodes_y = 1;
2490             }
2491         }
2492         GMX_LOG(mdlog.info).appendTextFormatted(
2493                 "PME domain decomposition: %d x %d x %d",
2494                 ddRankSetup.npmenodes_x, ddRankSetup.npmenodes_y, 1);
2495     }
2496     else
2497     {
2498         ddRankSetup.npmedecompdim = 0;
2499         ddRankSetup.npmenodes_x   = 0;
2500         ddRankSetup.npmenodes_y   = 0;
2501     }
2502
2503     return ddRankSetup;
2504 }
2505
2506 /*! \brief Set the cell size and interaction limits */
2507 static void set_dd_limits(const gmx::MDLogger &mdlog,
2508                           t_commrec *cr, gmx_domdec_t *dd,
2509                           const DomdecOptions &options,
2510                           const DDSettings &ddSettings,
2511                           const DDSystemInfo &systemInfo,
2512                           const DDGridSetup &ddGridSetup,
2513                           const gmx_mtop_t *mtop,
2514                           const t_inputrec *ir,
2515                           const gmx_ddbox_t &ddbox)
2516 {
2517     gmx_domdec_comm_t *comm = dd->comm;
2518     comm->ddSettings        = ddSettings;
2519
2520     /* Initialize to GPU share count to 0, might change later */
2521     comm->nrank_gpu_shared = 0;
2522
2523     comm->dlbState         = comm->ddSettings.initialDlbState;
2524     dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2525     /* To consider turning DLB on after 2*nstlist steps we need to check
2526      * at partitioning count 3. Thus we need to increase the first count by 2.
2527      */
2528     comm->ddPartioningCountFirstDlbOff += 2;
2529
2530     comm->bPMELoadBalDLBLimits          = FALSE;
2531
2532     /* Allocate the charge group/atom sorting struct */
2533     comm->sort = std::make_unique<gmx_domdec_sort_t>();
2534
2535     comm->systemInfo = systemInfo;
2536
2537     if (systemInfo.useUpdateGroups)
2538     {
2539         /* Note: We would like to use dd->nnodes for the atom count estimate,
2540          *       but that is not yet available here. But this anyhow only
2541          *       affect performance up to the second dd_partition_system call.
2542          */
2543         const int homeAtomCountEstimate =  mtop->natoms/cr->nnodes;
2544         comm->updateGroupsCog =
2545             std::make_unique<gmx::UpdateGroupsCog>(*mtop,
2546                                                    systemInfo.updateGroupingPerMoleculetype,
2547                                                    maxReferenceTemperature(*ir),
2548                                                    homeAtomCountEstimate);
2549     }
2550
2551     comm->cgs_gl = gmx_mtop_global_cgs(mtop);
2552
2553     /* Set the DD setup given by ddGridSetup */
2554     copy_ivec(ddGridSetup.numDomains, dd->nc);
2555     dd->ndim = ddGridSetup.numDDDimensions;
2556     copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2557
2558     dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
2559
2560     snew(comm->slb_frac, DIM);
2561     if (isDlbDisabled(comm))
2562     {
2563         comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
2564         comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
2565         comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
2566     }
2567
2568     /* Set the multi-body cut-off and cellsize limit for DLB */
2569     comm->cutoff_mbody   = systemInfo.minCutoffForMultiBody;
2570     comm->cellsize_limit = systemInfo.cellsizeLimit;
2571     if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2572     {
2573         if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2574         {
2575             /* Set the bonded communication distance to halfway
2576              * the minimum and the maximum,
2577              * since the extra communication cost is nearly zero.
2578              */
2579             real acs           = average_cellsize_min(ddbox, dd->nc);
2580             comm->cutoff_mbody = 0.5*(systemInfo.minCutoffForMultiBody + acs);
2581             if (!isDlbDisabled(comm))
2582             {
2583                 /* Check if this does not limit the scaling */
2584                 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
2585                                               options.dlbScaling*acs);
2586             }
2587             if (!systemInfo.filterBondedCommunication)
2588             {
2589                 /* Without bBondComm do not go beyond the n.b. cut-off */
2590                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2591                 if (comm->cellsize_limit >= systemInfo.cutoff)
2592                 {
2593                     /* We don't loose a lot of efficieny
2594                      * when increasing it to the n.b. cut-off.
2595                      * It can even be slightly faster, because we need
2596                      * less checks for the communication setup.
2597                      */
2598                     comm->cutoff_mbody = systemInfo.cutoff;
2599                 }
2600             }
2601             /* Check if we did not end up below our original limit */
2602             comm->cutoff_mbody = std::max(comm->cutoff_mbody,
2603                                           systemInfo.minCutoffForMultiBody);
2604
2605             if (comm->cutoff_mbody > comm->cellsize_limit)
2606             {
2607                 comm->cellsize_limit = comm->cutoff_mbody;
2608             }
2609         }
2610         /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2611     }
2612
2613     if (debug)
2614     {
2615         fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
2616                 "cellsize limit %f\n",
2617                 gmx::boolToString(systemInfo.filterBondedCommunication),
2618                 comm->cellsize_limit);
2619     }
2620
2621     if (MASTER(cr))
2622     {
2623         check_dd_restrictions(dd, ir, mdlog);
2624     }
2625 }
2626
2627 static char *init_bLocalCG(const gmx_mtop_t *mtop)
2628 {
2629     int   ncg, cg;
2630     char *bLocalCG;
2631
2632     ncg = ncg_mtop(mtop);
2633     snew(bLocalCG, ncg);
2634     for (cg = 0; cg < ncg; cg++)
2635     {
2636         bLocalCG[cg] = FALSE;
2637     }
2638
2639     return bLocalCG;
2640 }
2641
2642 void dd_init_bondeds(FILE *fplog,
2643                      gmx_domdec_t *dd,
2644                      const gmx_mtop_t *mtop,
2645                      const gmx_vsite_t *vsite,
2646                      const t_inputrec *ir,
2647                      gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
2648 {
2649     gmx_domdec_comm_t *comm;
2650
2651     dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
2652
2653     comm = dd->comm;
2654
2655     if (comm->systemInfo.filterBondedCommunication)
2656     {
2657         /* Communicate atoms beyond the cut-off for bonded interactions */
2658         comm = dd->comm;
2659
2660         comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
2661
2662         comm->bLocalCG = init_bLocalCG(mtop);
2663     }
2664     else
2665     {
2666         /* Only communicate atoms based on cut-off */
2667         comm->cglink   = nullptr;
2668         comm->bLocalCG = nullptr;
2669     }
2670 }
2671
2672 static void writeSettings(gmx::TextWriter       *log,
2673                           gmx_domdec_t          *dd,
2674                           const gmx_mtop_t      *mtop,
2675                           const t_inputrec      *ir,
2676                           gmx_bool               bDynLoadBal,
2677                           real                   dlb_scale,
2678                           const gmx_ddbox_t     *ddbox)
2679 {
2680     gmx_domdec_comm_t *comm;
2681     int                d;
2682     ivec               np;
2683     real               limit, shrink;
2684
2685     comm = dd->comm;
2686
2687     if (bDynLoadBal)
2688     {
2689         log->writeString("The maximum number of communication pulses is:");
2690         for (d = 0; d < dd->ndim; d++)
2691         {
2692             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2693         }
2694         log->ensureLineBreak();
2695         log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm", comm->cellsize_limit);
2696         log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2697         log->writeString("The allowed shrink of domain decomposition cells is:");
2698         for (d = 0; d < DIM; d++)
2699         {
2700             if (dd->nc[d] > 1)
2701             {
2702                 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
2703                 {
2704                     shrink = 0;
2705                 }
2706                 else
2707                 {
2708                     shrink =
2709                         comm->cellsize_min_dlb[d]/
2710                         (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
2711                 }
2712                 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2713             }
2714         }
2715         log->ensureLineBreak();
2716     }
2717     else
2718     {
2719         set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2720         log->writeString("The initial number of communication pulses is:");
2721         for (d = 0; d < dd->ndim; d++)
2722         {
2723             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2724         }
2725         log->ensureLineBreak();
2726         log->writeString("The initial domain decomposition cell size is:");
2727         for (d = 0; d < DIM; d++)
2728         {
2729             if (dd->nc[d] > 1)
2730             {
2731                 log->writeStringFormatted(" %c %.2f nm",
2732                                           dim2char(d), dd->comm->cellsize_min[d]);
2733             }
2734         }
2735         log->ensureLineBreak();
2736         log->writeLine();
2737     }
2738
2739     const bool haveInterDomainVsites =
2740         (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
2741
2742     if (comm->systemInfo.haveInterDomainBondeds ||
2743         haveInterDomainVsites ||
2744         comm->systemInfo.haveSplitConstraints ||
2745         comm->systemInfo.haveSplitSettles)
2746     {
2747         std::string decompUnits;
2748         if (comm->systemInfo.useUpdateGroups)
2749         {
2750             decompUnits = "atom groups";
2751         }
2752         else
2753         {
2754             decompUnits = "atoms";
2755         }
2756
2757         log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:", decompUnits.c_str());
2758         log->writeLineFormatted("%40s  %-7s %6.3f nm", "non-bonded interactions", "", comm->systemInfo.cutoff);
2759
2760         if (bDynLoadBal)
2761         {
2762             limit = dd->comm->cellsize_limit;
2763         }
2764         else
2765         {
2766             if (dd->unitCellInfo.ddBoxIsDynamic)
2767             {
2768                 log->writeLine("(the following are initial values, they could change due to box deformation)");
2769             }
2770             limit = dd->comm->cellsize_min[XX];
2771             for (d = 1; d < DIM; d++)
2772             {
2773                 limit = std::min(limit, dd->comm->cellsize_min[d]);
2774             }
2775         }
2776
2777         if (comm->systemInfo.haveInterDomainBondeds)
2778         {
2779             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2780                                     "two-body bonded interactions", "(-rdd)",
2781                                     std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2782             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2783                                     "multi-body bonded interactions", "(-rdd)",
2784                                     (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->systemInfo.cutoff, limit));
2785         }
2786         if (haveInterDomainVsites)
2787         {
2788             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2789                                     "virtual site constructions", "(-rcon)", limit);
2790         }
2791         if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2792         {
2793             std::string separation = gmx::formatString("atoms separated by up to %d constraints",
2794                                                        1+ir->nProjOrder);
2795             log->writeLineFormatted("%40s  %-7s %6.3f nm\n",
2796                                     separation.c_str(), "(-rcon)", limit);
2797         }
2798         log->ensureLineBreak();
2799     }
2800 }
2801
2802 static void logSettings(const gmx::MDLogger &mdlog,
2803                         gmx_domdec_t        *dd,
2804                         const gmx_mtop_t    *mtop,
2805                         const t_inputrec    *ir,
2806                         real                 dlb_scale,
2807                         const gmx_ddbox_t   *ddbox)
2808 {
2809     gmx::StringOutputStream stream;
2810     gmx::TextWriter         log(&stream);
2811     writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2812     if (dd->comm->dlbState == DlbState::offCanTurnOn)
2813     {
2814         {
2815             log.ensureEmptyLine();
2816             log.writeLine("When dynamic load balancing gets turned on, these settings will change to:");
2817         }
2818         writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2819     }
2820     GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2821 }
2822
2823 static void set_cell_limits_dlb(const gmx::MDLogger &mdlog,
2824                                 gmx_domdec_t        *dd,
2825                                 real                 dlb_scale,
2826                                 const t_inputrec    *ir,
2827                                 const gmx_ddbox_t   *ddbox)
2828 {
2829     gmx_domdec_comm_t *comm;
2830     int                d, dim, npulse, npulse_d_max, npulse_d;
2831     gmx_bool           bNoCutOff;
2832
2833     comm = dd->comm;
2834
2835     bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2836
2837     /* Determine the maximum number of comm. pulses in one dimension */
2838
2839     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2840
2841     /* Determine the maximum required number of grid pulses */
2842     if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2843     {
2844         /* Only a single pulse is required */
2845         npulse = 1;
2846     }
2847     else if (!bNoCutOff && comm->cellsize_limit > 0)
2848     {
2849         /* We round down slightly here to avoid overhead due to the latency
2850          * of extra communication calls when the cut-off
2851          * would be only slightly longer than the cell size.
2852          * Later cellsize_limit is redetermined,
2853          * so we can not miss interactions due to this rounding.
2854          */
2855         npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff/comm->cellsize_limit);
2856     }
2857     else
2858     {
2859         /* There is no cell size limit */
2860         npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
2861     }
2862
2863     if (!bNoCutOff && npulse > 1)
2864     {
2865         /* See if we can do with less pulses, based on dlb_scale */
2866         npulse_d_max = 0;
2867         for (d = 0; d < dd->ndim; d++)
2868         {
2869             dim      = dd->dim[d];
2870             npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->systemInfo.cutoff
2871                                         /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
2872             npulse_d_max = std::max(npulse_d_max, npulse_d);
2873         }
2874         npulse = std::min(npulse, npulse_d_max);
2875     }
2876
2877     /* This env var can override npulse */
2878     d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2879     if (d > 0)
2880     {
2881         npulse = d;
2882     }
2883
2884     comm->maxpulse       = 1;
2885     comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
2886     for (d = 0; d < dd->ndim; d++)
2887     {
2888         if (comm->ddSettings.request1DAnd1Pulse)
2889         {
2890             comm->cd[d].np_dlb = 1;
2891         }
2892         else
2893         {
2894             comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
2895             comm->maxpulse     = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2896         }
2897         if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
2898         {
2899             comm->bVacDLBNoLimit = FALSE;
2900         }
2901     }
2902
2903     /* cellsize_limit is set for LINCS in init_domain_decomposition */
2904     if (!comm->bVacDLBNoLimit)
2905     {
2906         comm->cellsize_limit = std::max(comm->cellsize_limit,
2907                                         comm->systemInfo.cutoff/comm->maxpulse);
2908     }
2909     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2910     /* Set the minimum cell size for each DD dimension */
2911     for (d = 0; d < dd->ndim; d++)
2912     {
2913         if (comm->bVacDLBNoLimit ||
2914             comm->cd[d].np_dlb*comm->cellsize_limit >= comm->systemInfo.cutoff)
2915         {
2916             comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2917         }
2918         else
2919         {
2920             comm->cellsize_min_dlb[dd->dim[d]] =
2921                 comm->systemInfo.cutoff/comm->cd[d].np_dlb;
2922         }
2923     }
2924     if (comm->cutoff_mbody <= 0)
2925     {
2926         comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2927     }
2928     if (isDlbOn(comm))
2929     {
2930         set_dlb_limits(dd);
2931     }
2932 }
2933
2934 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
2935 {
2936     /* If each molecule is a single charge group
2937      * or we use domain decomposition for each periodic dimension,
2938      * we do not need to take pbc into account for the bonded interactions.
2939      */
2940     return (ePBC != epbcNONE && dd->comm->systemInfo.haveInterDomainBondeds &&
2941             !(dd->nc[XX] > 1 &&
2942               dd->nc[YY] > 1 &&
2943               (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
2944 }
2945
2946 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2947 static void set_ddgrid_parameters(const gmx::MDLogger &mdlog,
2948                                   gmx_domdec_t *dd, real dlb_scale,
2949                                   const gmx_mtop_t *mtop, const t_inputrec *ir,
2950                                   const gmx_ddbox_t *ddbox)
2951 {
2952     gmx_domdec_comm_t *comm        = dd->comm;
2953     DDRankSetup       &ddRankSetup = comm->ddRankSetup;
2954
2955     if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2956     {
2957         init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2958         if (ddRankSetup.npmedecompdim >= 2)
2959         {
2960             init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2961         }
2962     }
2963     else
2964     {
2965         ddRankSetup.numRanksDoingPme = 0;
2966         if (dd->pme_nodeid >= 0)
2967         {
2968             gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2969                                  "Can not have separate PME ranks without PME electrostatics");
2970         }
2971     }
2972
2973     if (debug)
2974     {
2975         fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2976     }
2977     if (!isDlbDisabled(comm))
2978     {
2979         set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2980     }
2981
2982     logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
2983
2984     real vol_frac;
2985     if (ir->ePBC == epbcNONE)
2986     {
2987         vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
2988     }
2989     else
2990     {
2991         vol_frac =
2992             (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, ddbox))/static_cast<double>(dd->nnodes);
2993     }
2994     if (debug)
2995     {
2996         fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2997     }
2998     int natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
2999
3000     dd->ga2la      = new gmx_ga2la_t(natoms_tot,
3001                                      static_cast<int>(vol_frac*natoms_tot));
3002 }
3003
3004 /*! \brief Get some important DD parameters which can be modified by env.vars */
3005 static DDSettings
3006 getDDSettings(const gmx::MDLogger     &mdlog,
3007               const DomdecOptions     &options,
3008               const gmx::MdrunOptions &mdrunOptions,
3009               const t_inputrec        &ir)
3010 {
3011     DDSettings ddSettings;
3012
3013     ddSettings.useSendRecv2        = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
3014     ddSettings.dlb_scale_lim       = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
3015     // TODO GPU halo exchange requires a 1D single-pulse DD, and when
3016     // it is properly integrated the hack with GMX_GPU_DD_COMMS should
3017     // be removed.
3018     ddSettings.request1DAnd1Pulse  = (bool(dd_getenv(mdlog, "GMX_DD_1D_1PULSE", 0)) ||
3019                                       (bool(getenv("GMX_GPU_DD_COMMS") != nullptr &&
3020                                             GMX_THREAD_MPI &&
3021                                             (GMX_GPU == GMX_GPU_CUDA))));
3022     ddSettings.useDDOrderZYX       = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
3023     ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
3024     ddSettings.eFlop               = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
3025     const int recload              = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
3026     ddSettings.nstDDDump           = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
3027     ddSettings.nstDDDumpGrid       = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
3028     ddSettings.DD_debug            = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
3029
3030     if (ddSettings.useSendRecv2)
3031     {
3032         GMX_LOG(mdlog.info).appendText("Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication");
3033     }
3034
3035     if (ddSettings.eFlop)
3036     {
3037         GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
3038         ddSettings.recordLoad = true;
3039     }
3040     else
3041     {
3042         ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
3043     }
3044
3045     ddSettings.initialDlbState =
3046         determineInitialDlbState(mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, &ir);
3047     GMX_LOG(mdlog.info).appendTextFormatted("Dynamic load balancing: %s",
3048                                             edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);
3049
3050     return ddSettings;
3051 }
3052
3053 gmx_domdec_t::gmx_domdec_t(const t_inputrec &ir) :
3054     unitCellInfo(ir)
3055 {
3056 }
3057
3058 gmx_domdec_t *init_domain_decomposition(const gmx::MDLogger           &mdlog,
3059                                         t_commrec                     *cr,
3060                                         const DomdecOptions           &options,
3061                                         const gmx::MdrunOptions       &mdrunOptions,
3062                                         const gmx_mtop_t              *mtop,
3063                                         const t_inputrec              *ir,
3064                                         const matrix                   box,
3065                                         gmx::ArrayRef<const gmx::RVec> xGlobal,
3066                                         gmx::LocalAtomSetManager      *atomSets)
3067 {
3068     GMX_LOG(mdlog.info).appendTextFormatted(
3069             "\nInitializing Domain Decomposition on %d ranks", cr->nnodes);
3070
3071     DDSettings  ddSettings = getDDSettings(mdlog, options, mdrunOptions, *ir);
3072     if (ddSettings.eFlop > 1)
3073     {
3074         /* Ensure that we have different random flop counts on different ranks */
3075         srand(1 + cr->nodeid);
3076     }
3077
3078     DDSystemInfo systemInfo = getSystemInfo(mdlog, cr, options, mtop, ir, box, xGlobal);
3079
3080     gmx_ddbox_t  ddbox       = {0};
3081     DDGridSetup  ddGridSetup = getDDGridSetup(mdlog, cr, options, ddSettings, systemInfo,
3082                                               mtop, ir, box, xGlobal, &ddbox);
3083
3084     cr->npmenodes = ddGridSetup.numPmeOnlyRanks;
3085
3086     DDRankSetup ddRankSetup = getDDRankSetup(mdlog, cr, ddGridSetup, *ir);
3087
3088     /* Generate the group communicator, also decides the duty of each rank */
3089     ivec               ddCellIndex = { 0, 0, 0 };
3090     std::vector<int>   pmeRanks;
3091     CartesianRankSetup cartSetup   =
3092         makeGroupCommunicators(mdlog, ddSettings, options.rankOrder,
3093                                ddRankSetup, cr,
3094                                ddCellIndex, &pmeRanks);
3095
3096     gmx_domdec_t *dd = new gmx_domdec_t(*ir);
3097
3098     copy_ivec(ddCellIndex, dd->ci);
3099
3100     dd->comm = init_dd_comm();
3101
3102     dd->comm->ddRankSetup        = ddRankSetup;
3103     dd->comm->cartesianRankSetup = cartSetup;
3104
3105     set_dd_limits(mdlog, cr, dd, options,
3106                   ddSettings, systemInfo, ddGridSetup,
3107                   mtop, ir,
3108                   ddbox);
3109
3110     setupGroupCommunication(mdlog, ddSettings, pmeRanks, cr, dd);
3111
3112     if (thisRankHasDuty(cr, DUTY_PP))
3113     {
3114         set_ddgrid_parameters(mdlog, dd, options.dlbScaling, mtop, ir, &ddbox);
3115
3116         setup_neighbor_relations(dd);
3117     }
3118
3119     /* Set overallocation to avoid frequent reallocation of arrays */
3120     set_over_alloc_dd(TRUE);
3121
3122     dd->atomSets = atomSets;
3123
3124     return dd;
3125 }
3126
3127 static gmx_bool test_dd_cutoff(t_commrec                     *cr,
3128                                const matrix                   box,
3129                                gmx::ArrayRef<const gmx::RVec> x,
3130                                real                           cutoffRequested)
3131 {
3132     gmx_domdec_t *dd;
3133     gmx_ddbox_t   ddbox;
3134     int           d, dim, np;
3135     real          inv_cell_size;
3136     int           LocallyLimited;
3137
3138     dd = cr->dd;
3139
3140     set_ddbox(*dd, false, box, true, x, &ddbox);
3141
3142     LocallyLimited = 0;
3143
3144     for (d = 0; d < dd->ndim; d++)
3145     {
3146         dim = dd->dim[d];
3147
3148         inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
3149         if (dd->unitCellInfo.ddBoxIsDynamic)
3150         {
3151             inv_cell_size *= DD_PRES_SCALE_MARGIN;
3152         }
3153
3154         np = 1 + static_cast<int>(cutoffRequested*inv_cell_size*ddbox.skew_fac[dim]);
3155
3156         if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3157         {
3158             if (np > dd->comm->cd[d].np_dlb)
3159             {
3160                 return FALSE;
3161             }
3162
3163             /* If a current local cell size is smaller than the requested
3164              * cut-off, we could still fix it, but this gets very complicated.
3165              * Without fixing here, we might actually need more checks.
3166              */
3167             real cellSizeAlongDim = (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim];
3168             if (cellSizeAlongDim*dd->comm->cd[d].np_dlb < cutoffRequested)
3169             {
3170                 LocallyLimited = 1;
3171             }
3172         }
3173     }
3174
3175     if (!isDlbDisabled(dd->comm))
3176     {
3177         /* If DLB is not active yet, we don't need to check the grid jumps.
3178          * Actually we shouldn't, because then the grid jump data is not set.
3179          */
3180         if (isDlbOn(dd->comm) &&
3181             check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3182         {
3183             LocallyLimited = 1;
3184         }
3185
3186         gmx_sumi(1, &LocallyLimited, cr);
3187
3188         if (LocallyLimited > 0)
3189         {
3190             return FALSE;
3191         }
3192     }
3193
3194     return TRUE;
3195 }
3196
3197 gmx_bool change_dd_cutoff(t_commrec                     *cr,
3198                           const matrix                   box,
3199                           gmx::ArrayRef<const gmx::RVec> x,
3200                           real                           cutoffRequested)
3201 {
3202     gmx_bool bCutoffAllowed;
3203
3204     bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3205
3206     if (bCutoffAllowed)
3207     {
3208         cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3209     }
3210
3211     return bCutoffAllowed;
3212 }