src/gromacs/domdec/domdec.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #include "gmxpre.h"
  37
  38 #include "domdec.h"
  39
  40 #include "config.h"
  41
  42 #include <cassert>
  43 #include <cinttypes>
  44 #include <climits>
  45 #include <cmath>
  46 #include <cstdio>
  47 #include <cstdlib>
  48 #include <cstring>
  49
  50 #include <algorithm>
  51
  52 #include "gromacs/compat/make_unique.h"
  53 #include "gromacs/domdec/collect.h"
  54 #include "gromacs/domdec/dlb.h"
  55 #include "gromacs/domdec/dlbtiming.h"
  56 #include "gromacs/domdec/domdec_network.h"
  57 #include "gromacs/domdec/ga2la.h"
  58 #include "gromacs/domdec/partition.h"
  59 #include "gromacs/gmxlib/network.h"
  60 #include "gromacs/gmxlib/nrnb.h"
  61 #include "gromacs/gpu_utils/gpu_utils.h"
  62 #include "gromacs/hardware/hw_info.h"
  63 #include "gromacs/listed-forces/manage-threading.h"
  64 #include "gromacs/math/vec.h"
  65 #include "gromacs/math/vectypes.h"
  66 #include "gromacs/mdlib/calc_verletbuf.h"
  67 #include "gromacs/mdlib/constr.h"
  68 #include "gromacs/mdlib/constraintrange.h"
  69 #include "gromacs/mdlib/mdrun.h"
  70 #include "gromacs/mdlib/updategroups.h"
  71 #include "gromacs/mdlib/vsite.h"
  72 #include "gromacs/mdtypes/commrec.h"
  73 #include "gromacs/mdtypes/inputrec.h"
  74 #include "gromacs/mdtypes/state.h"
  75 #include "gromacs/pbcutil/ishift.h"
  76 #include "gromacs/pbcutil/pbc.h"
  77 #include "gromacs/pulling/pull.h"
  78 #include "gromacs/timing/wallcycle.h"
  79 #include "gromacs/topology/block.h"
  80 #include "gromacs/topology/idef.h"
  81 #include "gromacs/topology/ifunc.h"
  82 #include "gromacs/topology/mtop_lookup.h"
  83 #include "gromacs/topology/mtop_util.h"
  84 #include "gromacs/topology/topology.h"
  85 #include "gromacs/utility/basedefinitions.h"
  86 #include "gromacs/utility/basenetwork.h"
  87 #include "gromacs/utility/cstringutil.h"
  88 #include "gromacs/utility/exceptions.h"
  89 #include "gromacs/utility/fatalerror.h"
  90 #include "gromacs/utility/gmxmpi.h"
  91 #include "gromacs/utility/logger.h"
  92 #include "gromacs/utility/real.h"
  93 #include "gromacs/utility/smalloc.h"
  94 #include "gromacs/utility/strconvert.h"
  95 #include "gromacs/utility/stringstream.h"
  96 #include "gromacs/utility/stringutil.h"
  97 #include "gromacs/utility/textwriter.h"
  98
  99 #include "atomdistribution.h"
 100 #include "box.h"
 101 #include "cellsizes.h"
 102 #include "distribute.h"
 103 #include "domdec_constraints.h"
 104 #include "domdec_internal.h"
 105 #include "domdec_vsite.h"
 106 #include "redistribute.h"
 107 #include "utility.h"
 108
 109 static const char *edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
 110
 111 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
 112 #define DD_CGIBS 2
 113
 114 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
 115 #define DD_FLAG_NRCG  65535
 116 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
 117 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
 118
 119 /* The DD zone order */
 120 static const ivec dd_zo[DD_MAXZONE] =
 121 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
 122
 123 /* The non-bonded zone-pair setup for domain decomposition
 124  * The first number is the i-zone, the second number the first j-zone seen by
 125  * this i-zone, the third number the last+1 j-zone seen by this i-zone.
 126  * As is, this is for 3D decomposition, where there are 4 i-zones.
 127  * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
 128  * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
 129  */
 130 static const int
 131     ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
 132                                                  {1, 3, 6},
 133                                                  {2, 5, 6},
 134                                                  {3, 5, 7}};
 135
 136
 137 /*
 138    #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
 139
 140    static void index2xyz(ivec nc,int ind,ivec xyz)
 141    {
 142    xyz[XX] = ind % nc[XX];
 143    xyz[YY] = (ind / nc[XX]) % nc[YY];
 144    xyz[ZZ] = ind / (nc[YY]*nc[XX]);
 145    }
 146  */
 147
 148 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
 149 {
 150     xyz[XX] = ind / (nc[YY]*nc[ZZ]);
 151     xyz[YY] = (ind / nc[ZZ]) % nc[YY];
 152     xyz[ZZ] = ind % nc[ZZ];
 153 }
 154
 155 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
 156 {
 157     int ddindex;
 158     int ddnodeid = -1;
 159
 160     ddindex = dd_index(dd->nc, c);
 161     if (dd->comm->bCartesianPP_PME)
 162     {
 163         ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
 164     }
 165     else if (dd->comm->bCartesianPP)
 166     {
 167 #if GMX_MPI
 168         MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
 169 #endif
 170     }
 171     else
 172     {
 173         ddnodeid = ddindex;
 174     }
 175
 176     return ddnodeid;
 177 }
 178
 179 int ddglatnr(const gmx_domdec_t *dd, int i)
 180 {
 181     int atnr;
 182
 183     if (dd == nullptr)
 184     {
 185         atnr = i + 1;
 186     }
 187     else
 188     {
 189         if (i >= dd->comm->atomRanges.numAtomsTotal())
 190         {
 191             gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
 192         }
 193         atnr = dd->globalAtomIndices[i] + 1;
 194     }
 195
 196     return atnr;
 197 }
 198
 199 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
 200 {
 201     return &dd->comm->cgs_gl;
 202 }
 203
 204 void dd_store_state(gmx_domdec_t *dd, t_state *state)
 205 {
 206     int i;
 207
 208     if (state->ddp_count != dd->ddp_count)
 209     {
 210         gmx_incons("The MD state does not match the domain decomposition state");
 211     }
 212
 213     state->cg_gl.resize(dd->ncg_home);
 214     for (i = 0; i < dd->ncg_home; i++)
 215     {
 216         state->cg_gl[i] = dd->globalAtomGroupIndices[i];
 217     }
 218
 219     state->ddp_count_cg_gl = dd->ddp_count;
 220 }
 221
 222 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
 223 {
 224     return &dd->comm->zones;
 225 }
 226
 227 void dd_get_ns_ranges(const gmx_domdec_t *dd, int icg,
 228                       int *jcg0, int *jcg1, ivec shift0, ivec shift1)
 229 {
 230     gmx_domdec_zones_t *zones;
 231     int                 izone, d, dim;
 232
 233     zones = &dd->comm->zones;
 234
 235     izone = 0;
 236     while (icg >= zones->izone[izone].cg1)
 237     {
 238         izone++;
 239     }
 240
 241     if (izone == 0)
 242     {
 243         *jcg0 = icg;
 244     }
 245     else if (izone < zones->nizone)
 246     {
 247         *jcg0 = zones->izone[izone].jcg0;
 248     }
 249     else
 250     {
 251         gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
 252                   icg, izone, zones->nizone);
 253     }
 254
 255     *jcg1 = zones->izone[izone].jcg1;
 256
 257     for (d = 0; d < dd->ndim; d++)
 258     {
 259         dim         = dd->dim[d];
 260         shift0[dim] = zones->izone[izone].shift0[dim];
 261         shift1[dim] = zones->izone[izone].shift1[dim];
 262         if (dd->comm->tric_dir[dim] || (isDlbOn(dd->comm) && d > 0))
 263         {
 264             /* A conservative approach, this can be optimized */
 265             shift0[dim] -= 1;
 266             shift1[dim] += 1;
 267         }
 268     }
 269 }
 270
 271 int dd_numHomeAtoms(const gmx_domdec_t &dd)
 272 {
 273     return dd.comm->atomRanges.numHomeAtoms();
 274 }
 275
 276 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
 277 {
 278     /* We currently set mdatoms entries for all atoms:
 279      * local + non-local + communicated for vsite + constraints
 280      */
 281
 282     return dd->comm->atomRanges.numAtomsTotal();
 283 }
 284
 285 int dd_natoms_vsite(const gmx_domdec_t *dd)
 286 {
 287     return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
 288 }
 289
 290 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
 291 {
 292     *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
 293     *at_end   = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
 294 }
 295
 296 void dd_move_x(gmx_domdec_t             *dd,
 297                matrix                    box,
 298                gmx::ArrayRef<gmx::RVec>  x,
 299                gmx_wallcycle            *wcycle)
 300 {
 301     wallcycle_start(wcycle, ewcMOVEX);
 302
 303     int                    nzone, nat_tot;
 304     gmx_domdec_comm_t     *comm;
 305     gmx_domdec_comm_dim_t *cd;
 306     rvec                   shift = {0, 0, 0};
 307     gmx_bool               bPBC, bScrew;
 308
 309     comm = dd->comm;
 310
 311     const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
 312
 313     nzone   = 1;
 314     nat_tot = comm->atomRanges.numHomeAtoms();
 315     for (int d = 0; d < dd->ndim; d++)
 316     {
 317         bPBC   = (dd->ci[dd->dim[d]] == 0);
 318         bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
 319         if (bPBC)
 320         {
 321             copy_rvec(box[dd->dim[d]], shift);
 322         }
 323         cd = &comm->cd[d];
 324         for (const gmx_domdec_ind_t &ind : cd->ind)
 325         {
 326             DDBufferAccess<gmx::RVec>  sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 327             gmx::ArrayRef<gmx::RVec>  &sendBuffer = sendBufferAccess.buffer;
 328             int                        n          = 0;
 329             if (!bPBC)
 330             {
 331                 for (int g : ind.index)
 332                 {
 333                     for (int j : atomGrouping.block(g))
 334                     {
 335                         sendBuffer[n] = x[j];
 336                         n++;
 337                     }
 338                 }
 339             }
 340             else if (!bScrew)
 341             {
 342                 for (int g : ind.index)
 343                 {
 344                     for (int j : atomGrouping.block(g))
 345                     {
 346                         /* We need to shift the coordinates */
 347                         for (int d = 0; d < DIM; d++)
 348                         {
 349                             sendBuffer[n][d] = x[j][d] + shift[d];
 350                         }
 351                         n++;
 352                     }
 353                 }
 354             }
 355             else
 356             {
 357                 for (int g : ind.index)
 358                 {
 359                     for (int j : atomGrouping.block(g))
 360                     {
 361                         /* Shift x */
 362                         sendBuffer[n][XX] = x[j][XX] + shift[XX];
 363                         /* Rotate y and z.
 364                          * This operation requires a special shift force
 365                          * treatment, which is performed in calc_vir.
 366                          */
 367                         sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
 368                         sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
 369                         n++;
 370                     }
 371                 }
 372             }
 373
 374             DDBufferAccess<gmx::RVec>  receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 375
 376             gmx::ArrayRef<gmx::RVec>   receiveBuffer;
 377             if (cd->receiveInPlace)
 378             {
 379                 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
 380             }
 381             else
 382             {
 383                 receiveBuffer = receiveBufferAccess.buffer;
 384             }
 385             /* Send and receive the coordinates */
 386             ddSendrecv(dd, d, dddirBackward,
 387                        sendBuffer, receiveBuffer);
 388
 389             if (!cd->receiveInPlace)
 390             {
 391                 int j = 0;
 392                 for (int zone = 0; zone < nzone; zone++)
 393                 {
 394                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 395                     {
 396                         x[i] = receiveBuffer[j++];
 397                     }
 398                 }
 399             }
 400             nat_tot += ind.nrecv[nzone+1];
 401         }
 402         nzone += nzone;
 403     }
 404
 405     wallcycle_stop(wcycle, ewcMOVEX);
 406 }
 407
 408 void dd_move_f(gmx_domdec_t             *dd,
 409                gmx::ArrayRef<gmx::RVec>  f,
 410                rvec                     *fshift,
 411                gmx_wallcycle            *wcycle)
 412 {
 413     wallcycle_start(wcycle, ewcMOVEF);
 414
 415     int                    nzone, nat_tot;
 416     gmx_domdec_comm_t     *comm;
 417     gmx_domdec_comm_dim_t *cd;
 418     ivec                   vis;
 419     int                    is;
 420     gmx_bool               bShiftForcesNeedPbc, bScrew;
 421
 422     comm = dd->comm;
 423
 424     const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
 425
 426     nzone   = comm->zones.n/2;
 427     nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
 428     for (int d = dd->ndim-1; d >= 0; d--)
 429     {
 430         /* Only forces in domains near the PBC boundaries need to
 431            consider PBC in the treatment of fshift */
 432         bShiftForcesNeedPbc   = (dd->ci[dd->dim[d]] == 0);
 433         bScrew                = (bShiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX);
 434         if (fshift == nullptr && !bScrew)
 435         {
 436             bShiftForcesNeedPbc = FALSE;
 437         }
 438         /* Determine which shift vector we need */
 439         clear_ivec(vis);
 440         vis[dd->dim[d]] = 1;
 441         is              = IVEC2IS(vis);
 442
 443         cd = &comm->cd[d];
 444         for (int p = cd->numPulses() - 1; p >= 0; p--)
 445         {
 446             const gmx_domdec_ind_t    &ind  = cd->ind[p];
 447             DDBufferAccess<gmx::RVec>  receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 448             gmx::ArrayRef<gmx::RVec>  &receiveBuffer = receiveBufferAccess.buffer;
 449
 450             nat_tot                        -= ind.nrecv[nzone+1];
 451
 452             DDBufferAccess<gmx::RVec>  sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 453
 454             gmx::ArrayRef<gmx::RVec>   sendBuffer;
 455             if (cd->receiveInPlace)
 456             {
 457                 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
 458             }
 459             else
 460             {
 461                 sendBuffer = sendBufferAccess.buffer;
 462                 int j = 0;
 463                 for (int zone = 0; zone < nzone; zone++)
 464                 {
 465                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 466                     {
 467                         sendBuffer[j++] = f[i];
 468                     }
 469                 }
 470             }
 471             /* Communicate the forces */
 472             ddSendrecv(dd, d, dddirForward,
 473                        sendBuffer, receiveBuffer);
 474             /* Add the received forces */
 475             int n = 0;
 476             if (!bShiftForcesNeedPbc)
 477             {
 478                 for (int g : ind.index)
 479                 {
 480                     for (int j : atomGrouping.block(g))
 481                     {
 482                         for (int d = 0; d < DIM; d++)
 483                         {
 484                             f[j][d] += receiveBuffer[n][d];
 485                         }
 486                         n++;
 487                     }
 488                 }
 489             }
 490             else if (!bScrew)
 491             {
 492                 /* fshift should always be defined if this function is
 493                  * called when bShiftForcesNeedPbc is true */
 494                 assert(nullptr != fshift);
 495                 for (int g : ind.index)
 496                 {
 497                     for (int j : atomGrouping.block(g))
 498                     {
 499                         for (int d = 0; d < DIM; d++)
 500                         {
 501                             f[j][d] += receiveBuffer[n][d];
 502                         }
 503                         /* Add this force to the shift force */
 504                         for (int d = 0; d < DIM; d++)
 505                         {
 506                             fshift[is][d] += receiveBuffer[n][d];
 507                         }
 508                         n++;
 509                     }
 510                 }
 511             }
 512             else
 513             {
 514                 for (int g : ind.index)
 515                 {
 516                     for (int j : atomGrouping.block(g))
 517                     {
 518                         /* Rotate the force */
 519                         f[j][XX] += receiveBuffer[n][XX];
 520                         f[j][YY] -= receiveBuffer[n][YY];
 521                         f[j][ZZ] -= receiveBuffer[n][ZZ];
 522                         if (fshift)
 523                         {
 524                             /* Add this force to the shift force */
 525                             for (int d = 0; d < DIM; d++)
 526                             {
 527                                 fshift[is][d] += receiveBuffer[n][d];
 528                             }
 529                         }
 530                         n++;
 531                     }
 532                 }
 533             }
 534         }
 535         nzone /= 2;
 536     }
 537     wallcycle_stop(wcycle, ewcMOVEF);
 538 }
 539
 540 /* Convenience function for extracting a real buffer from an rvec buffer
 541  *
 542  * To reduce the number of temporary communication buffers and avoid
 543  * cache polution, we reuse gmx::RVec buffers for storing reals.
 544  * This functions return a real buffer reference with the same number
 545  * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
 546  */
 547 static gmx::ArrayRef<real>
 548 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
 549 {
 550     return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
 551                                   arrayRef.size());
 552 }
 553
 554 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
 555 {
 556     int                    nzone, nat_tot;
 557     gmx_domdec_comm_t     *comm;
 558     gmx_domdec_comm_dim_t *cd;
 559
 560     comm = dd->comm;
 561
 562     const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
 563
 564     nzone   = 1;
 565     nat_tot = comm->atomRanges.numHomeAtoms();
 566     for (int d = 0; d < dd->ndim; d++)
 567     {
 568         cd = &comm->cd[d];
 569         for (const gmx_domdec_ind_t &ind : cd->ind)
 570         {
 571             /* Note: We provision for RVec instead of real, so a factor of 3
 572              * more than needed. The buffer actually already has this size
 573              * and we pass a plain pointer below, so this does not matter.
 574              */
 575             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 576             gmx::ArrayRef<real>       sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 577             int                       n          = 0;
 578             for (int g : ind.index)
 579             {
 580                 for (int j : atomGrouping.block(g))
 581                 {
 582                     sendBuffer[n++] = v[j];
 583                 }
 584             }
 585
 586             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 587
 588             gmx::ArrayRef<real>       receiveBuffer;
 589             if (cd->receiveInPlace)
 590             {
 591                 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 592             }
 593             else
 594             {
 595                 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 596             }
 597             /* Send and receive the data */
 598             ddSendrecv(dd, d, dddirBackward,
 599                        sendBuffer, receiveBuffer);
 600             if (!cd->receiveInPlace)
 601             {
 602                 int j = 0;
 603                 for (int zone = 0; zone < nzone; zone++)
 604                 {
 605                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 606                     {
 607                         v[i] = receiveBuffer[j++];
 608                     }
 609                 }
 610             }
 611             nat_tot += ind.nrecv[nzone+1];
 612         }
 613         nzone += nzone;
 614     }
 615 }
 616
 617 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
 618 {
 619     int                    nzone, nat_tot;
 620     gmx_domdec_comm_t     *comm;
 621     gmx_domdec_comm_dim_t *cd;
 622
 623     comm = dd->comm;
 624
 625     const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
 626
 627     nzone   = comm->zones.n/2;
 628     nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
 629     for (int d = dd->ndim-1; d >= 0; d--)
 630     {
 631         cd = &comm->cd[d];
 632         for (int p = cd->numPulses() - 1; p >= 0; p--)
 633         {
 634             const gmx_domdec_ind_t &ind = cd->ind[p];
 635
 636             /* Note: We provision for RVec instead of real, so a factor of 3
 637              * more than needed. The buffer actually already has this size
 638              * and we typecast, so this works as intended.
 639              */
 640             DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
 641             gmx::ArrayRef<real>       receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
 642             nat_tot -= ind.nrecv[nzone + 1];
 643
 644             DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
 645
 646             gmx::ArrayRef<real>       sendBuffer;
 647             if (cd->receiveInPlace)
 648             {
 649                 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
 650             }
 651             else
 652             {
 653                 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
 654                 int j = 0;
 655                 for (int zone = 0; zone < nzone; zone++)
 656                 {
 657                     for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
 658                     {
 659                         sendBuffer[j++] = v[i];
 660                     }
 661                 }
 662             }
 663             /* Communicate the forces */
 664             ddSendrecv(dd, d, dddirForward,
 665                        sendBuffer, receiveBuffer);
 666             /* Add the received forces */
 667             int n = 0;
 668             for (int g : ind.index)
 669             {
 670                 for (int j : atomGrouping.block(g))
 671                 {
 672                     v[j] += receiveBuffer[n];
 673                     n++;
 674                 }
 675             }
 676         }
 677         nzone /= 2;
 678     }
 679 }
 680
 681 real dd_cutoff_multibody(const gmx_domdec_t *dd)
 682 {
 683     gmx_domdec_comm_t *comm;
 684     int                di;
 685     real               r;
 686
 687     comm = dd->comm;
 688
 689     r = -1;
 690     if (comm->bInterCGBondeds)
 691     {
 692         if (comm->cutoff_mbody > 0)
 693         {
 694             r = comm->cutoff_mbody;
 695         }
 696         else
 697         {
 698             /* cutoff_mbody=0 means we do not have DLB */
 699             r = comm->cellsize_min[dd->dim[0]];
 700             for (di = 1; di < dd->ndim; di++)
 701             {
 702                 r = std::min(r, comm->cellsize_min[dd->dim[di]]);
 703             }
 704             if (comm->bBondComm)
 705             {
 706                 r = std::max(r, comm->cutoff_mbody);
 707             }
 708             else
 709             {
 710                 r = std::min(r, comm->cutoff);
 711             }
 712         }
 713     }
 714
 715     return r;
 716 }
 717
 718 real dd_cutoff_twobody(const gmx_domdec_t *dd)
 719 {
 720     real r_mb;
 721
 722     r_mb = dd_cutoff_multibody(dd);
 723
 724     return std::max(dd->comm->cutoff, r_mb);
 725 }
 726
 727
 728 static void dd_cart_coord2pmecoord(const gmx_domdec_t *dd, const ivec coord,
 729                                    ivec coord_pme)
 730 {
 731     int nc, ntot;
 732
 733     nc   = dd->nc[dd->comm->cartpmedim];
 734     ntot = dd->comm->ntot[dd->comm->cartpmedim];
 735     copy_ivec(coord, coord_pme);
 736     coord_pme[dd->comm->cartpmedim] =
 737         nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
 738 }
 739
 740 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
 741 {
 742     int npp, npme;
 743
 744     npp  = dd->nnodes;
 745     npme = dd->comm->npmenodes;
 746
 747     /* Here we assign a PME node to communicate with this DD node
 748      * by assuming that the major index of both is x.
 749      * We add cr->npmenodes/2 to obtain an even distribution.
 750      */
 751     return (ddindex*npme + npme/2)/npp;
 752 }
 753
 754 static int *dd_interleaved_pme_ranks(const gmx_domdec_t *dd)
 755 {
 756     int *pme_rank;
 757     int  n, i, p0, p1;
 758
 759     snew(pme_rank, dd->comm->npmenodes);
 760     n = 0;
 761     for (i = 0; i < dd->nnodes; i++)
 762     {
 763         p0 = ddindex2pmeindex(dd, i);
 764         p1 = ddindex2pmeindex(dd, i+1);
 765         if (i+1 == dd->nnodes || p1 > p0)
 766         {
 767             if (debug)
 768             {
 769                 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
 770             }
 771             pme_rank[n] = i + 1 + n;
 772             n++;
 773         }
 774     }
 775
 776     return pme_rank;
 777 }
 778
 779 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
 780 {
 781     gmx_domdec_t *dd;
 782     ivec          coords;
 783     int           slab;
 784
 785     dd = cr->dd;
 786     /*
 787        if (dd->comm->bCartesian) {
 788        gmx_ddindex2xyz(dd->nc,ddindex,coords);
 789        dd_coords2pmecoords(dd,coords,coords_pme);
 790        copy_ivec(dd->ntot,nc);
 791        nc[dd->cartpmedim]         -= dd->nc[dd->cartpmedim];
 792        coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
 793
 794        slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
 795        } else {
 796        slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
 797        }
 798      */
 799     coords[XX] = x;
 800     coords[YY] = y;
 801     coords[ZZ] = z;
 802     slab       = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
 803
 804     return slab;
 805 }
 806
 807 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
 808 {
 809     gmx_domdec_comm_t *comm;
 810     ivec               coords;
 811     int                ddindex, nodeid = -1;
 812
 813     comm = cr->dd->comm;
 814
 815     coords[XX] = x;
 816     coords[YY] = y;
 817     coords[ZZ] = z;
 818     if (comm->bCartesianPP_PME)
 819     {
 820 #if GMX_MPI
 821         MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
 822 #endif
 823     }
 824     else
 825     {
 826         ddindex = dd_index(cr->dd->nc, coords);
 827         if (comm->bCartesianPP)
 828         {
 829             nodeid = comm->ddindex2simnodeid[ddindex];
 830         }
 831         else
 832         {
 833             if (comm->pmenodes)
 834             {
 835                 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
 836             }
 837             else
 838             {
 839                 nodeid = ddindex;
 840             }
 841         }
 842     }
 843
 844     return nodeid;
 845 }
 846
 847 static int dd_simnode2pmenode(const gmx_domdec_t         *dd,
 848                               const t_commrec gmx_unused *cr,
 849                               int                         sim_nodeid)
 850 {
 851     int pmenode = -1;
 852
 853     const gmx_domdec_comm_t *comm = dd->comm;
 854
 855     /* This assumes a uniform x domain decomposition grid cell size */
 856     if (comm->bCartesianPP_PME)
 857     {
 858 #if GMX_MPI
 859         ivec coord, coord_pme;
 860         MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
 861         if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
 862         {
 863             /* This is a PP node */
 864             dd_cart_coord2pmecoord(dd, coord, coord_pme);
 865             MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
 866         }
 867 #endif
 868     }
 869     else if (comm->bCartesianPP)
 870     {
 871         if (sim_nodeid < dd->nnodes)
 872         {
 873             pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
 874         }
 875     }
 876     else
 877     {
 878         /* This assumes DD cells with identical x coordinates
 879          * are numbered sequentially.
 880          */
 881         if (dd->comm->pmenodes == nullptr)
 882         {
 883             if (sim_nodeid < dd->nnodes)
 884             {
 885                 /* The DD index equals the nodeid */
 886                 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
 887             }
 888         }
 889         else
 890         {
 891             int i = 0;
 892             while (sim_nodeid > dd->comm->pmenodes[i])
 893             {
 894                 i++;
 895             }
 896             if (sim_nodeid < dd->comm->pmenodes[i])
 897             {
 898                 pmenode = dd->comm->pmenodes[i];
 899             }
 900         }
 901     }
 902
 903     return pmenode;
 904 }
 905
 906 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
 907 {
 908     if (dd != nullptr)
 909     {
 910         return {
 911                    dd->comm->npmenodes_x, dd->comm->npmenodes_y
 912         };
 913     }
 914     else
 915     {
 916         return {
 917                    1, 1
 918         };
 919     }
 920 }
 921
 922 std::vector<int> get_pme_ddranks(const t_commrec *cr, int pmenodeid)
 923 {
 924     gmx_domdec_t *dd;
 925     int           x, y, z;
 926     ivec          coord, coord_pme;
 927
 928     dd = cr->dd;
 929
 930     std::vector<int> ddranks;
 931     ddranks.reserve((dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
 932
 933     for (x = 0; x < dd->nc[XX]; x++)
 934     {
 935         for (y = 0; y < dd->nc[YY]; y++)
 936         {
 937             for (z = 0; z < dd->nc[ZZ]; z++)
 938             {
 939                 if (dd->comm->bCartesianPP_PME)
 940                 {
 941                     coord[XX] = x;
 942                     coord[YY] = y;
 943                     coord[ZZ] = z;
 944                     dd_cart_coord2pmecoord(dd, coord, coord_pme);
 945                     if (dd->ci[XX] == coord_pme[XX] &&
 946                         dd->ci[YY] == coord_pme[YY] &&
 947                         dd->ci[ZZ] == coord_pme[ZZ])
 948                     {
 949                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 950                     }
 951                 }
 952                 else
 953                 {
 954                     /* The slab corresponds to the nodeid in the PME group */
 955                     if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
 956                     {
 957                         ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
 958                     }
 959                 }
 960             }
 961         }
 962     }
 963     return ddranks;
 964 }
 965
 966 static gmx_bool receive_vir_ener(const gmx_domdec_t *dd, const t_commrec *cr)
 967 {
 968     gmx_bool bReceive = TRUE;
 969
 970     if (cr->npmenodes < dd->nnodes)
 971     {
 972         gmx_domdec_comm_t *comm = dd->comm;
 973         if (comm->bCartesianPP_PME)
 974         {
 975 #if GMX_MPI
 976             int  pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
 977             ivec coords;
 978             MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
 979             coords[comm->cartpmedim]++;
 980             if (coords[comm->cartpmedim] < dd->nc[comm->cartpmedim])
 981             {
 982                 int rank;
 983                 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
 984                 if (dd_simnode2pmenode(dd, cr, rank) == pmenode)
 985                 {
 986                     /* This is not the last PP node for pmenode */
 987                     bReceive = FALSE;
 988                 }
 989             }
 990 #else
 991             GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
 992 #endif
 993         }
 994         else
 995         {
 996             int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
 997             if (cr->sim_nodeid+1 < cr->nnodes &&
 998                 dd_simnode2pmenode(dd, cr, cr->sim_nodeid+1) == pmenode)
 999             {
1000                 /* This is not the last PP node for pmenode */
1001                 bReceive = FALSE;
1002             }
1003         }
1004     }
1005
1006     return bReceive;
1007 }
1008
1009 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
1010 {
1011     gmx_domdec_comm_t *comm;
1012     int                i;
1013
1014     comm = dd->comm;
1015
1016     snew(*dim_f, dd->nc[dim]+1);
1017     (*dim_f)[0] = 0;
1018     for (i = 1; i < dd->nc[dim]; i++)
1019     {
1020         if (comm->slb_frac[dim])
1021         {
1022             (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
1023         }
1024         else
1025         {
1026             (*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
1027         }
1028     }
1029     (*dim_f)[dd->nc[dim]] = 1;
1030 }
1031
1032 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
1033 {
1034     int  pmeindex, slab, nso, i;
1035     ivec xyz;
1036
1037     if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
1038     {
1039         ddpme->dim = YY;
1040     }
1041     else
1042     {
1043         ddpme->dim = dimind;
1044     }
1045     ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
1046
1047     ddpme->nslab = (ddpme->dim == 0 ?
1048                     dd->comm->npmenodes_x :
1049                     dd->comm->npmenodes_y);
1050
1051     if (ddpme->nslab <= 1)
1052     {
1053         return;
1054     }
1055
1056     nso = dd->comm->npmenodes/ddpme->nslab;
1057     /* Determine for each PME slab the PP location range for dimension dim */
1058     snew(ddpme->pp_min, ddpme->nslab);
1059     snew(ddpme->pp_max, ddpme->nslab);
1060     for (slab = 0; slab < ddpme->nslab; slab++)
1061     {
1062         ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
1063         ddpme->pp_max[slab] = 0;
1064     }
1065     for (i = 0; i < dd->nnodes; i++)
1066     {
1067         ddindex2xyz(dd->nc, i, xyz);
1068         /* For y only use our y/z slab.
1069          * This assumes that the PME x grid size matches the DD grid size.
1070          */
1071         if (dimind == 0 || xyz[XX] == dd->ci[XX])
1072         {
1073             pmeindex = ddindex2pmeindex(dd, i);
1074             if (dimind == 0)
1075             {
1076                 slab = pmeindex/nso;
1077             }
1078             else
1079             {
1080                 slab = pmeindex % ddpme->nslab;
1081             }
1082             ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
1083             ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1084         }
1085     }
1086
1087     set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1088 }
1089
1090 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1091 {
1092     if (dd->comm->ddpme[0].dim == XX)
1093     {
1094         return dd->comm->ddpme[0].maxshift;
1095     }
1096     else
1097     {
1098         return 0;
1099     }
1100 }
1101
1102 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1103 {
1104     if (dd->comm->ddpme[0].dim == YY)
1105     {
1106         return dd->comm->ddpme[0].maxshift;
1107     }
1108     else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
1109     {
1110         return dd->comm->ddpme[1].maxshift;
1111     }
1112     else
1113     {
1114         return 0;
1115     }
1116 }
1117
1118 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
1119 {
1120     /* Note that the cycles value can be incorrect, either 0 or some
1121      * extremely large value, when our thread migrated to another core
1122      * with an unsynchronized cycle counter. If this happens less often
1123      * that once per nstlist steps, this will not cause issues, since
1124      * we later subtract the maximum value from the sum over nstlist steps.
1125      * A zero count will slightly lower the total, but that's a small effect.
1126      * Note that the main purpose of the subtraction of the maximum value
1127      * is to avoid throwing off the load balancing when stalls occur due
1128      * e.g. system activity or network congestion.
1129      */
1130     dd->comm->cycl[ddCycl] += cycles;
1131     dd->comm->cycl_n[ddCycl]++;
1132     if (cycles > dd->comm->cycl_max[ddCycl])
1133     {
1134         dd->comm->cycl_max[ddCycl] = cycles;
1135     }
1136 }
1137
1138 #if GMX_MPI
1139 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
1140 {
1141     MPI_Comm           c_row;
1142     int                dim, i, rank;
1143     ivec               loc_c;
1144     gmx_bool           bPartOfGroup = FALSE;
1145
1146     dim = dd->dim[dim_ind];
1147     copy_ivec(loc, loc_c);
1148     for (i = 0; i < dd->nc[dim]; i++)
1149     {
1150         loc_c[dim] = i;
1151         rank       = dd_index(dd->nc, loc_c);
1152         if (rank == dd->rank)
1153         {
1154             /* This process is part of the group */
1155             bPartOfGroup = TRUE;
1156         }
1157     }
1158     MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
1159                    &c_row);
1160     if (bPartOfGroup)
1161     {
1162         dd->comm->mpi_comm_load[dim_ind] = c_row;
1163         if (!isDlbDisabled(dd->comm))
1164         {
1165             DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1166
1167             if (dd->ci[dim] == dd->master_ci[dim])
1168             {
1169                 /* This is the root process of this row */
1170                 cellsizes.rowMaster  = gmx::compat::make_unique<RowMaster>();
1171
1172                 RowMaster &rowMaster = *cellsizes.rowMaster;
1173                 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1174                 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
1175                 rowMaster.isCellMin.resize(dd->nc[dim]);
1176                 if (dim_ind > 0)
1177                 {
1178                     rowMaster.bounds.resize(dd->nc[dim]);
1179                 }
1180                 rowMaster.buf_ncd.resize(dd->nc[dim]);
1181             }
1182             else
1183             {
1184                 /* This is not a root process, we only need to receive cell_f */
1185                 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1186             }
1187         }
1188         if (dd->ci[dim] == dd->master_ci[dim])
1189         {
1190             snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
1191         }
1192     }
1193 }
1194 #endif
1195
1196 void dd_setup_dlb_resource_sharing(t_commrec            *cr,
1197                                    int                   gpu_id)
1198 {
1199 #if GMX_MPI
1200     int           physicalnode_id_hash;
1201     gmx_domdec_t *dd;
1202     MPI_Comm      mpi_comm_pp_physicalnode;
1203
1204     if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1205     {
1206         /* Only ranks with short-ranged tasks (currently) use GPUs.
1207          * If we don't have GPUs assigned, there are no resources to share.
1208          */
1209         return;
1210     }
1211
1212     physicalnode_id_hash = gmx_physicalnode_id_hash();
1213
1214     dd = cr->dd;
1215
1216     if (debug)
1217     {
1218         fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1219         fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
1220                 dd->rank, physicalnode_id_hash, gpu_id);
1221     }
1222     /* Split the PP communicator over the physical nodes */
1223     /* TODO: See if we should store this (before), as it's also used for
1224      * for the nodecomm summation.
1225      */
1226     // TODO PhysicalNodeCommunicator could be extended/used to handle
1227     // the need for per-node per-group communicators.
1228     MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
1229                    &mpi_comm_pp_physicalnode);
1230     MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
1231                    &dd->comm->mpi_comm_gpu_shared);
1232     MPI_Comm_free(&mpi_comm_pp_physicalnode);
1233     MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1234
1235     if (debug)
1236     {
1237         fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1238     }
1239
1240     /* Note that some ranks could share a GPU, while others don't */
1241
1242     if (dd->comm->nrank_gpu_shared == 1)
1243     {
1244         MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1245     }
1246 #else
1247     GMX_UNUSED_VALUE(cr);
1248     GMX_UNUSED_VALUE(gpu_id);
1249 #endif
1250 }
1251
1252 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
1253 {
1254 #if GMX_MPI
1255     int  dim0, dim1, i, j;
1256     ivec loc;
1257
1258     if (debug)
1259     {
1260         fprintf(debug, "Making load communicators\n");
1261     }
1262
1263     snew(dd->comm->load,          std::max(dd->ndim, 1));
1264     snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1265
1266     if (dd->ndim == 0)
1267     {
1268         return;
1269     }
1270
1271     clear_ivec(loc);
1272     make_load_communicator(dd, 0, loc);
1273     if (dd->ndim > 1)
1274     {
1275         dim0 = dd->dim[0];
1276         for (i = 0; i < dd->nc[dim0]; i++)
1277         {
1278             loc[dim0] = i;
1279             make_load_communicator(dd, 1, loc);
1280         }
1281     }
1282     if (dd->ndim > 2)
1283     {
1284         dim0 = dd->dim[0];
1285         for (i = 0; i < dd->nc[dim0]; i++)
1286         {
1287             loc[dim0] = i;
1288             dim1      = dd->dim[1];
1289             for (j = 0; j < dd->nc[dim1]; j++)
1290             {
1291                 loc[dim1] = j;
1292                 make_load_communicator(dd, 2, loc);
1293             }
1294         }
1295     }
1296
1297     if (debug)
1298     {
1299         fprintf(debug, "Finished making load communicators\n");
1300     }
1301 #endif
1302 }
1303
1304 /*! \brief Sets up the relation between neighboring domains and zones */
1305 static void setup_neighbor_relations(gmx_domdec_t *dd)
1306 {
1307     int                     d, dim, i, j, m;
1308     ivec                    tmp, s;
1309     gmx_domdec_zones_t     *zones;
1310     gmx_domdec_ns_ranges_t *izone;
1311
1312     for (d = 0; d < dd->ndim; d++)
1313     {
1314         dim = dd->dim[d];
1315         copy_ivec(dd->ci, tmp);
1316         tmp[dim]           = (tmp[dim] + 1) % dd->nc[dim];
1317         dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1318         copy_ivec(dd->ci, tmp);
1319         tmp[dim]           = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
1320         dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1321         if (debug)
1322         {
1323             fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
1324                     dd->rank, dim,
1325                     dd->neighbor[d][0],
1326                     dd->neighbor[d][1]);
1327         }
1328     }
1329
1330     int nzone  = (1 << dd->ndim);
1331     int nizone = (1 << std::max(dd->ndim - 1, 0));
1332     assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1333
1334     zones = &dd->comm->zones;
1335
1336     for (i = 0; i < nzone; i++)
1337     {
1338         m = 0;
1339         clear_ivec(zones->shift[i]);
1340         for (d = 0; d < dd->ndim; d++)
1341         {
1342             zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1343         }
1344     }
1345
1346     zones->n = nzone;
1347     for (i = 0; i < nzone; i++)
1348     {
1349         for (d = 0; d < DIM; d++)
1350         {
1351             s[d] = dd->ci[d] - zones->shift[i][d];
1352             if (s[d] < 0)
1353             {
1354                 s[d] += dd->nc[d];
1355             }
1356             else if (s[d] >= dd->nc[d])
1357             {
1358                 s[d] -= dd->nc[d];
1359             }
1360         }
1361     }
1362     zones->nizone = nizone;
1363     for (i = 0; i < zones->nizone; i++)
1364     {
1365         assert(ddNonbondedZonePairRanges[i][0] == i);
1366
1367         izone     = &zones->izone[i];
1368         /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
1369          * j-zones up to nzone.
1370          */
1371         izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
1372         izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
1373         for (dim = 0; dim < DIM; dim++)
1374         {
1375             if (dd->nc[dim] == 1)
1376             {
1377                 /* All shifts should be allowed */
1378                 izone->shift0[dim] = -1;
1379                 izone->shift1[dim] = 1;
1380             }
1381             else
1382             {
1383                 /* Determine the min/max j-zone shift wrt the i-zone */
1384                 izone->shift0[dim] = 1;
1385                 izone->shift1[dim] = -1;
1386                 for (j = izone->j0; j < izone->j1; j++)
1387                 {
1388                     int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
1389                     if (shift_diff < izone->shift0[dim])
1390                     {
1391                         izone->shift0[dim] = shift_diff;
1392                     }
1393                     if (shift_diff > izone->shift1[dim])
1394                     {
1395                         izone->shift1[dim] = shift_diff;
1396                     }
1397                 }
1398             }
1399         }
1400     }
1401
1402     if (!isDlbDisabled(dd->comm))
1403     {
1404         dd->comm->cellsizesWithDlb.resize(dd->ndim);
1405     }
1406
1407     if (dd->comm->bRecordLoad)
1408     {
1409         make_load_communicators(dd);
1410     }
1411 }
1412
1413 static void make_pp_communicator(const gmx::MDLogger  &mdlog,
1414                                  gmx_domdec_t         *dd,
1415                                  t_commrec gmx_unused *cr,
1416                                  bool gmx_unused       reorder)
1417 {
1418 #if GMX_MPI
1419     gmx_domdec_comm_t *comm;
1420     int                rank, *buf;
1421     ivec               periods;
1422     MPI_Comm           comm_cart;
1423
1424     comm = dd->comm;
1425
1426     if (comm->bCartesianPP)
1427     {
1428         /* Set up cartesian communication for the particle-particle part */
1429         GMX_LOG(mdlog.info).appendTextFormatted(
1430                 "Will use a Cartesian communicator: %d x %d x %d",
1431                 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
1432
1433         for (int i = 0; i < DIM; i++)
1434         {
1435             periods[i] = TRUE;
1436         }
1437         MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
1438                         &comm_cart);
1439         /* We overwrite the old communicator with the new cartesian one */
1440         cr->mpi_comm_mygroup = comm_cart;
1441     }
1442
1443     dd->mpi_comm_all = cr->mpi_comm_mygroup;
1444     MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1445
1446     if (comm->bCartesianPP_PME)
1447     {
1448         /* Since we want to use the original cartesian setup for sim,
1449          * and not the one after split, we need to make an index.
1450          */
1451         snew(comm->ddindex2ddnodeid, dd->nnodes);
1452         comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
1453         gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
1454         /* Get the rank of the DD master,
1455          * above we made sure that the master node is a PP node.
1456          */
1457         if (MASTER(cr))
1458         {
1459             rank = dd->rank;
1460         }
1461         else
1462         {
1463             rank = 0;
1464         }
1465         MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1466     }
1467     else if (comm->bCartesianPP)
1468     {
1469         if (cr->npmenodes == 0)
1470         {
1471             /* The PP communicator is also
1472              * the communicator for this simulation
1473              */
1474             cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1475         }
1476         cr->nodeid = dd->rank;
1477
1478         MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1479
1480         /* We need to make an index to go from the coordinates
1481          * to the nodeid of this simulation.
1482          */
1483         snew(comm->ddindex2simnodeid, dd->nnodes);
1484         snew(buf, dd->nnodes);
1485         if (thisRankHasDuty(cr, DUTY_PP))
1486         {
1487             buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1488         }
1489         /* Communicate the ddindex to simulation nodeid index */
1490         MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
1491                       cr->mpi_comm_mysim);
1492         sfree(buf);
1493
1494         /* Determine the master coordinates and rank.
1495          * The DD master should be the same node as the master of this sim.
1496          */
1497         for (int i = 0; i < dd->nnodes; i++)
1498         {
1499             if (comm->ddindex2simnodeid[i] == 0)
1500             {
1501                 ddindex2xyz(dd->nc, i, dd->master_ci);
1502                 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1503             }
1504         }
1505         if (debug)
1506         {
1507             fprintf(debug, "The master rank is %d\n", dd->masterrank);
1508         }
1509     }
1510     else
1511     {
1512         /* No Cartesian communicators */
1513         /* We use the rank in dd->comm->all as DD index */
1514         ddindex2xyz(dd->nc, dd->rank, dd->ci);
1515         /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1516         dd->masterrank = 0;
1517         clear_ivec(dd->master_ci);
1518     }
1519 #endif
1520
1521     GMX_LOG(mdlog.info).appendTextFormatted(
1522             "Domain decomposition rank %d, coordinates %d %d %d\n",
1523             dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1524     if (debug)
1525     {
1526         fprintf(debug,
1527                 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
1528                 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1529     }
1530 }
1531
1532 static void receive_ddindex2simnodeid(gmx_domdec_t         *dd,
1533                                       t_commrec            *cr)
1534 {
1535 #if GMX_MPI
1536     gmx_domdec_comm_t *comm = dd->comm;
1537
1538     if (!comm->bCartesianPP_PME && comm->bCartesianPP)
1539     {
1540         int *buf;
1541         snew(comm->ddindex2simnodeid, dd->nnodes);
1542         snew(buf, dd->nnodes);
1543         if (thisRankHasDuty(cr, DUTY_PP))
1544         {
1545             buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1546         }
1547         /* Communicate the ddindex to simulation nodeid index */
1548         MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
1549                       cr->mpi_comm_mysim);
1550         sfree(buf);
1551     }
1552 #else
1553     GMX_UNUSED_VALUE(dd);
1554     GMX_UNUSED_VALUE(cr);
1555 #endif
1556 }
1557
1558 static void split_communicator(const gmx::MDLogger &mdlog,
1559                                t_commrec *cr, gmx_domdec_t *dd,
1560                                DdRankOrder gmx_unused rankOrder,
1561                                bool gmx_unused reorder)
1562 {
1563     gmx_domdec_comm_t *comm;
1564     int                i;
1565     gmx_bool           bDiv[DIM];
1566 #if GMX_MPI
1567     MPI_Comm           comm_cart;
1568 #endif
1569
1570     comm = dd->comm;
1571
1572     if (comm->bCartesianPP)
1573     {
1574         for (i = 1; i < DIM; i++)
1575         {
1576             bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
1577         }
1578         if (bDiv[YY] || bDiv[ZZ])
1579         {
1580             comm->bCartesianPP_PME = TRUE;
1581             /* If we have 2D PME decomposition, which is always in x+y,
1582              * we stack the PME only nodes in z.
1583              * Otherwise we choose the direction that provides the thinnest slab
1584              * of PME only nodes as this will have the least effect
1585              * on the PP communication.
1586              * But for the PME communication the opposite might be better.
1587              */
1588             if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
1589                              !bDiv[YY] ||
1590                              dd->nc[YY] > dd->nc[ZZ]))
1591             {
1592                 comm->cartpmedim = ZZ;
1593             }
1594             else
1595             {
1596                 comm->cartpmedim = YY;
1597             }
1598             comm->ntot[comm->cartpmedim]
1599                 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
1600         }
1601         else
1602         {
1603             GMX_LOG(mdlog.info).appendTextFormatted(
1604                     "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)",
1605                     cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
1606             GMX_LOG(mdlog.info).appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1607         }
1608     }
1609
1610     if (comm->bCartesianPP_PME)
1611     {
1612 #if GMX_MPI
1613         int  rank;
1614         ivec periods;
1615
1616         GMX_LOG(mdlog.info).appendTextFormatted(
1617                 "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1618                 comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
1619
1620         for (i = 0; i < DIM; i++)
1621         {
1622             periods[i] = TRUE;
1623         }
1624         MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, static_cast<int>(reorder),
1625                         &comm_cart);
1626         MPI_Comm_rank(comm_cart, &rank);
1627         if (MASTER(cr) && rank != 0)
1628         {
1629             gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1630         }
1631
1632         /* With this assigment we loose the link to the original communicator
1633          * which will usually be MPI_COMM_WORLD, unless have multisim.
1634          */
1635         cr->mpi_comm_mysim = comm_cart;
1636         cr->sim_nodeid     = rank;
1637
1638         MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
1639
1640         GMX_LOG(mdlog.info).appendTextFormatted(
1641                 "Cartesian rank %d, coordinates %d %d %d\n",
1642                 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1643
1644         if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1645         {
1646             cr->duty = DUTY_PP;
1647         }
1648         if (cr->npmenodes == 0 ||
1649             dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
1650         {
1651             cr->duty = DUTY_PME;
1652         }
1653
1654         /* Split the sim communicator into PP and PME only nodes */
1655         MPI_Comm_split(cr->mpi_comm_mysim,
1656                        getThisRankDuties(cr),
1657                        dd_index(comm->ntot, dd->ci),
1658                        &cr->mpi_comm_mygroup);
1659 #endif
1660     }
1661     else
1662     {
1663         switch (rankOrder)
1664         {
1665             case DdRankOrder::pp_pme:
1666                 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1667                 break;
1668             case DdRankOrder::interleave:
1669                 /* Interleave the PP-only and PME-only ranks */
1670                 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1671                 comm->pmenodes = dd_interleaved_pme_ranks(dd);
1672                 break;
1673             case DdRankOrder::cartesian:
1674                 break;
1675             default:
1676                 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(rankOrder));
1677         }
1678
1679         if (dd_simnode2pmenode(dd, cr, cr->sim_nodeid) == -1)
1680         {
1681             cr->duty = DUTY_PME;
1682         }
1683         else
1684         {
1685             cr->duty = DUTY_PP;
1686         }
1687 #if GMX_MPI
1688         /* Split the sim communicator into PP and PME only nodes */
1689         MPI_Comm_split(cr->mpi_comm_mysim,
1690                        getThisRankDuties(cr),
1691                        cr->nodeid,
1692                        &cr->mpi_comm_mygroup);
1693         MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1694 #endif
1695     }
1696
1697     GMX_LOG(mdlog.info).appendTextFormatted(
1698             "This rank does only %s work.\n",
1699             thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1700 }
1701
1702 /*! \brief Generates the MPI communicators for domain decomposition */
1703 static void make_dd_communicators(const gmx::MDLogger &mdlog,
1704                                   t_commrec *cr,
1705                                   gmx_domdec_t *dd, DdRankOrder ddRankOrder)
1706 {
1707     gmx_domdec_comm_t *comm;
1708     bool               CartReorder;
1709
1710     comm = dd->comm;
1711
1712     copy_ivec(dd->nc, comm->ntot);
1713
1714     comm->bCartesianPP     = (ddRankOrder == DdRankOrder::cartesian);
1715     comm->bCartesianPP_PME = FALSE;
1716
1717     /* Reorder the nodes by default. This might change the MPI ranks.
1718      * Real reordering is only supported on very few architectures,
1719      * Blue Gene is one of them.
1720      */
1721     CartReorder = getenv("GMX_NO_CART_REORDER") == nullptr;
1722
1723     if (cr->npmenodes > 0)
1724     {
1725         /* Split the communicator into a PP and PME part */
1726         split_communicator(mdlog, cr, dd, ddRankOrder, CartReorder);
1727         if (comm->bCartesianPP_PME)
1728         {
1729             /* We (possibly) reordered the nodes in split_communicator,
1730              * so it is no longer required in make_pp_communicator.
1731              */
1732             CartReorder = FALSE;
1733         }
1734     }
1735     else
1736     {
1737         /* All nodes do PP and PME */
1738 #if GMX_MPI
1739         /* We do not require separate communicators */
1740         cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
1741 #endif
1742     }
1743
1744     if (thisRankHasDuty(cr, DUTY_PP))
1745     {
1746         /* Copy or make a new PP communicator */
1747         make_pp_communicator(mdlog, dd, cr, CartReorder);
1748     }
1749     else
1750     {
1751         receive_ddindex2simnodeid(dd, cr);
1752     }
1753
1754     if (!thisRankHasDuty(cr, DUTY_PME))
1755     {
1756         /* Set up the commnuication to our PME node */
1757         dd->pme_nodeid           = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
1758         dd->pme_receive_vir_ener = receive_vir_ener(dd, cr);
1759         if (debug)
1760         {
1761             fprintf(debug, "My pme_nodeid %d receive ener %s\n",
1762                     dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
1763         }
1764     }
1765     else
1766     {
1767         dd->pme_nodeid = -1;
1768     }
1769
1770     if (DDMASTER(dd))
1771     {
1772         dd->ma = gmx::compat::make_unique<AtomDistribution>(dd->nc,
1773                                                             comm->cgs_gl.nr,
1774                                                             comm->cgs_gl.index[comm->cgs_gl.nr]);
1775     }
1776 }
1777
1778 static real *get_slb_frac(const gmx::MDLogger &mdlog,
1779                           const char *dir, int nc, const char *size_string)
1780 {
1781     real  *slb_frac, tot;
1782     int    i, n;
1783     double dbl;
1784
1785     slb_frac = nullptr;
1786     if (nc > 1 && size_string != nullptr)
1787     {
1788         GMX_LOG(mdlog.info).appendTextFormatted(
1789                 "Using static load balancing for the %s direction", dir);
1790         snew(slb_frac, nc);
1791         tot = 0;
1792         for (i = 0; i < nc; i++)
1793         {
1794             dbl = 0;
1795             sscanf(size_string, "%20lf%n", &dbl, &n);
1796             if (dbl == 0)
1797             {
1798                 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
1799             }
1800             slb_frac[i]  = dbl;
1801             size_string += n;
1802             tot         += slb_frac[i];
1803         }
1804         /* Normalize */
1805         std::string relativeCellSizes = "Relative cell sizes:";
1806         for (i = 0; i < nc; i++)
1807         {
1808             slb_frac[i]       /= tot;
1809             relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1810         }
1811         GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1812     }
1813
1814     return slb_frac;
1815 }
1816
1817 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
1818 {
1819     int                  n     = 0;
1820     gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1821     int                  nmol;
1822     while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1823     {
1824         for (auto &ilist : extractILists(*ilists, IF_BOND))
1825         {
1826             if (NRAL(ilist.functionType) >  2)
1827             {
1828                 n += nmol*(ilist.iatoms.size()/ilistStride(ilist));
1829             }
1830         }
1831     }
1832
1833     return n;
1834 }
1835
1836 static int dd_getenv(const gmx::MDLogger &mdlog,
1837                      const char *env_var, int def)
1838 {
1839     char *val;
1840     int   nst;
1841
1842     nst = def;
1843     val = getenv(env_var);
1844     if (val)
1845     {
1846         if (sscanf(val, "%20d", &nst) <= 0)
1847         {
1848             nst = 1;
1849         }
1850         GMX_LOG(mdlog.info).appendTextFormatted(
1851                 "Found env.var. %s = %s, using value %d",
1852                 env_var, val, nst);
1853     }
1854
1855     return nst;
1856 }
1857
1858 static void check_dd_restrictions(const gmx_domdec_t  *dd,
1859                                   const t_inputrec    *ir,
1860                                   const gmx::MDLogger &mdlog)
1861 {
1862     if (ir->ePBC == epbcSCREW &&
1863         (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
1864     {
1865         gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
1866     }
1867
1868     if (ir->ns_type == ensSIMPLE)
1869     {
1870         gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
1871     }
1872
1873     if (ir->nstlist == 0)
1874     {
1875         gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1876     }
1877
1878     if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
1879     {
1880         GMX_LOG(mdlog.warning).appendText("comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
1881     }
1882 }
1883
1884 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
1885 {
1886     int  di, d;
1887     real r;
1888
1889     r = ddbox->box_size[XX];
1890     for (di = 0; di < dd->ndim; di++)
1891     {
1892         d = dd->dim[di];
1893         /* Check using the initial average cell size */
1894         r = std::min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
1895     }
1896
1897     return r;
1898 }
1899
1900 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
1901  */
1902 static DlbState forceDlbOffOrBail(DlbState             cmdlineDlbState,
1903                                   const std::string   &reasonStr,
1904                                   const gmx::MDLogger &mdlog)
1905 {
1906     std::string dlbNotSupportedErr  = "Dynamic load balancing requested, but ";
1907     std::string dlbDisableNote      = "NOTE: disabling dynamic load balancing as ";
1908
1909     if (cmdlineDlbState == DlbState::onUser)
1910     {
1911         gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1912     }
1913     else if (cmdlineDlbState == DlbState::offCanTurnOn)
1914     {
1915         GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1916     }
1917     return DlbState::offForever;
1918 }
1919
1920 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1921  *
1922  * This function parses the parameters of "-dlb" command line option setting
1923  * corresponding state values. Then it checks the consistency of the determined
1924  * state with other run parameters and settings. As a result, the initial state
1925  * may be altered or an error may be thrown if incompatibility of options is detected.
1926  *
1927  * \param [in] mdlog       Logger.
1928  * \param [in] dlbOption   Enum value for the DLB option.
1929  * \param [in] bRecordLoad True if the load balancer is recording load information.
1930  * \param [in] mdrunOptions  Options for mdrun.
1931  * \param [in] ir          Pointer mdrun to input parameters.
1932  * \returns                DLB initial/startup state.
1933  */
1934 static DlbState determineInitialDlbState(const gmx::MDLogger &mdlog,
1935                                          DlbOption dlbOption, gmx_bool bRecordLoad,
1936                                          const MdrunOptions &mdrunOptions,
1937                                          const t_inputrec *ir)
1938 {
1939     DlbState dlbState = DlbState::offCanTurnOn;
1940
1941     switch (dlbOption)
1942     {
1943         case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1944         case DlbOption::no:               dlbState = DlbState::offUser;      break;
1945         case DlbOption::yes:              dlbState = DlbState::onUser;       break;
1946         default: gmx_incons("Invalid dlbOption enum value");
1947     }
1948
1949     /* Reruns don't support DLB: bail or override auto mode */
1950     if (mdrunOptions.rerun)
1951     {
1952         std::string reasonStr = "it is not supported in reruns.";
1953         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1954     }
1955
1956     /* Unsupported integrators */
1957     if (!EI_DYNAMICS(ir->eI))
1958     {
1959         auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1960         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1961     }
1962
1963     /* Without cycle counters we can't time work to balance on */
1964     if (!bRecordLoad)
1965     {
1966         std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
1967         return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1968     }
1969
1970     if (mdrunOptions.reproducible)
1971     {
1972         std::string reasonStr = "you started a reproducible run.";
1973         switch (dlbState)
1974         {
1975             case DlbState::offUser:
1976                 break;
1977             case DlbState::offForever:
1978                 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1979                 break;
1980             case DlbState::offCanTurnOn:
1981                 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1982             case DlbState::onCanTurnOff:
1983                 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1984                 break;
1985             case DlbState::onUser:
1986                 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", mdlog);
1987             default:
1988                 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", static_cast<int>(dlbState));
1989         }
1990     }
1991
1992     return dlbState;
1993 }
1994
1995 static void set_dd_dim(const gmx::MDLogger &mdlog, gmx_domdec_t *dd)
1996 {
1997     dd->ndim = 0;
1998     if (getenv("GMX_DD_ORDER_ZYX") != nullptr)
1999     {
2000         /* Decomposition order z,y,x */
2001         GMX_LOG(mdlog.info).appendText("Using domain decomposition order z, y, x");
2002         for (int dim = DIM-1; dim >= 0; dim--)
2003         {
2004             if (dd->nc[dim] > 1)
2005             {
2006                 dd->dim[dd->ndim++] = dim;
2007             }
2008         }
2009     }
2010     else
2011     {
2012         /* Decomposition order x,y,z */
2013         for (int dim = 0; dim < DIM; dim++)
2014         {
2015             if (dd->nc[dim] > 1)
2016             {
2017                 dd->dim[dd->ndim++] = dim;
2018             }
2019         }
2020     }
2021
2022     if (dd->ndim == 0)
2023     {
2024         /* Set dim[0] to avoid extra checks on ndim in several places */
2025         dd->dim[0] = XX;
2026     }
2027 }
2028
2029 static gmx_domdec_comm_t *init_dd_comm()
2030 {
2031     gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
2032
2033     comm->n_load_have      = 0;
2034     comm->n_load_collect   = 0;
2035
2036     comm->haveTurnedOffDlb = false;
2037
2038     for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
2039     {
2040         comm->sum_nat[i] = 0;
2041     }
2042     comm->ndecomp   = 0;
2043     comm->nload     = 0;
2044     comm->load_step = 0;
2045     comm->load_sum  = 0;
2046     comm->load_max  = 0;
2047     clear_ivec(comm->load_lim);
2048     comm->load_mdf  = 0;
2049     comm->load_pme  = 0;
2050
2051     /* This should be replaced by a unique pointer */
2052     comm->balanceRegion = ddBalanceRegionAllocate();
2053
2054     return comm;
2055 }
2056
2057 /* Returns whether mtop contains constraints and/or vsites */
2058 static bool systemHasConstraintsOrVsites(const gmx_mtop_t &mtop)
2059 {
2060     auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
2061     int  nmol;
2062     while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
2063     {
2064         if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
2065         {
2066             return true;
2067         }
2068     }
2069
2070     return false;
2071 }
2072
2073 static void setupUpdateGroups(const gmx::MDLogger &mdlog,
2074                               const gmx_mtop_t    &mtop,
2075                               const t_inputrec    &inputrec,
2076                               real                 cutoffMargin,
2077                               int                  numMpiRanksTotal,
2078                               gmx_domdec_comm_t   *comm)
2079 {
2080     /* When we have constraints and/or vsites, it is beneficial to use
2081      * update groups (when possible) to allow independent update of groups.
2082      */
2083     if (!systemHasConstraintsOrVsites(mtop))
2084     {
2085         /* No constraints or vsites, atoms can be updated independently */
2086         return;
2087     }
2088
2089     comm->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
2090     comm->useUpdateGroups               = !comm->updateGroupingPerMoleculetype.empty();
2091
2092     if (comm->useUpdateGroups)
2093     {
2094         int numUpdateGroups = 0;
2095         for (const auto &molblock : mtop.molblock)
2096         {
2097             numUpdateGroups += molblock.nmol*comm->updateGroupingPerMoleculetype[molblock.type].numBlocks();
2098         }
2099
2100         /* Note: We would like to use dd->nnodes for the atom count estimate,
2101          *       but that is not yet available here. But this anyhow only
2102          *       affect performance up to the second dd_partition_system call.
2103          */
2104         int homeAtomCountEstimate =  mtop.natoms/numMpiRanksTotal;
2105         comm->updateGroupsCog =
2106             gmx::compat::make_unique<gmx::UpdateGroupsCog>(mtop,
2107                                                            comm->updateGroupingPerMoleculetype,
2108                                                            maxReferenceTemperature(inputrec),
2109                                                            homeAtomCountEstimate);
2110
2111         /* To use update groups, the large domain-to-domain cutoff distance
2112          * should be compatible with the box size.
2113          */
2114         comm->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*comm, 0) < cutoffMargin);
2115         /* TODO: Enable update groups when all infrastructure is present */
2116         comm->useUpdateGroups = false;
2117
2118         if (comm->useUpdateGroups)
2119         {
2120             GMX_LOG(mdlog.info).appendTextFormatted(
2121                     "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f nm\n",
2122                     numUpdateGroups,
2123                     mtop.natoms/static_cast<double>(numUpdateGroups),
2124                     comm->updateGroupsCog->maxUpdateGroupRadius());
2125         }
2126         else
2127         {
2128             /* TODO: Remove this comment when enabling update groups
2129              * GMX_LOG(mdlog.info).appendTextFormatted("The combination of rlist and box size prohibits the use of update groups\n");
2130              */
2131             comm->updateGroupingPerMoleculetype.clear();
2132             comm->updateGroupsCog.reset(nullptr);
2133         }
2134     }
2135 }
2136
2137 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2138 static void set_dd_limits_and_grid(const gmx::MDLogger &mdlog,
2139                                    t_commrec *cr, gmx_domdec_t *dd,
2140                                    const DomdecOptions &options,
2141                                    const MdrunOptions &mdrunOptions,
2142                                    const gmx_mtop_t *mtop,
2143                                    const t_inputrec *ir,
2144                                    const matrix box,
2145                                    gmx::ArrayRef<const gmx::RVec> xGlobal,
2146                                    gmx_ddbox_t *ddbox)
2147 {
2148     real               r_bonded         = -1;
2149     real               r_bonded_limit   = -1;
2150     const real         tenPercentMargin = 1.1;
2151     gmx_domdec_comm_t *comm             = dd->comm;
2152
2153     dd->npbcdim              = ePBC2npbcdim(ir->ePBC);
2154     dd->numBoundedDimensions = inputrec2nboundeddim(ir);
2155     dd->haveDynamicBox       = inputrecDynamicBox(ir);
2156     dd->bScrewPBC            = (ir->ePBC == epbcSCREW);
2157
2158     dd->pme_recv_f_alloc = 0;
2159     dd->pme_recv_f_buf   = nullptr;
2160
2161     /* Initialize to GPU share count to 0, might change later */
2162     comm->nrank_gpu_shared = 0;
2163
2164     comm->dlbState         = determineInitialDlbState(mdlog, options.dlbOption, comm->bRecordLoad, mdrunOptions, ir);
2165     dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2166     /* To consider turning DLB on after 2*nstlist steps we need to check
2167      * at partitioning count 3. Thus we need to increase the first count by 2.
2168      */
2169     comm->ddPartioningCountFirstDlbOff += 2;
2170
2171     GMX_LOG(mdlog.info).appendTextFormatted(
2172             "Dynamic load balancing: %s", edlbs_names[int(comm->dlbState)]);
2173
2174     comm->bPMELoadBalDLBLimits = FALSE;
2175
2176     /* Allocate the charge group/atom sorting struct */
2177     comm->sort = gmx::compat::make_unique<gmx_domdec_sort_t>();
2178
2179     comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
2180
2181     /* We need to decide on update groups early, as this affects communication distances */
2182     comm->useUpdateGroups = false;
2183     if (ir->cutoff_scheme == ecutsVERLET)
2184     {
2185         real cutoffMargin = std::sqrt(max_cutoff2(ir->ePBC, box)) - ir->rlist;
2186         setupUpdateGroups(mdlog, *mtop, *ir, cutoffMargin, cr->nnodes, comm);
2187     }
2188
2189     comm->bInterCGBondeds = ((ncg_mtop(mtop) > gmx_mtop_num_molecules(*mtop)) ||
2190                              mtop->bIntermolecularInteractions);
2191     if (comm->bInterCGBondeds)
2192     {
2193         comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
2194     }
2195     else
2196     {
2197         comm->bInterCGMultiBody = FALSE;
2198     }
2199
2200     if (comm->useUpdateGroups)
2201     {
2202         dd->splitConstraints = false;
2203         dd->splitSettles     = false;
2204     }
2205     else
2206     {
2207         dd->splitConstraints = gmx::inter_charge_group_constraints(*mtop);
2208         dd->splitSettles     = gmx::inter_charge_group_settles(*mtop);
2209     }
2210
2211     if (ir->rlist == 0)
2212     {
2213         /* Set the cut-off to some very large value,
2214          * so we don't need if statements everywhere in the code.
2215          * We use sqrt, since the cut-off is squared in some places.
2216          */
2217         comm->cutoff   = GMX_CUTOFF_INF;
2218     }
2219     else
2220     {
2221         comm->cutoff   = atomToAtomIntoDomainToDomainCutoff(*comm, ir->rlist);
2222     }
2223     comm->cutoff_mbody = 0;
2224
2225     /* Determine the minimum cell size limit, affected by many factors */
2226     comm->cellsize_limit = 0;
2227     comm->bBondComm      = FALSE;
2228
2229     /* We do not allow home atoms to move beyond the neighboring domain
2230      * between domain decomposition steps, which limits the cell size.
2231      * Get an estimate of cell size limit due to atom displacement.
2232      * In most cases this is a large overestimate, because it assumes
2233      * non-interaction atoms.
2234      * We set the chance to 1 in a trillion steps.
2235      */
2236     constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2237     const real     limitForAtomDisplacement          =
2238         minCellSizeForAtomDisplacement(*mtop, *ir,
2239                                        comm->updateGroupingPerMoleculetype,
2240                                        c_chanceThatAtomMovesBeyondDomain);
2241     GMX_LOG(mdlog.info).appendTextFormatted(
2242             "Minimum cell size due to atom displacement: %.3f nm",
2243             limitForAtomDisplacement);
2244
2245     comm->cellsize_limit = std::max(comm->cellsize_limit,
2246                                     limitForAtomDisplacement);
2247
2248     /* TODO: PME decomposition currently requires atoms not to be more than
2249      *       2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2250      *       In nearly all cases, limitForAtomDisplacement will be smaller
2251      *       than 2/3*rlist, so the PME requirement is satisfied.
2252      *       But it would be better for both correctness and performance
2253      *       to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2254      *       Note that we would need to improve the pairlist buffer case.
2255      */
2256
2257     if (comm->bInterCGBondeds)
2258     {
2259         if (options.minimumCommunicationRange > 0)
2260         {
2261             comm->cutoff_mbody = atomToAtomIntoDomainToDomainCutoff(*comm, options.minimumCommunicationRange);
2262             if (options.useBondedCommunication)
2263             {
2264                 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
2265             }
2266             else
2267             {
2268                 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
2269             }
2270             r_bonded_limit = comm->cutoff_mbody;
2271         }
2272         else if (ir->bPeriodicMols)
2273         {
2274             /* Can not easily determine the required cut-off */
2275             GMX_LOG(mdlog.warning).appendText("NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.");
2276             comm->cutoff_mbody = comm->cutoff/2;
2277             r_bonded_limit     = comm->cutoff_mbody;
2278         }
2279         else
2280         {
2281             real r_2b, r_mb;
2282
2283             if (MASTER(cr))
2284             {
2285                 dd_bonded_cg_distance(mdlog, mtop, ir, as_rvec_array(xGlobal.data()), box,
2286                                       options.checkBondedInteractions,
2287                                       &r_2b, &r_mb);
2288             }
2289             gmx_bcast(sizeof(r_2b), &r_2b, cr);
2290             gmx_bcast(sizeof(r_mb), &r_mb, cr);
2291
2292             /* We use an initial margin of 10% for the minimum cell size,
2293              * except when we are just below the non-bonded cut-off.
2294              */
2295             if (options.useBondedCommunication)
2296             {
2297                 if (std::max(r_2b, r_mb) > comm->cutoff)
2298                 {
2299                     r_bonded        = std::max(r_2b, r_mb);
2300                     r_bonded_limit  = tenPercentMargin*r_bonded;
2301                     comm->bBondComm = TRUE;
2302                 }
2303                 else
2304                 {
2305                     r_bonded       = r_mb;
2306                     r_bonded_limit = std::min(tenPercentMargin*r_bonded, comm->cutoff);
2307                 }
2308                 /* We determine cutoff_mbody later */
2309             }
2310             else
2311             {
2312                 /* No special bonded communication,
2313                  * simply increase the DD cut-off.
2314                  */
2315                 r_bonded_limit     = tenPercentMargin*std::max(r_2b, r_mb);
2316                 comm->cutoff_mbody = r_bonded_limit;
2317                 comm->cutoff       = std::max(comm->cutoff, comm->cutoff_mbody);
2318             }
2319         }
2320         GMX_LOG(mdlog.info).appendTextFormatted(
2321                 "Minimum cell size due to bonded interactions: %.3f nm",
2322                 r_bonded_limit);
2323
2324         comm->cellsize_limit = std::max(comm->cellsize_limit, r_bonded_limit);
2325     }
2326
2327     real rconstr = 0;
2328     if (dd->splitConstraints && options.constraintCommunicationRange <= 0)
2329     {
2330         /* There is a cell size limit due to the constraints (P-LINCS) */
2331         rconstr = gmx::constr_r_max(mdlog, mtop, ir);
2332         GMX_LOG(mdlog.info).appendTextFormatted(
2333                 "Estimated maximum distance required for P-LINCS: %.3f nm",
2334                 rconstr);
2335         if (rconstr > comm->cellsize_limit)
2336         {
2337             GMX_LOG(mdlog.info).appendText("This distance will limit the DD cell size, you can override this with -rcon");
2338         }
2339     }
2340     else if (options.constraintCommunicationRange > 0)
2341     {
2342         /* Here we do not check for dd->splitConstraints.
2343          * because one can also set a cell size limit for virtual sites only
2344          * and at this point we don't know yet if there are intercg v-sites.
2345          */
2346         GMX_LOG(mdlog.info).appendTextFormatted(
2347                 "User supplied maximum distance required for P-LINCS: %.3f nm",
2348                 options.constraintCommunicationRange);
2349         rconstr = options.constraintCommunicationRange;
2350     }
2351     comm->cellsize_limit = std::max(comm->cellsize_limit, rconstr);
2352
2353     comm->cgs_gl = gmx_mtop_global_cgs(mtop);
2354
2355     if (options.numCells[XX] > 0)
2356     {
2357         copy_ivec(options.numCells, dd->nc);
2358         set_dd_dim(mdlog, dd);
2359         set_ddbox_cr(*cr, &dd->nc, *ir, box, xGlobal, ddbox);
2360
2361         if (options.numPmeRanks >= 0)
2362         {
2363             cr->npmenodes = options.numPmeRanks;
2364         }
2365         else
2366         {
2367             /* When the DD grid is set explicitly and -npme is set to auto,
2368              * don't use PME ranks. We check later if the DD grid is
2369              * compatible with the total number of ranks.
2370              */
2371             cr->npmenodes = 0;
2372         }
2373
2374         real acs = average_cellsize_min(dd, ddbox);
2375         if (acs < comm->cellsize_limit)
2376         {
2377             gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2378                                  "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
2379                                  acs, comm->cellsize_limit);
2380         }
2381     }
2382     else
2383     {
2384         set_ddbox_cr(*cr, nullptr, *ir, box, xGlobal, ddbox);
2385
2386         /* We need to choose the optimal DD grid and possibly PME nodes */
2387         real limit =
2388             dd_choose_grid(mdlog, cr, dd, ir, mtop, box, ddbox,
2389                            options.numPmeRanks,
2390                            !isDlbDisabled(comm),
2391                            options.dlbScaling,
2392                            comm->cellsize_limit, comm->cutoff,
2393                            comm->bInterCGBondeds);
2394
2395         if (dd->nc[XX] == 0)
2396         {
2397             char     buf[STRLEN];
2398             gmx_bool bC = (dd->splitConstraints && rconstr > r_bonded_limit);
2399             sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
2400                     !bC ? "-rdd" : "-rcon",
2401                     comm->dlbState != DlbState::offUser ? " or -dds" : "",
2402                     bC ? " or your LINCS settings" : "");
2403
2404             gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2405                                  "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
2406                                  "%s\n"
2407                                  "Look in the log file for details on the domain decomposition",
2408                                  cr->nnodes-cr->npmenodes, limit, buf);
2409         }
2410         set_dd_dim(mdlog, dd);
2411     }
2412
2413     GMX_LOG(mdlog.info).appendTextFormatted(
2414             "Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2415             dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
2416
2417     dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
2418     if (cr->nnodes - dd->nnodes != cr->npmenodes)
2419     {
2420         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2421                              "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
2422                              dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
2423     }
2424     if (cr->npmenodes > dd->nnodes)
2425     {
2426         gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2427                              "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
2428     }
2429     if (cr->npmenodes > 0)
2430     {
2431         comm->npmenodes = cr->npmenodes;
2432     }
2433     else
2434     {
2435         comm->npmenodes = dd->nnodes;
2436     }
2437
2438     if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2439     {
2440         /* The following choices should match those
2441          * in comm_cost_est in domdec_setup.c.
2442          * Note that here the checks have to take into account
2443          * that the decomposition might occur in a different order than xyz
2444          * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2445          * in which case they will not match those in comm_cost_est,
2446          * but since that is mainly for testing purposes that's fine.
2447          */
2448         if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
2449             comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
2450             getenv("GMX_PMEONEDD") == nullptr)
2451         {
2452             comm->npmedecompdim = 2;
2453             comm->npmenodes_x   = dd->nc[XX];
2454             comm->npmenodes_y   = comm->npmenodes/comm->npmenodes_x;
2455         }
2456         else
2457         {
2458             /* In case nc is 1 in both x and y we could still choose to
2459              * decompose pme in y instead of x, but we use x for simplicity.
2460              */
2461             comm->npmedecompdim = 1;
2462             if (dd->dim[0] == YY)
2463             {
2464                 comm->npmenodes_x = 1;
2465                 comm->npmenodes_y = comm->npmenodes;
2466             }
2467             else
2468             {
2469                 comm->npmenodes_x = comm->npmenodes;
2470                 comm->npmenodes_y = 1;
2471             }
2472         }
2473         GMX_LOG(mdlog.info).appendTextFormatted(
2474                 "PME domain decomposition: %d x %d x %d",
2475                 comm->npmenodes_x, comm->npmenodes_y, 1);
2476     }
2477     else
2478     {
2479         comm->npmedecompdim = 0;
2480         comm->npmenodes_x   = 0;
2481         comm->npmenodes_y   = 0;
2482     }
2483
2484     snew(comm->slb_frac, DIM);
2485     if (isDlbDisabled(comm))
2486     {
2487         comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
2488         comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
2489         comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
2490     }
2491
2492     if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
2493     {
2494         if (comm->bBondComm || !isDlbDisabled(comm))
2495         {
2496             /* Set the bonded communication distance to halfway
2497              * the minimum and the maximum,
2498              * since the extra communication cost is nearly zero.
2499              */
2500             real acs           = average_cellsize_min(dd, ddbox);
2501             comm->cutoff_mbody = 0.5*(r_bonded + acs);
2502             if (!isDlbDisabled(comm))
2503             {
2504                 /* Check if this does not limit the scaling */
2505                 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
2506                                               options.dlbScaling*acs);
2507             }
2508             if (!comm->bBondComm)
2509             {
2510                 /* Without bBondComm do not go beyond the n.b. cut-off */
2511                 comm->cutoff_mbody = std::min(comm->cutoff_mbody, comm->cutoff);
2512                 if (comm->cellsize_limit >= comm->cutoff)
2513                 {
2514                     /* We don't loose a lot of efficieny
2515                      * when increasing it to the n.b. cut-off.
2516                      * It can even be slightly faster, because we need
2517                      * less checks for the communication setup.
2518                      */
2519                     comm->cutoff_mbody = comm->cutoff;
2520                 }
2521             }
2522             /* Check if we did not end up below our original limit */
2523             comm->cutoff_mbody = std::max(comm->cutoff_mbody, r_bonded_limit);
2524
2525             if (comm->cutoff_mbody > comm->cellsize_limit)
2526             {
2527                 comm->cellsize_limit = comm->cutoff_mbody;
2528             }
2529         }
2530         /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2531     }
2532
2533     if (debug)
2534     {
2535         fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
2536                 "cellsize limit %f\n",
2537                 gmx::boolToString(comm->bBondComm), comm->cellsize_limit);
2538     }
2539
2540     if (MASTER(cr))
2541     {
2542         check_dd_restrictions(dd, ir, mdlog);
2543     }
2544 }
2545
2546 static char *init_bLocalCG(const gmx_mtop_t *mtop)
2547 {
2548     int   ncg, cg;
2549     char *bLocalCG;
2550
2551     ncg = ncg_mtop(mtop);
2552     snew(bLocalCG, ncg);
2553     for (cg = 0; cg < ncg; cg++)
2554     {
2555         bLocalCG[cg] = FALSE;
2556     }
2557
2558     return bLocalCG;
2559 }
2560
2561 void dd_init_bondeds(FILE *fplog,
2562                      gmx_domdec_t *dd,
2563                      const gmx_mtop_t *mtop,
2564                      const gmx_vsite_t *vsite,
2565                      const t_inputrec *ir,
2566                      gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
2567 {
2568     gmx_domdec_comm_t *comm;
2569
2570     dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
2571
2572     comm = dd->comm;
2573
2574     if (comm->bBondComm)
2575     {
2576         /* Communicate atoms beyond the cut-off for bonded interactions */
2577         comm = dd->comm;
2578
2579         comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
2580
2581         comm->bLocalCG = init_bLocalCG(mtop);
2582     }
2583     else
2584     {
2585         /* Only communicate atoms based on cut-off */
2586         comm->cglink   = nullptr;
2587         comm->bLocalCG = nullptr;
2588     }
2589 }
2590
2591 static void writeSettings(gmx::TextWriter       *log,
2592                           gmx_domdec_t          *dd,
2593                           const gmx_mtop_t      *mtop,
2594                           const t_inputrec      *ir,
2595                           gmx_bool               bDynLoadBal,
2596                           real                   dlb_scale,
2597                           const gmx_ddbox_t     *ddbox)
2598 {
2599     gmx_domdec_comm_t *comm;
2600     int                d;
2601     ivec               np;
2602     real               limit, shrink;
2603
2604     comm = dd->comm;
2605
2606     if (bDynLoadBal)
2607     {
2608         log->writeString("The maximum number of communication pulses is:");
2609         for (d = 0; d < dd->ndim; d++)
2610         {
2611             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2612         }
2613         log->ensureLineBreak();
2614         log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm", comm->cellsize_limit);
2615         log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2616         log->writeString("The allowed shrink of domain decomposition cells is:");
2617         for (d = 0; d < DIM; d++)
2618         {
2619             if (dd->nc[d] > 1)
2620             {
2621                 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
2622                 {
2623                     shrink = 0;
2624                 }
2625                 else
2626                 {
2627                     shrink =
2628                         comm->cellsize_min_dlb[d]/
2629                         (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
2630                 }
2631                 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2632             }
2633         }
2634         log->ensureLineBreak();
2635     }
2636     else
2637     {
2638         set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2639         log->writeString("The initial number of communication pulses is:");
2640         for (d = 0; d < dd->ndim; d++)
2641         {
2642             log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2643         }
2644         log->ensureLineBreak();
2645         log->writeString("The initial domain decomposition cell size is:");
2646         for (d = 0; d < DIM; d++)
2647         {
2648             if (dd->nc[d] > 1)
2649             {
2650                 log->writeStringFormatted(" %c %.2f nm",
2651                                           dim2char(d), dd->comm->cellsize_min[d]);
2652             }
2653         }
2654         log->ensureLineBreak();
2655         log->writeLine();
2656     }
2657
2658     gmx_bool bInterCGVsites = count_intercg_vsites(mtop) != 0;
2659
2660     if (comm->bInterCGBondeds ||
2661         bInterCGVsites ||
2662         dd->splitConstraints || dd->splitSettles)
2663     {
2664         log->writeLine("The maximum allowed distance for charge groups involved in interactions is:");
2665         log->writeLineFormatted("%40s  %-7s %6.3f nm", "non-bonded interactions", "", comm->cutoff);
2666
2667         if (bDynLoadBal)
2668         {
2669             limit = dd->comm->cellsize_limit;
2670         }
2671         else
2672         {
2673             if (dynamic_dd_box(*dd))
2674             {
2675                 log->writeLine("(the following are initial values, they could change due to box deformation)");
2676             }
2677             limit = dd->comm->cellsize_min[XX];
2678             for (d = 1; d < DIM; d++)
2679             {
2680                 limit = std::min(limit, dd->comm->cellsize_min[d]);
2681             }
2682         }
2683
2684         if (comm->bInterCGBondeds)
2685         {
2686             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2687                                     "two-body bonded interactions", "(-rdd)",
2688                                     std::max(comm->cutoff, comm->cutoff_mbody));
2689             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2690                                     "multi-body bonded interactions", "(-rdd)",
2691                                     (comm->bBondComm || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
2692         }
2693         if (bInterCGVsites)
2694         {
2695             log->writeLineFormatted("%40s  %-7s %6.3f nm",
2696                                     "virtual site constructions", "(-rcon)", limit);
2697         }
2698         if (dd->splitConstraints || dd->splitSettles)
2699         {
2700             std::string separation = gmx::formatString("atoms separated by up to %d constraints",
2701                                                        1+ir->nProjOrder);
2702             log->writeLineFormatted("%40s  %-7s %6.3f nm\n",
2703                                     separation.c_str(), "(-rcon)", limit);
2704         }
2705         log->ensureLineBreak();
2706     }
2707 }
2708
2709 static void logSettings(const gmx::MDLogger &mdlog,
2710                         gmx_domdec_t        *dd,
2711                         const gmx_mtop_t    *mtop,
2712                         const t_inputrec    *ir,
2713                         real                 dlb_scale,
2714                         const gmx_ddbox_t   *ddbox)
2715 {
2716     gmx::StringOutputStream stream;
2717     gmx::TextWriter         log(&stream);
2718     writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2719     if (dd->comm->dlbState == DlbState::offCanTurnOn)
2720     {
2721         {
2722             log.ensureEmptyLine();
2723             log.writeLine("When dynamic load balancing gets turned on, these settings will change to:");
2724         }
2725         writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2726     }
2727     GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2728 }
2729
2730 static void set_cell_limits_dlb(const gmx::MDLogger &mdlog,
2731                                 gmx_domdec_t        *dd,
2732                                 real                 dlb_scale,
2733                                 const t_inputrec    *ir,
2734                                 const gmx_ddbox_t   *ddbox)
2735 {
2736     gmx_domdec_comm_t *comm;
2737     int                d, dim, npulse, npulse_d_max, npulse_d;
2738     gmx_bool           bNoCutOff;
2739
2740     comm = dd->comm;
2741
2742     bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2743
2744     /* Determine the maximum number of comm. pulses in one dimension */
2745
2746     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2747
2748     /* Determine the maximum required number of grid pulses */
2749     if (comm->cellsize_limit >= comm->cutoff)
2750     {
2751         /* Only a single pulse is required */
2752         npulse = 1;
2753     }
2754     else if (!bNoCutOff && comm->cellsize_limit > 0)
2755     {
2756         /* We round down slightly here to avoid overhead due to the latency
2757          * of extra communication calls when the cut-off
2758          * would be only slightly longer than the cell size.
2759          * Later cellsize_limit is redetermined,
2760          * so we can not miss interactions due to this rounding.
2761          */
2762         npulse = static_cast<int>(0.96 + comm->cutoff/comm->cellsize_limit);
2763     }
2764     else
2765     {
2766         /* There is no cell size limit */
2767         npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
2768     }
2769
2770     if (!bNoCutOff && npulse > 1)
2771     {
2772         /* See if we can do with less pulses, based on dlb_scale */
2773         npulse_d_max = 0;
2774         for (d = 0; d < dd->ndim; d++)
2775         {
2776             dim      = dd->dim[d];
2777             npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->cutoff
2778                                         /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
2779             npulse_d_max = std::max(npulse_d_max, npulse_d);
2780         }
2781         npulse = std::min(npulse, npulse_d_max);
2782     }
2783
2784     /* This env var can override npulse */
2785     d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2786     if (d > 0)
2787     {
2788         npulse = d;
2789     }
2790
2791     comm->maxpulse       = 1;
2792     comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
2793     for (d = 0; d < dd->ndim; d++)
2794     {
2795         comm->cd[d].np_dlb    = std::min(npulse, dd->nc[dd->dim[d]]-1);
2796         comm->maxpulse        = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2797         if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
2798         {
2799             comm->bVacDLBNoLimit = FALSE;
2800         }
2801     }
2802
2803     /* cellsize_limit is set for LINCS in init_domain_decomposition */
2804     if (!comm->bVacDLBNoLimit)
2805     {
2806         comm->cellsize_limit = std::max(comm->cellsize_limit,
2807                                         comm->cutoff/comm->maxpulse);
2808     }
2809     comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2810     /* Set the minimum cell size for each DD dimension */
2811     for (d = 0; d < dd->ndim; d++)
2812     {
2813         if (comm->bVacDLBNoLimit ||
2814             comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
2815         {
2816             comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2817         }
2818         else
2819         {
2820             comm->cellsize_min_dlb[dd->dim[d]] =
2821                 comm->cutoff/comm->cd[d].np_dlb;
2822         }
2823     }
2824     if (comm->cutoff_mbody <= 0)
2825     {
2826         comm->cutoff_mbody = std::min(comm->cutoff, comm->cellsize_limit);
2827     }
2828     if (isDlbOn(comm))
2829     {
2830         set_dlb_limits(dd);
2831     }
2832 }
2833
2834 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
2835 {
2836     /* If each molecule is a single charge group
2837      * or we use domain decomposition for each periodic dimension,
2838      * we do not need to take pbc into account for the bonded interactions.
2839      */
2840     return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
2841             !(dd->nc[XX] > 1 &&
2842               dd->nc[YY] > 1 &&
2843               (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
2844 }
2845
2846 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2847 static void set_ddgrid_parameters(const gmx::MDLogger &mdlog,
2848                                   gmx_domdec_t *dd, real dlb_scale,
2849                                   const gmx_mtop_t *mtop, const t_inputrec *ir,
2850                                   const gmx_ddbox_t *ddbox)
2851 {
2852     gmx_domdec_comm_t *comm;
2853     int                natoms_tot;
2854     real               vol_frac;
2855
2856     comm = dd->comm;
2857
2858     if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2859     {
2860         init_ddpme(dd, &comm->ddpme[0], 0);
2861         if (comm->npmedecompdim >= 2)
2862         {
2863             init_ddpme(dd, &comm->ddpme[1], 1);
2864         }
2865     }
2866     else
2867     {
2868         comm->npmenodes = 0;
2869         if (dd->pme_nodeid >= 0)
2870         {
2871             gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2872                                  "Can not have separate PME ranks without PME electrostatics");
2873         }
2874     }
2875
2876     if (debug)
2877     {
2878         fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
2879     }
2880     if (!isDlbDisabled(comm))
2881     {
2882         set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2883     }
2884
2885     logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
2886
2887     if (ir->ePBC == epbcNONE)
2888     {
2889         vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
2890     }
2891     else
2892     {
2893         vol_frac =
2894             (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/static_cast<double>(dd->nnodes);
2895     }
2896     if (debug)
2897     {
2898         fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2899     }
2900     natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
2901
2902     dd->ga2la  = new gmx_ga2la_t(natoms_tot,
2903                                  static_cast<int>(vol_frac*natoms_tot));
2904 }
2905
2906 /*! \brief Set some important DD parameters that can be modified by env.vars */
2907 static void set_dd_envvar_options(const gmx::MDLogger &mdlog,
2908                                   gmx_domdec_t *dd, int rank_mysim)
2909 {
2910     gmx_domdec_comm_t *comm = dd->comm;
2911
2912     dd->bSendRecv2      = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2913     comm->dlb_scale_lim = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2914     comm->eFlop         = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2915     int recload         = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2916     comm->nstDDDump     = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2917     comm->nstDDDumpGrid = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2918     comm->DD_debug      = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
2919
2920     if (dd->bSendRecv2)
2921     {
2922         GMX_LOG(mdlog.info).appendText("Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication");
2923     }
2924
2925     if (comm->eFlop)
2926     {
2927         GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2928         if (comm->eFlop > 1)
2929         {
2930             srand(1 + rank_mysim);
2931         }
2932         comm->bRecordLoad = TRUE;
2933     }
2934     else
2935     {
2936         comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
2937     }
2938 }
2939
2940 gmx_domdec_t *init_domain_decomposition(const gmx::MDLogger           &mdlog,
2941                                         t_commrec                     *cr,
2942                                         const DomdecOptions           &options,
2943                                         const MdrunOptions            &mdrunOptions,
2944                                         const gmx_mtop_t              *mtop,
2945                                         const t_inputrec              *ir,
2946                                         const matrix                   box,
2947                                         gmx::ArrayRef<const gmx::RVec> xGlobal,
2948                                         gmx::LocalAtomSetManager      *atomSets)
2949 {
2950     gmx_domdec_t      *dd;
2951
2952     GMX_LOG(mdlog.info).appendTextFormatted(
2953             "\nInitializing Domain Decomposition on %d ranks", cr->nnodes);
2954
2955     dd = new gmx_domdec_t;
2956
2957     dd->comm = init_dd_comm();
2958
2959     /* Initialize DD paritioning counters */
2960     dd->comm->partition_step = INT_MIN;
2961     dd->ddp_count            = 0;
2962
2963     set_dd_envvar_options(mdlog, dd, cr->nodeid);
2964
2965     gmx_ddbox_t ddbox = {0};
2966     set_dd_limits_and_grid(mdlog, cr, dd, options, mdrunOptions,
2967                            mtop, ir,
2968                            box, xGlobal,
2969                            &ddbox);
2970
2971     make_dd_communicators(mdlog, cr, dd, options.rankOrder);
2972
2973     if (thisRankHasDuty(cr, DUTY_PP))
2974     {
2975         set_ddgrid_parameters(mdlog, dd, options.dlbScaling, mtop, ir, &ddbox);
2976
2977         setup_neighbor_relations(dd);
2978     }
2979
2980     /* Set overallocation to avoid frequent reallocation of arrays */
2981     set_over_alloc_dd(TRUE);
2982
2983     clear_dd_cycle_counts(dd);
2984
2985     dd->atomSets = atomSets;
2986
2987     return dd;
2988 }
2989
2990 static gmx_bool test_dd_cutoff(t_commrec     *cr,
2991                                const t_state &state,
2992                                real           cutoffRequested)
2993 {
2994     gmx_domdec_t *dd;
2995     gmx_ddbox_t   ddbox;
2996     int           d, dim, np;
2997     real          inv_cell_size;
2998     int           LocallyLimited;
2999
3000     dd = cr->dd;
3001
3002     set_ddbox(*dd, false, state.box, true, state.x, &ddbox);
3003
3004     LocallyLimited = 0;
3005
3006     for (d = 0; d < dd->ndim; d++)
3007     {
3008         dim = dd->dim[d];
3009
3010         inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
3011         if (dynamic_dd_box(*dd))
3012         {
3013             inv_cell_size *= DD_PRES_SCALE_MARGIN;
3014         }
3015
3016         np = 1 + static_cast<int>(cutoffRequested*inv_cell_size*ddbox.skew_fac[dim]);
3017
3018         if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3019         {
3020             if (np > dd->comm->cd[d].np_dlb)
3021             {
3022                 return FALSE;
3023             }
3024
3025             /* If a current local cell size is smaller than the requested
3026              * cut-off, we could still fix it, but this gets very complicated.
3027              * Without fixing here, we might actually need more checks.
3028              */
3029             real cellSizeAlongDim = (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim];
3030             if (cellSizeAlongDim*dd->comm->cd[d].np_dlb < cutoffRequested)
3031             {
3032                 LocallyLimited = 1;
3033             }
3034         }
3035     }
3036
3037     if (!isDlbDisabled(dd->comm))
3038     {
3039         /* If DLB is not active yet, we don't need to check the grid jumps.
3040          * Actually we shouldn't, because then the grid jump data is not set.
3041          */
3042         if (isDlbOn(dd->comm) &&
3043             check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3044         {
3045             LocallyLimited = 1;
3046         }
3047
3048         gmx_sumi(1, &LocallyLimited, cr);
3049
3050         if (LocallyLimited > 0)
3051         {
3052             return FALSE;
3053         }
3054     }
3055
3056     return TRUE;
3057 }
3058
3059 gmx_bool change_dd_cutoff(t_commrec     *cr,
3060                           const t_state &state,
3061                           real           cutoffRequested)
3062 {
3063     gmx_bool bCutoffAllowed;
3064
3065     bCutoffAllowed = test_dd_cutoff(cr, state, cutoffRequested);
3066
3067     if (bCutoffAllowed)
3068     {
3069         cr->dd->comm->cutoff = cutoffRequested;
3070     }
3071
3072     return bCutoffAllowed;
3073 }