2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
53 #include "gromacs/domdec/collect.h"
54 #include "gromacs/domdec/dlb.h"
55 #include "gromacs/domdec/dlbtiming.h"
56 #include "gromacs/domdec/domdec_network.h"
57 #include "gromacs/domdec/ga2la.h"
58 #include "gromacs/domdec/options.h"
59 #include "gromacs/domdec/partition.h"
60 #include "gromacs/gmxlib/network.h"
61 #include "gromacs/gmxlib/nrnb.h"
62 #include "gromacs/gpu_utils/gpu_utils.h"
63 #include "gromacs/hardware/hw_info.h"
64 #include "gromacs/listed_forces/manage_threading.h"
65 #include "gromacs/math/vec.h"
66 #include "gromacs/math/vectypes.h"
67 #include "gromacs/mdlib/calc_verletbuf.h"
68 #include "gromacs/mdlib/constr.h"
69 #include "gromacs/mdlib/constraintrange.h"
70 #include "gromacs/mdlib/updategroups.h"
71 #include "gromacs/mdlib/vsite.h"
72 #include "gromacs/mdtypes/commrec.h"
73 #include "gromacs/mdtypes/forceoutput.h"
74 #include "gromacs/mdtypes/inputrec.h"
75 #include "gromacs/mdtypes/mdrunoptions.h"
76 #include "gromacs/mdtypes/state.h"
77 #include "gromacs/pbcutil/ishift.h"
78 #include "gromacs/pbcutil/pbc.h"
79 #include "gromacs/pulling/pull.h"
80 #include "gromacs/timing/wallcycle.h"
81 #include "gromacs/topology/block.h"
82 #include "gromacs/topology/idef.h"
83 #include "gromacs/topology/ifunc.h"
84 #include "gromacs/topology/mtop_lookup.h"
85 #include "gromacs/topology/mtop_util.h"
86 #include "gromacs/topology/topology.h"
87 #include "gromacs/utility/basedefinitions.h"
88 #include "gromacs/utility/basenetwork.h"
89 #include "gromacs/utility/cstringutil.h"
90 #include "gromacs/utility/exceptions.h"
91 #include "gromacs/utility/fatalerror.h"
92 #include "gromacs/utility/gmxmpi.h"
93 #include "gromacs/utility/logger.h"
94 #include "gromacs/utility/real.h"
95 #include "gromacs/utility/smalloc.h"
96 #include "gromacs/utility/strconvert.h"
97 #include "gromacs/utility/stringstream.h"
98 #include "gromacs/utility/stringutil.h"
99 #include "gromacs/utility/textwriter.h"
101 #include "atomdistribution.h"
103 #include "cellsizes.h"
104 #include "distribute.h"
105 #include "domdec_constraints.h"
106 #include "domdec_internal.h"
107 #include "domdec_setup.h"
108 #include "domdec_vsite.h"
109 #include "redistribute.h"
112 // TODO remove this when moving domdec into gmx namespace
113 using gmx::DdRankOrder;
114 using gmx::DlbOption;
115 using gmx::DomdecOptions;
117 static const char *edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
119 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
122 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
123 #define DD_FLAG_NRCG 65535
124 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
125 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
127 /* The DD zone order */
128 static const ivec dd_zo[DD_MAXZONE] =
129 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
131 /* The non-bonded zone-pair setup for domain decomposition
132 * The first number is the i-zone, the second number the first j-zone seen by
133 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
134 * As is, this is for 3D decomposition, where there are 4 i-zones.
135 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
136 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
139 ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
146 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
148 static void index2xyz(ivec nc,int ind,ivec xyz)
150 xyz[XX] = ind % nc[XX];
151 xyz[YY] = (ind / nc[XX]) % nc[YY];
152 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
156 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
158 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
159 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
160 xyz[ZZ] = ind % nc[ZZ];
163 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
167 const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
168 const int ddindex = dd_index(dd->nc, c);
169 if (cartSetup.bCartesianPP_PME)
171 ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
173 else if (cartSetup.bCartesianPP)
176 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
187 int ddglatnr(const gmx_domdec_t *dd, int i)
197 if (i >= dd->comm->atomRanges.numAtomsTotal())
199 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
201 atnr = dd->globalAtomIndices[i] + 1;
207 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
209 return &dd->comm->cgs_gl;
212 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t &dd)
214 GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
215 return dd.comm->systemInfo.updateGroupingPerMoleculetype;
218 void dd_store_state(gmx_domdec_t *dd, t_state *state)
222 if (state->ddp_count != dd->ddp_count)
224 gmx_incons("The MD state does not match the domain decomposition state");
227 state->cg_gl.resize(dd->ncg_home);
228 for (i = 0; i < dd->ncg_home; i++)
230 state->cg_gl[i] = dd->globalAtomGroupIndices[i];
233 state->ddp_count_cg_gl = dd->ddp_count;
236 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
238 return &dd->comm->zones;
241 void dd_get_ns_ranges(const gmx_domdec_t *dd, int icg,
242 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
244 gmx_domdec_zones_t *zones;
247 zones = &dd->comm->zones;
250 while (icg >= zones->izone[izone].cg1)
259 else if (izone < zones->nizone)
261 *jcg0 = zones->izone[izone].jcg0;
265 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
266 icg, izone, zones->nizone);
269 *jcg1 = zones->izone[izone].jcg1;
271 for (d = 0; d < dd->ndim; d++)
274 shift0[dim] = zones->izone[izone].shift0[dim];
275 shift1[dim] = zones->izone[izone].shift1[dim];
276 if (dd->comm->tric_dir[dim] || (isDlbOn(dd->comm) && d > 0))
278 /* A conservative approach, this can be optimized */
285 int dd_numHomeAtoms(const gmx_domdec_t &dd)
287 return dd.comm->atomRanges.numHomeAtoms();
290 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
292 /* We currently set mdatoms entries for all atoms:
293 * local + non-local + communicated for vsite + constraints
296 return dd->comm->atomRanges.numAtomsTotal();
299 int dd_natoms_vsite(const gmx_domdec_t *dd)
301 return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
304 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
306 *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
307 *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
310 void dd_move_x(gmx_domdec_t *dd,
312 gmx::ArrayRef<gmx::RVec> x,
313 gmx_wallcycle *wcycle)
315 wallcycle_start(wcycle, ewcMOVEX);
318 gmx_domdec_comm_t *comm;
319 gmx_domdec_comm_dim_t *cd;
320 rvec shift = {0, 0, 0};
321 gmx_bool bPBC, bScrew;
326 nat_tot = comm->atomRanges.numHomeAtoms();
327 for (int d = 0; d < dd->ndim; d++)
329 bPBC = (dd->ci[dd->dim[d]] == 0);
330 bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
333 copy_rvec(box[dd->dim[d]], shift);
336 for (const gmx_domdec_ind_t &ind : cd->ind)
338 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
339 gmx::ArrayRef<gmx::RVec> &sendBuffer = sendBufferAccess.buffer;
343 for (int j : ind.index)
345 sendBuffer[n] = x[j];
351 for (int j : ind.index)
353 /* We need to shift the coordinates */
354 for (int d = 0; d < DIM; d++)
356 sendBuffer[n][d] = x[j][d] + shift[d];
363 for (int j : ind.index)
366 sendBuffer[n][XX] = x[j][XX] + shift[XX];
368 * This operation requires a special shift force
369 * treatment, which is performed in calc_vir.
371 sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
372 sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
377 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
379 gmx::ArrayRef<gmx::RVec> receiveBuffer;
380 if (cd->receiveInPlace)
382 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
386 receiveBuffer = receiveBufferAccess.buffer;
388 /* Send and receive the coordinates */
389 ddSendrecv(dd, d, dddirBackward,
390 sendBuffer, receiveBuffer);
392 if (!cd->receiveInPlace)
395 for (int zone = 0; zone < nzone; zone++)
397 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
399 x[i] = receiveBuffer[j++];
403 nat_tot += ind.nrecv[nzone+1];
408 wallcycle_stop(wcycle, ewcMOVEX);
411 void dd_move_f(gmx_domdec_t *dd,
412 gmx::ForceWithShiftForces *forceWithShiftForces,
413 gmx_wallcycle *wcycle)
415 wallcycle_start(wcycle, ewcMOVEF);
417 gmx::ArrayRef<gmx::RVec> f = forceWithShiftForces->force();
418 gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();
420 gmx_domdec_comm_t &comm = *dd->comm;
421 int nzone = comm.zones.n/2;
422 int nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
423 for (int d = dd->ndim-1; d >= 0; d--)
425 /* Only forces in domains near the PBC boundaries need to
426 consider PBC in the treatment of fshift */
427 const bool shiftForcesNeedPbc = (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
428 const bool applyScrewPbc = (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
429 /* Determine which shift vector we need */
430 ivec vis = { 0, 0, 0 };
432 const int is = IVEC2IS(vis);
434 /* Loop over the pulses */
435 const gmx_domdec_comm_dim_t &cd = comm.cd[d];
436 for (int p = cd.numPulses() - 1; p >= 0; p--)
438 const gmx_domdec_ind_t &ind = cd.ind[p];
439 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
440 gmx::ArrayRef<gmx::RVec> &receiveBuffer = receiveBufferAccess.buffer;
442 nat_tot -= ind.nrecv[nzone+1];
444 DDBufferAccess<gmx::RVec> sendBufferAccess(comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
446 gmx::ArrayRef<gmx::RVec> sendBuffer;
447 if (cd.receiveInPlace)
449 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
453 sendBuffer = sendBufferAccess.buffer;
455 for (int zone = 0; zone < nzone; zone++)
457 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
459 sendBuffer[j++] = f[i];
463 /* Communicate the forces */
464 ddSendrecv(dd, d, dddirForward,
465 sendBuffer, receiveBuffer);
466 /* Add the received forces */
468 if (!shiftForcesNeedPbc)
470 for (int j : ind.index)
472 for (int d = 0; d < DIM; d++)
474 f[j][d] += receiveBuffer[n][d];
479 else if (!applyScrewPbc)
481 for (int j : ind.index)
483 for (int d = 0; d < DIM; d++)
485 f[j][d] += receiveBuffer[n][d];
487 /* Add this force to the shift force */
488 for (int d = 0; d < DIM; d++)
490 fshift[is][d] += receiveBuffer[n][d];
497 for (int j : ind.index)
499 /* Rotate the force */
500 f[j][XX] += receiveBuffer[n][XX];
501 f[j][YY] -= receiveBuffer[n][YY];
502 f[j][ZZ] -= receiveBuffer[n][ZZ];
503 if (shiftForcesNeedPbc)
505 /* Add this force to the shift force */
506 for (int d = 0; d < DIM; d++)
508 fshift[is][d] += receiveBuffer[n][d];
517 wallcycle_stop(wcycle, ewcMOVEF);
520 /* Convenience function for extracting a real buffer from an rvec buffer
522 * To reduce the number of temporary communication buffers and avoid
523 * cache polution, we reuse gmx::RVec buffers for storing reals.
524 * This functions return a real buffer reference with the same number
525 * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
527 static gmx::ArrayRef<real>
528 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
530 return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
534 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
537 gmx_domdec_comm_t *comm;
538 gmx_domdec_comm_dim_t *cd;
543 nat_tot = comm->atomRanges.numHomeAtoms();
544 for (int d = 0; d < dd->ndim; d++)
547 for (const gmx_domdec_ind_t &ind : cd->ind)
549 /* Note: We provision for RVec instead of real, so a factor of 3
550 * more than needed. The buffer actually already has this size
551 * and we pass a plain pointer below, so this does not matter.
553 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
554 gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
556 for (int j : ind.index)
558 sendBuffer[n++] = v[j];
561 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
563 gmx::ArrayRef<real> receiveBuffer;
564 if (cd->receiveInPlace)
566 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
570 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
572 /* Send and receive the data */
573 ddSendrecv(dd, d, dddirBackward,
574 sendBuffer, receiveBuffer);
575 if (!cd->receiveInPlace)
578 for (int zone = 0; zone < nzone; zone++)
580 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
582 v[i] = receiveBuffer[j++];
586 nat_tot += ind.nrecv[nzone+1];
592 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
595 gmx_domdec_comm_t *comm;
596 gmx_domdec_comm_dim_t *cd;
600 nzone = comm->zones.n/2;
601 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
602 for (int d = dd->ndim-1; d >= 0; d--)
605 for (int p = cd->numPulses() - 1; p >= 0; p--)
607 const gmx_domdec_ind_t &ind = cd->ind[p];
609 /* Note: We provision for RVec instead of real, so a factor of 3
610 * more than needed. The buffer actually already has this size
611 * and we typecast, so this works as intended.
613 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
614 gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
615 nat_tot -= ind.nrecv[nzone + 1];
617 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
619 gmx::ArrayRef<real> sendBuffer;
620 if (cd->receiveInPlace)
622 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
626 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
628 for (int zone = 0; zone < nzone; zone++)
630 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
632 sendBuffer[j++] = v[i];
636 /* Communicate the forces */
637 ddSendrecv(dd, d, dddirForward,
638 sendBuffer, receiveBuffer);
639 /* Add the received forces */
641 for (int j : ind.index)
643 v[j] += receiveBuffer[n];
651 real dd_cutoff_multibody(const gmx_domdec_t *dd)
653 const gmx_domdec_comm_t &comm = *dd->comm;
654 const DDSystemInfo &systemInfo = comm.systemInfo;
657 if (systemInfo.haveInterDomainMultiBodyBondeds)
659 if (comm.cutoff_mbody > 0)
661 r = comm.cutoff_mbody;
665 /* cutoff_mbody=0 means we do not have DLB */
666 r = comm.cellsize_min[dd->dim[0]];
667 for (int di = 1; di < dd->ndim; di++)
669 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
671 if (comm.systemInfo.filterBondedCommunication)
673 r = std::max(r, comm.cutoff_mbody);
677 r = std::min(r, systemInfo.cutoff);
685 real dd_cutoff_twobody(const gmx_domdec_t *dd)
689 r_mb = dd_cutoff_multibody(dd);
691 return std::max(dd->comm->systemInfo.cutoff, r_mb);
695 static void dd_cart_coord2pmecoord(const DDRankSetup &ddRankSetup,
696 const CartesianRankSetup &cartSetup,
700 const int nc = ddRankSetup.numPPCells[cartSetup.cartpmedim];
701 const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
702 copy_ivec(coord, coord_pme);
703 coord_pme[cartSetup.cartpmedim] =
704 nc + (coord[cartSetup.cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
707 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
708 static int ddindex2pmeindex(const DDRankSetup &ddRankSetup,
709 const int ddCellIndex)
711 const int npp = ddRankSetup.numPPRanks;
712 const int npme = ddRankSetup.numRanksDoingPme;
714 /* Here we assign a PME node to communicate with this DD node
715 * by assuming that the major index of both is x.
716 * We add npme/2 to obtain an even distribution.
718 return (ddCellIndex*npme + npme/2)/npp;
721 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup &ddRankSetup)
723 std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
726 for (int i = 0; i < ddRankSetup.numPPRanks; i++)
728 const int p0 = ddindex2pmeindex(ddRankSetup, i);
729 const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
730 if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
734 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
736 pmeRanks[n] = i + 1 + n;
744 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
752 if (dd->comm->bCartesian) {
753 gmx_ddindex2xyz(dd->nc,ddindex,coords);
754 dd_coords2pmecoords(dd,coords,coords_pme);
755 copy_ivec(dd->ntot,nc);
756 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
757 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
759 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
761 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
767 slab = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->nc, coords));
772 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
774 const CartesianRankSetup &cartSetup = cr->dd->comm->cartesianRankSetup;
775 ivec coords = { x, y, z };
778 if (cartSetup.bCartesianPP_PME)
781 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
786 int ddindex = dd_index(cr->dd->nc, coords);
787 if (cartSetup.bCartesianPP)
789 nodeid = cartSetup.ddindex2simnodeid[ddindex];
793 if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
795 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
807 static int dd_simnode2pmenode(const DDRankSetup &ddRankSetup,
808 const CartesianRankSetup &cartSetup,
809 gmx::ArrayRef<const int> pmeRanks,
810 const t_commrec gmx_unused *cr,
811 const int sim_nodeid)
815 /* This assumes a uniform x domain decomposition grid cell size */
816 if (cartSetup.bCartesianPP_PME)
819 ivec coord, coord_pme;
820 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
821 if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
823 /* This is a PP rank */
824 dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
825 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
829 else if (cartSetup.bCartesianPP)
831 if (sim_nodeid < ddRankSetup.numPPRanks)
833 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
838 /* This assumes DD cells with identical x coordinates
839 * are numbered sequentially.
841 if (pmeRanks.empty())
843 if (sim_nodeid < ddRankSetup.numPPRanks)
845 /* The DD index equals the nodeid */
846 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
852 while (sim_nodeid > pmeRanks[i])
856 if (sim_nodeid < pmeRanks[i])
858 pmenode = pmeRanks[i];
866 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
871 dd->comm->ddRankSetup.npmenodes_x,
872 dd->comm->ddRankSetup.npmenodes_y
883 std::vector<int> get_pme_ddranks(const t_commrec *cr,
886 const DDRankSetup &ddRankSetup = cr->dd->comm->ddRankSetup;
887 const CartesianRankSetup &cartSetup = cr->dd->comm->cartesianRankSetup;
888 GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks, "This function should only be called when PME-only ranks are in use");
889 std::vector<int> ddranks;
890 ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1)/ddRankSetup.numRanksDoingPme);
892 for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
894 for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
896 for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
898 if (cartSetup.bCartesianPP_PME)
900 ivec coord = { x, y, z };
902 dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
903 if (cr->dd->ci[XX] == coord_pme[XX] &&
904 cr->dd->ci[YY] == coord_pme[YY] &&
905 cr->dd->ci[ZZ] == coord_pme[ZZ])
907 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
912 /* The slab corresponds to the nodeid in the PME group */
913 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
915 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
924 static gmx_bool receive_vir_ener(const gmx_domdec_t *dd,
925 gmx::ArrayRef<const int> pmeRanks,
928 gmx_bool bReceive = TRUE;
930 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
931 if (ddRankSetup.usePmeOnlyRanks)
933 const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
934 if (cartSetup.bCartesianPP_PME)
937 int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
939 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
940 coords[cartSetup.cartpmedim]++;
941 if (coords[cartSetup.cartpmedim] < dd->nc[cartSetup.cartpmedim])
944 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
945 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
947 /* This is not the last PP node for pmenode */
952 GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
957 int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
958 if (cr->sim_nodeid+1 < cr->nnodes &&
959 dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
961 /* This is not the last PP node for pmenode */
970 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
972 gmx_domdec_comm_t *comm;
977 snew(*dim_f, dd->nc[dim]+1);
979 for (i = 1; i < dd->nc[dim]; i++)
981 if (comm->slb_frac[dim])
983 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
987 (*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
990 (*dim_f)[dd->nc[dim]] = 1;
993 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
995 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
997 if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
1003 ddpme->dim = dimind;
1005 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
1007 ddpme->nslab = (ddpme->dim == 0 ?
1008 ddRankSetup.npmenodes_x :
1009 ddRankSetup.npmenodes_y);
1011 if (ddpme->nslab <= 1)
1016 const int nso = ddRankSetup.numRanksDoingPme/ddpme->nslab;
1017 /* Determine for each PME slab the PP location range for dimension dim */
1018 snew(ddpme->pp_min, ddpme->nslab);
1019 snew(ddpme->pp_max, ddpme->nslab);
1020 for (int slab = 0; slab < ddpme->nslab; slab++)
1022 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
1023 ddpme->pp_max[slab] = 0;
1025 for (int i = 0; i < dd->nnodes; i++)
1028 ddindex2xyz(dd->nc, i, xyz);
1029 /* For y only use our y/z slab.
1030 * This assumes that the PME x grid size matches the DD grid size.
1032 if (dimind == 0 || xyz[XX] == dd->ci[XX])
1034 const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
1038 slab = pmeindex/nso;
1042 slab = pmeindex % ddpme->nslab;
1044 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
1045 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1049 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1052 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1054 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1056 if (ddRankSetup.ddpme[0].dim == XX)
1058 return ddRankSetup.ddpme[0].maxshift;
1066 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1068 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1070 if (ddRankSetup.ddpme[0].dim == YY)
1072 return ddRankSetup.ddpme[0].maxshift;
1074 else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
1076 return ddRankSetup.ddpme[1].maxshift;
1084 bool ddHaveSplitConstraints(const gmx_domdec_t &dd)
1086 return dd.comm->systemInfo.haveSplitConstraints;
1089 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
1091 /* Note that the cycles value can be incorrect, either 0 or some
1092 * extremely large value, when our thread migrated to another core
1093 * with an unsynchronized cycle counter. If this happens less often
1094 * that once per nstlist steps, this will not cause issues, since
1095 * we later subtract the maximum value from the sum over nstlist steps.
1096 * A zero count will slightly lower the total, but that's a small effect.
1097 * Note that the main purpose of the subtraction of the maximum value
1098 * is to avoid throwing off the load balancing when stalls occur due
1099 * e.g. system activity or network congestion.
1101 dd->comm->cycl[ddCycl] += cycles;
1102 dd->comm->cycl_n[ddCycl]++;
1103 if (cycles > dd->comm->cycl_max[ddCycl])
1105 dd->comm->cycl_max[ddCycl] = cycles;
1110 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
1115 gmx_bool bPartOfGroup = FALSE;
1117 dim = dd->dim[dim_ind];
1118 copy_ivec(loc, loc_c);
1119 for (i = 0; i < dd->nc[dim]; i++)
1122 rank = dd_index(dd->nc, loc_c);
1123 if (rank == dd->rank)
1125 /* This process is part of the group */
1126 bPartOfGroup = TRUE;
1129 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
1133 dd->comm->mpi_comm_load[dim_ind] = c_row;
1134 if (!isDlbDisabled(dd->comm))
1136 DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1138 if (dd->ci[dim] == dd->master_ci[dim])
1140 /* This is the root process of this row */
1141 cellsizes.rowMaster = std::make_unique<RowMaster>();
1143 RowMaster &rowMaster = *cellsizes.rowMaster;
1144 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1145 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
1146 rowMaster.isCellMin.resize(dd->nc[dim]);
1149 rowMaster.bounds.resize(dd->nc[dim]);
1151 rowMaster.buf_ncd.resize(dd->nc[dim]);
1155 /* This is not a root process, we only need to receive cell_f */
1156 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1159 if (dd->ci[dim] == dd->master_ci[dim])
1161 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
1167 void dd_setup_dlb_resource_sharing(t_commrec *cr,
1171 int physicalnode_id_hash;
1173 MPI_Comm mpi_comm_pp_physicalnode;
1175 if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1177 /* Only ranks with short-ranged tasks (currently) use GPUs.
1178 * If we don't have GPUs assigned, there are no resources to share.
1183 physicalnode_id_hash = gmx_physicalnode_id_hash();
1189 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1190 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
1191 dd->rank, physicalnode_id_hash, gpu_id);
1193 /* Split the PP communicator over the physical nodes */
1194 /* TODO: See if we should store this (before), as it's also used for
1195 * for the nodecomm summation.
1197 // TODO PhysicalNodeCommunicator could be extended/used to handle
1198 // the need for per-node per-group communicators.
1199 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
1200 &mpi_comm_pp_physicalnode);
1201 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
1202 &dd->comm->mpi_comm_gpu_shared);
1203 MPI_Comm_free(&mpi_comm_pp_physicalnode);
1204 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1208 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1211 /* Note that some ranks could share a GPU, while others don't */
1213 if (dd->comm->nrank_gpu_shared == 1)
1215 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1218 GMX_UNUSED_VALUE(cr);
1219 GMX_UNUSED_VALUE(gpu_id);
1223 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
1226 int dim0, dim1, i, j;
1231 fprintf(debug, "Making load communicators\n");
1234 dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1235 snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1243 make_load_communicator(dd, 0, loc);
1247 for (i = 0; i < dd->nc[dim0]; i++)
1250 make_load_communicator(dd, 1, loc);
1256 for (i = 0; i < dd->nc[dim0]; i++)
1260 for (j = 0; j < dd->nc[dim1]; j++)
1263 make_load_communicator(dd, 2, loc);
1270 fprintf(debug, "Finished making load communicators\n");
1275 /*! \brief Sets up the relation between neighboring domains and zones */
1276 static void setup_neighbor_relations(gmx_domdec_t *dd)
1278 int d, dim, i, j, m;
1280 gmx_domdec_zones_t *zones;
1281 gmx_domdec_ns_ranges_t *izone;
1282 GMX_ASSERT(dd->ndim >= 0, "Must have non-negative number of dimensions for DD");
1284 for (d = 0; d < dd->ndim; d++)
1287 copy_ivec(dd->ci, tmp);
1288 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
1289 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1290 copy_ivec(dd->ci, tmp);
1291 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
1292 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1295 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
1298 dd->neighbor[d][1]);
1302 int nzone = (1 << dd->ndim);
1303 GMX_ASSERT(dd->ndim < DIM, "Invalid number of dimensions");
1304 int nizone = (1 << std::max(dd->ndim - 1, 0));
1305 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1307 zones = &dd->comm->zones;
1309 for (i = 0; i < nzone; i++)
1312 clear_ivec(zones->shift[i]);
1313 for (d = 0; d < dd->ndim; d++)
1315 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1320 for (i = 0; i < nzone; i++)
1322 for (d = 0; d < DIM; d++)
1324 s[d] = dd->ci[d] - zones->shift[i][d];
1329 else if (s[d] >= dd->nc[d])
1335 zones->nizone = nizone;
1336 for (i = 0; i < zones->nizone; i++)
1338 assert(ddNonbondedZonePairRanges[i][0] == i);
1340 izone = &zones->izone[i];
1341 /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
1342 * j-zones up to nzone.
1344 izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
1345 izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
1346 for (dim = 0; dim < DIM; dim++)
1348 if (dd->nc[dim] == 1)
1350 /* All shifts should be allowed */
1351 izone->shift0[dim] = -1;
1352 izone->shift1[dim] = 1;
1356 /* Determine the min/max j-zone shift wrt the i-zone */
1357 izone->shift0[dim] = 1;
1358 izone->shift1[dim] = -1;
1359 for (j = izone->j0; j < izone->j1; j++)
1361 int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
1362 if (shift_diff < izone->shift0[dim])
1364 izone->shift0[dim] = shift_diff;
1366 if (shift_diff > izone->shift1[dim])
1368 izone->shift1[dim] = shift_diff;
1375 if (!isDlbDisabled(dd->comm))
1377 dd->comm->cellsizesWithDlb.resize(dd->ndim);
1380 if (dd->comm->ddSettings.recordLoad)
1382 make_load_communicators(dd);
1386 static void make_pp_communicator(const gmx::MDLogger &mdlog,
1388 t_commrec gmx_unused *cr,
1389 bool gmx_unused reorder)
1392 gmx_domdec_comm_t *comm = dd->comm;
1393 CartesianRankSetup &cartSetup = comm->cartesianRankSetup;
1395 if (cartSetup.bCartesianPP)
1397 /* Set up cartesian communication for the particle-particle part */
1398 GMX_LOG(mdlog.info).appendTextFormatted(
1399 "Will use a Cartesian communicator: %d x %d x %d",
1400 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
1403 for (int i = 0; i < DIM; i++)
1408 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
1410 /* We overwrite the old communicator with the new cartesian one */
1411 cr->mpi_comm_mygroup = comm_cart;
1414 dd->mpi_comm_all = cr->mpi_comm_mygroup;
1415 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1417 if (cartSetup.bCartesianPP_PME)
1419 /* Since we want to use the original cartesian setup for sim,
1420 * and not the one after split, we need to make an index.
1422 cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1423 cartSetup.ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
1424 gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1425 /* Get the rank of the DD master,
1426 * above we made sure that the master node is a PP node.
1437 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1439 else if (cartSetup.bCartesianPP)
1441 if (!comm->ddRankSetup.usePmeOnlyRanks)
1443 /* The PP communicator is also
1444 * the communicator for this simulation
1446 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1448 cr->nodeid = dd->rank;
1450 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1452 /* We need to make an index to go from the coordinates
1453 * to the nodeid of this simulation.
1455 cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1456 std::vector<int> buf(dd->nnodes);
1457 if (thisRankHasDuty(cr, DUTY_PP))
1459 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1461 /* Communicate the ddindex to simulation nodeid index */
1462 MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1463 cr->mpi_comm_mysim);
1465 /* Determine the master coordinates and rank.
1466 * The DD master should be the same node as the master of this sim.
1468 for (int i = 0; i < dd->nnodes; i++)
1470 if (cartSetup.ddindex2simnodeid[i] == 0)
1472 ddindex2xyz(dd->nc, i, dd->master_ci);
1473 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1478 fprintf(debug, "The master rank is %d\n", dd->masterrank);
1483 /* No Cartesian communicators */
1484 /* We use the rank in dd->comm->all as DD index */
1485 ddindex2xyz(dd->nc, dd->rank, dd->ci);
1486 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1488 clear_ivec(dd->master_ci);
1492 GMX_LOG(mdlog.info).appendTextFormatted(
1493 "Domain decomposition rank %d, coordinates %d %d %d\n",
1494 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1498 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
1499 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1503 static void receive_ddindex2simnodeid(gmx_domdec_t *dd,
1507 CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
1509 if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1511 cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1512 std::vector<int> buf(dd->nnodes);
1513 if (thisRankHasDuty(cr, DUTY_PP))
1515 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1517 /* Communicate the ddindex to simulation nodeid index */
1518 MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1519 cr->mpi_comm_mysim);
1522 GMX_UNUSED_VALUE(dd);
1523 GMX_UNUSED_VALUE(cr);
1527 static CartesianRankSetup
1528 split_communicator(const gmx::MDLogger &mdlog,
1530 const DdRankOrder ddRankOrder,
1531 bool gmx_unused reorder,
1532 const DDRankSetup &ddRankSetup,
1534 std::vector<int> *pmeRanks)
1536 CartesianRankSetup cartSetup;
1538 cartSetup.bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
1539 cartSetup.bCartesianPP_PME = false;
1541 const ivec &numDDCells = ddRankSetup.numPPCells;
1542 /* Initially we set ntot to the number of PP cells */
1543 copy_ivec(numDDCells, cartSetup.ntot);
1545 if (cartSetup.bCartesianPP)
1547 const int numDDCellsTot = ddRankSetup.numPPRanks;
1549 for (int i = 1; i < DIM; i++)
1551 bDiv[i] = ((ddRankSetup.numRanksDoingPme*numDDCells[i]) % numDDCellsTot == 0);
1553 if (bDiv[YY] || bDiv[ZZ])
1555 cartSetup.bCartesianPP_PME = TRUE;
1556 /* If we have 2D PME decomposition, which is always in x+y,
1557 * we stack the PME only nodes in z.
1558 * Otherwise we choose the direction that provides the thinnest slab
1559 * of PME only nodes as this will have the least effect
1560 * on the PP communication.
1561 * But for the PME communication the opposite might be better.
1563 if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 ||
1565 numDDCells[YY] > numDDCells[ZZ]))
1567 cartSetup.cartpmedim = ZZ;
1571 cartSetup.cartpmedim = YY;
1573 cartSetup.ntot[cartSetup.cartpmedim]
1574 += (ddRankSetup.numRanksDoingPme*numDDCells[cartSetup.cartpmedim])/numDDCellsTot;
1578 GMX_LOG(mdlog.info).appendTextFormatted(
1579 "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)",
1580 ddRankSetup.numRanksDoingPme,
1581 numDDCells[XX], numDDCells[YY],
1582 numDDCells[XX], numDDCells[ZZ]);
1583 GMX_LOG(mdlog.info).appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1587 if (cartSetup.bCartesianPP_PME)
1593 GMX_LOG(mdlog.info).appendTextFormatted(
1594 "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1595 cartSetup.ntot[XX], cartSetup.ntot[YY], cartSetup.ntot[ZZ]);
1597 for (int i = 0; i < DIM; i++)
1602 MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder),
1604 MPI_Comm_rank(comm_cart, &rank);
1605 if (MASTER(cr) && rank != 0)
1607 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1610 /* With this assigment we loose the link to the original communicator
1611 * which will usually be MPI_COMM_WORLD, unless have multisim.
1613 cr->mpi_comm_mysim = comm_cart;
1614 cr->sim_nodeid = rank;
1616 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1618 GMX_LOG(mdlog.info).appendTextFormatted(
1619 "Cartesian rank %d, coordinates %d %d %d\n",
1620 cr->sim_nodeid, ddCellIndex[XX], ddCellIndex[YY], ddCellIndex[ZZ]);
1622 if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1626 if (!ddRankSetup.usePmeOnlyRanks ||
1627 ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1629 cr->duty = DUTY_PME;
1632 /* Split the sim communicator into PP and PME only nodes */
1633 MPI_Comm_split(cr->mpi_comm_mysim,
1634 getThisRankDuties(cr),
1635 dd_index(cartSetup.ntot, ddCellIndex),
1636 &cr->mpi_comm_mygroup);
1638 GMX_UNUSED_VALUE(ddCellIndex);
1643 switch (ddRankOrder)
1645 case DdRankOrder::pp_pme:
1646 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1648 case DdRankOrder::interleave:
1649 /* Interleave the PP-only and PME-only ranks */
1650 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1651 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1653 case DdRankOrder::cartesian:
1656 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1659 if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1661 cr->duty = DUTY_PME;
1668 /* Split the sim communicator into PP and PME only nodes */
1669 MPI_Comm_split(cr->mpi_comm_mysim,
1670 getThisRankDuties(cr),
1672 &cr->mpi_comm_mygroup);
1673 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1677 GMX_LOG(mdlog.info).appendTextFormatted(
1678 "This rank does only %s work.\n",
1679 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1684 /*! \brief Makes the PP communicator and the PME communicator, when needed
1686 * Returns the Cartesian rank setup.
1687 * Sets \p cr->mpi_comm_mygroup
1688 * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1689 * With separate PME ranks in interleaved order, set the PME ranks in \p pmeRanks.
1691 static CartesianRankSetup
1692 makeGroupCommunicators(const gmx::MDLogger &mdlog,
1693 const DDSettings &ddSettings,
1694 const DdRankOrder ddRankOrder,
1695 const DDRankSetup &ddRankSetup,
1698 std::vector<int> *pmeRanks)
1700 CartesianRankSetup cartSetup;
1702 if (ddRankSetup.usePmeOnlyRanks)
1704 /* Split the communicator into a PP and PME part */
1706 split_communicator(mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder,
1707 ddRankSetup, ddCellIndex, pmeRanks);
1711 /* All nodes do PP and PME */
1712 /* We do not require separate communicators */
1713 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
1715 cartSetup.bCartesianPP = false;
1716 cartSetup.bCartesianPP_PME = false;
1722 /*! \brief For PP ranks, sets or makes the communicator
1724 * For PME ranks get the rank id.
1725 * For PP only ranks, sets the PME-only rank.
1727 static void setupGroupCommunication(const gmx::MDLogger &mdlog,
1728 const DDSettings &ddSettings,
1729 gmx::ArrayRef<const int> pmeRanks,
1733 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1734 const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
1736 if (thisRankHasDuty(cr, DUTY_PP))
1738 /* Copy or make a new PP communicator */
1740 /* We (possibly) reordered the nodes in split_communicator,
1741 * so it is no longer required in make_pp_communicator.
1743 const bool useCartesianReorder =
1744 (ddSettings.useCartesianReorder &&
1745 !cartSetup.bCartesianPP_PME);
1747 make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1751 receive_ddindex2simnodeid(dd, cr);
1754 if (!thisRankHasDuty(cr, DUTY_PME))
1756 /* Set up the commnuication to our PME node */
1757 dd->pme_nodeid = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1758 dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1761 fprintf(debug, "My pme_nodeid %d receive ener %s\n",
1762 dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
1767 dd->pme_nodeid = -1;
1770 /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1773 dd->ma = std::make_unique<AtomDistribution>(dd->nc,
1774 dd->comm->cgs_gl.nr,
1775 dd->comm->cgs_gl.index[dd->comm->cgs_gl.nr]);
1779 static real *get_slb_frac(const gmx::MDLogger &mdlog,
1780 const char *dir, int nc, const char *size_string)
1782 real *slb_frac, tot;
1787 if (nc > 1 && size_string != nullptr)
1789 GMX_LOG(mdlog.info).appendTextFormatted(
1790 "Using static load balancing for the %s direction", dir);
1793 for (i = 0; i < nc; i++)
1796 sscanf(size_string, "%20lf%n", &dbl, &n);
1799 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
1806 std::string relativeCellSizes = "Relative cell sizes:";
1807 for (i = 0; i < nc; i++)
1810 relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1812 GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1818 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
1821 gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1823 while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1825 for (auto &ilist : extractILists(*ilists, IF_BOND))
1827 if (NRAL(ilist.functionType) > 2)
1829 n += nmol*(ilist.iatoms.size()/ilistStride(ilist));
1837 static int dd_getenv(const gmx::MDLogger &mdlog,
1838 const char *env_var, int def)
1844 val = getenv(env_var);
1847 if (sscanf(val, "%20d", &nst) <= 0)
1851 GMX_LOG(mdlog.info).appendTextFormatted(
1852 "Found env.var. %s = %s, using value %d",
1859 static void check_dd_restrictions(const gmx_domdec_t *dd,
1860 const t_inputrec *ir,
1861 const gmx::MDLogger &mdlog)
1863 if (ir->ePBC == epbcSCREW &&
1864 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
1866 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
1869 if (ir->ns_type == ensSIMPLE)
1871 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
1874 if (ir->nstlist == 0)
1876 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1879 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
1881 GMX_LOG(mdlog.warning).appendText("comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
1885 static real average_cellsize_min(const gmx_ddbox_t &ddbox,
1886 const ivec numDomains)
1888 real r = ddbox.box_size[XX];
1889 for (int d = 0; d < DIM; d++)
1891 if (numDomains[d] > 1)
1893 /* Check using the initial average cell size */
1894 r = std::min(r, ddbox.box_size[d]*ddbox.skew_fac[d]/numDomains[d]);
1901 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
1903 static DlbState forceDlbOffOrBail(DlbState cmdlineDlbState,
1904 const std::string &reasonStr,
1905 const gmx::MDLogger &mdlog)
1907 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
1908 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
1910 if (cmdlineDlbState == DlbState::onUser)
1912 gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1914 else if (cmdlineDlbState == DlbState::offCanTurnOn)
1916 GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1918 return DlbState::offForever;
1921 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1923 * This function parses the parameters of "-dlb" command line option setting
1924 * corresponding state values. Then it checks the consistency of the determined
1925 * state with other run parameters and settings. As a result, the initial state
1926 * may be altered or an error may be thrown if incompatibility of options is detected.
1928 * \param [in] mdlog Logger.
1929 * \param [in] dlbOption Enum value for the DLB option.
1930 * \param [in] bRecordLoad True if the load balancer is recording load information.
1931 * \param [in] mdrunOptions Options for mdrun.
1932 * \param [in] ir Pointer mdrun to input parameters.
1933 * \returns DLB initial/startup state.
1935 static DlbState determineInitialDlbState(const gmx::MDLogger &mdlog,
1936 DlbOption dlbOption, gmx_bool bRecordLoad,
1937 const gmx::MdrunOptions &mdrunOptions,
1938 const t_inputrec *ir)
1940 DlbState dlbState = DlbState::offCanTurnOn;
1944 case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1945 case DlbOption::no: dlbState = DlbState::offUser; break;
1946 case DlbOption::yes: dlbState = DlbState::onUser; break;
1947 default: gmx_incons("Invalid dlbOption enum value");
1950 /* Reruns don't support DLB: bail or override auto mode */
1951 if (mdrunOptions.rerun)
1953 std::string reasonStr = "it is not supported in reruns.";
1954 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1957 /* Unsupported integrators */
1958 if (!EI_DYNAMICS(ir->eI))
1960 auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1961 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1964 /* Without cycle counters we can't time work to balance on */
1967 std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
1968 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1971 if (mdrunOptions.reproducible)
1973 std::string reasonStr = "you started a reproducible run.";
1976 case DlbState::offUser:
1978 case DlbState::offForever:
1979 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1981 case DlbState::offCanTurnOn:
1982 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1983 case DlbState::onCanTurnOff:
1984 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1986 case DlbState::onUser:
1987 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", mdlog);
1989 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", static_cast<int>(dlbState));
1996 /* Sets the order of the DD dimensions, returns the number of DD dimensions */
1997 static int set_dd_dim(const ivec numDDCells,
1998 const DDSettings &ddSettings,
2002 if (ddSettings.useDDOrderZYX)
2004 /* Decomposition order z,y,x */
2005 for (int dim = DIM - 1; dim >= 0; dim--)
2007 if (numDDCells[dim] > 1)
2015 /* Decomposition order x,y,z */
2016 for (int dim = 0; dim < DIM; dim++)
2018 if (numDDCells[dim] > 1)
2027 /* Set dim[0] to avoid extra checks on ndim in several places */
2034 static gmx_domdec_comm_t *init_dd_comm()
2036 gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
2038 comm->n_load_have = 0;
2039 comm->n_load_collect = 0;
2041 comm->haveTurnedOffDlb = false;
2043 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
2045 comm->sum_nat[i] = 0;
2049 comm->load_step = 0;
2052 clear_ivec(comm->load_lim);
2056 /* This should be replaced by a unique pointer */
2057 comm->balanceRegion = ddBalanceRegionAllocate();
2062 /* Returns whether mtop contains constraints and/or vsites */
2063 static bool systemHasConstraintsOrVsites(const gmx_mtop_t &mtop)
2065 auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
2067 while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
2069 if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
2078 static void setupUpdateGroups(const gmx::MDLogger &mdlog,
2079 const gmx_mtop_t &mtop,
2080 const t_inputrec &inputrec,
2081 const real cutoffMargin,
2082 DDSystemInfo *systemInfo)
2084 /* When we have constraints and/or vsites, it is beneficial to use
2085 * update groups (when possible) to allow independent update of groups.
2087 if (!systemHasConstraintsOrVsites(mtop))
2089 /* No constraints or vsites, atoms can be updated independently */
2093 systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
2094 systemInfo->useUpdateGroups =
2095 (!systemInfo->updateGroupingPerMoleculetype.empty() &&
2096 getenv("GMX_NO_UPDATEGROUPS") == nullptr);
2098 if (systemInfo->useUpdateGroups)
2100 int numUpdateGroups = 0;
2101 for (const auto &molblock : mtop.molblock)
2103 numUpdateGroups += molblock.nmol*systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
2106 systemInfo->maxUpdateGroupRadius =
2107 computeMaxUpdateGroupRadius(mtop,
2108 systemInfo->updateGroupingPerMoleculetype,
2109 maxReferenceTemperature(inputrec));
2111 /* To use update groups, the large domain-to-domain cutoff distance
2112 * should be compatible with the box size.
2114 systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
2116 if (systemInfo->useUpdateGroups)
2118 GMX_LOG(mdlog.info).appendTextFormatted(
2119 "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f nm\n",
2121 mtop.natoms/static_cast<double>(numUpdateGroups),
2122 systemInfo->maxUpdateGroupRadius);
2126 GMX_LOG(mdlog.info).appendTextFormatted("The combination of rlist and box size prohibits the use of update groups\n");
2127 systemInfo->updateGroupingPerMoleculetype.clear();
2132 UnitCellInfo::UnitCellInfo(const t_inputrec &ir) :
2133 npbcdim(ePBC2npbcdim(ir.ePBC)),
2134 numBoundedDimensions(inputrec2nboundeddim(&ir)),
2135 ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
2136 haveScrewPBC(ir.ePBC == epbcSCREW)
2140 /*! \brief Generate the simulation system information */
2142 getSystemInfo(const gmx::MDLogger &mdlog,
2144 const DomdecOptions &options,
2145 const gmx_mtop_t *mtop,
2146 const t_inputrec *ir,
2148 gmx::ArrayRef<const gmx::RVec> xGlobal)
2150 const real tenPercentMargin = 1.1;
2152 DDSystemInfo systemInfo;
2154 /* We need to decide on update groups early, as this affects communication distances */
2155 systemInfo.useUpdateGroups = false;
2156 if (ir->cutoff_scheme == ecutsVERLET)
2158 real cutoffMargin = std::sqrt(max_cutoff2(ir->ePBC, box)) - ir->rlist;
2159 setupUpdateGroups(mdlog, *mtop, *ir, cutoffMargin, &systemInfo);
2162 // TODO: Check whether all bondeds are within update groups
2163 systemInfo.haveInterDomainBondeds = (mtop->natoms > gmx_mtop_num_molecules(*mtop) ||
2164 mtop->bIntermolecularInteractions);
2165 systemInfo.haveInterDomainMultiBodyBondeds = (multi_body_bondeds_count(mtop) > 0);
2167 if (systemInfo.useUpdateGroups)
2169 systemInfo.haveSplitConstraints = false;
2170 systemInfo.haveSplitSettles = false;
2174 systemInfo.haveSplitConstraints = gmx::inter_charge_group_constraints(*mtop);
2175 systemInfo.haveSplitSettles = gmx::inter_charge_group_settles(*mtop);
2180 /* Set the cut-off to some very large value,
2181 * so we don't need if statements everywhere in the code.
2182 * We use sqrt, since the cut-off is squared in some places.
2184 systemInfo.cutoff = GMX_CUTOFF_INF;
2188 systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir->rlist);
2190 systemInfo.minCutoffForMultiBody = 0;
2192 /* Determine the minimum cell size limit, affected by many factors */
2193 systemInfo.cellsizeLimit = 0;
2194 systemInfo.filterBondedCommunication = false;
2196 /* We do not allow home atoms to move beyond the neighboring domain
2197 * between domain decomposition steps, which limits the cell size.
2198 * Get an estimate of cell size limit due to atom displacement.
2199 * In most cases this is a large overestimate, because it assumes
2200 * non-interaction atoms.
2201 * We set the chance to 1 in a trillion steps.
2203 constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2204 const real limitForAtomDisplacement =
2205 minCellSizeForAtomDisplacement(*mtop, *ir,
2206 systemInfo.updateGroupingPerMoleculetype,
2207 c_chanceThatAtomMovesBeyondDomain);
2208 GMX_LOG(mdlog.info).appendTextFormatted(
2209 "Minimum cell size due to atom displacement: %.3f nm",
2210 limitForAtomDisplacement);
2212 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2213 limitForAtomDisplacement);
2215 /* TODO: PME decomposition currently requires atoms not to be more than
2216 * 2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2217 * In nearly all cases, limitForAtomDisplacement will be smaller
2218 * than 2/3*rlist, so the PME requirement is satisfied.
2219 * But it would be better for both correctness and performance
2220 * to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2221 * Note that we would need to improve the pairlist buffer case.
2224 if (systemInfo.haveInterDomainBondeds)
2226 if (options.minimumCommunicationRange > 0)
2228 systemInfo.minCutoffForMultiBody =
2229 atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2230 if (options.useBondedCommunication)
2232 systemInfo.filterBondedCommunication = (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2236 systemInfo.cutoff = std::max(systemInfo.cutoff,
2237 systemInfo.minCutoffForMultiBody);
2240 else if (ir->bPeriodicMols)
2242 /* Can not easily determine the required cut-off */
2243 GMX_LOG(mdlog.warning).appendText("NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.");
2244 systemInfo.minCutoffForMultiBody = systemInfo.cutoff/2;
2252 dd_bonded_cg_distance(mdlog, mtop, ir, as_rvec_array(xGlobal.data()), box,
2253 options.checkBondedInteractions,
2256 gmx_bcast(sizeof(r_2b), &r_2b, cr);
2257 gmx_bcast(sizeof(r_mb), &r_mb, cr);
2259 /* We use an initial margin of 10% for the minimum cell size,
2260 * except when we are just below the non-bonded cut-off.
2262 if (options.useBondedCommunication)
2264 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2266 const real r_bonded = std::max(r_2b, r_mb);
2267 systemInfo.minCutoffForMultiBody = tenPercentMargin*r_bonded;
2268 /* This is the (only) place where we turn on the filtering */
2269 systemInfo.filterBondedCommunication = true;
2273 const real r_bonded = r_mb;
2274 systemInfo.minCutoffForMultiBody = std::min(tenPercentMargin*r_bonded,
2277 /* We determine cutoff_mbody later */
2278 systemInfo.increaseMultiBodyCutoff = true;
2282 /* No special bonded communication,
2283 * simply increase the DD cut-off.
2285 systemInfo.minCutoffForMultiBody = tenPercentMargin*std::max(r_2b, r_mb);
2286 systemInfo.cutoff = std::max(systemInfo.cutoff,
2287 systemInfo.minCutoffForMultiBody);
2290 GMX_LOG(mdlog.info).appendTextFormatted(
2291 "Minimum cell size due to bonded interactions: %.3f nm",
2292 systemInfo.minCutoffForMultiBody);
2294 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2295 systemInfo.minCutoffForMultiBody);
2298 systemInfo.constraintCommunicationRange = 0;
2299 if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
2301 /* There is a cell size limit due to the constraints (P-LINCS) */
2302 systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, mtop, ir);
2303 GMX_LOG(mdlog.info).appendTextFormatted(
2304 "Estimated maximum distance required for P-LINCS: %.3f nm",
2305 systemInfo.constraintCommunicationRange);
2306 if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2308 GMX_LOG(mdlog.info).appendText("This distance will limit the DD cell size, you can override this with -rcon");
2311 else if (options.constraintCommunicationRange > 0)
2313 /* Here we do not check for dd->splitConstraints.
2314 * because one can also set a cell size limit for virtual sites only
2315 * and at this point we don't know yet if there are intercg v-sites.
2317 GMX_LOG(mdlog.info).appendTextFormatted(
2318 "User supplied maximum distance required for P-LINCS: %.3f nm",
2319 options.constraintCommunicationRange);
2320 systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2322 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2323 systemInfo.constraintCommunicationRange);
2328 /*! \brief Set the cell size and interaction limits, as well as the DD grid
2330 * Also computes the initial ddbox.
2333 getDDGridSetup(const gmx::MDLogger &mdlog,
2335 const DomdecOptions &options,
2336 const DDSettings &ddSettings,
2337 const DDSystemInfo &systemInfo,
2338 const gmx_mtop_t *mtop,
2339 const t_inputrec *ir,
2341 gmx::ArrayRef<const gmx::RVec> xGlobal,
2344 DDGridSetup ddGridSetup;
2346 if (options.numCells[XX] > 0)
2348 copy_ivec(options.numCells, ddGridSetup.numDomains);
2349 set_ddbox_cr(*cr, &ddGridSetup.numDomains, *ir, box, xGlobal, ddbox);
2351 if (options.numPmeRanks >= 0)
2353 ddGridSetup.numPmeOnlyRanks = options.numPmeRanks;
2357 /* When the DD grid is set explicitly and -npme is set to auto,
2358 * don't use PME ranks. We check later if the DD grid is
2359 * compatible with the total number of ranks.
2361 ddGridSetup.numPmeOnlyRanks = 0;
2366 set_ddbox_cr(*cr, nullptr, *ir, box, xGlobal, ddbox);
2368 /* We need to choose the optimal DD grid and possibly PME nodes */
2370 dd_choose_grid(mdlog, cr, ir, mtop, box, ddbox,
2371 options.numPmeRanks,
2372 ddSettings.request1DAnd1Pulse,
2373 !isDlbDisabled(ddSettings.initialDlbState),
2377 if (ddGridSetup.numDomains[XX] == 0)
2380 gmx_bool bC = (systemInfo.haveSplitConstraints &&
2381 systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2382 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
2383 !bC ? "-rdd" : "-rcon",
2384 ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2385 bC ? " or your LINCS settings" : "");
2387 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2388 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
2390 "Look in the log file for details on the domain decomposition",
2391 cr->nnodes - ddGridSetup.numPmeOnlyRanks,
2392 ddGridSetup.cellsizeLimit,
2397 const real acs = average_cellsize_min(*ddbox, ddGridSetup.numDomains);
2398 if (acs < systemInfo.cellsizeLimit)
2400 if (options.numCells[XX] <= 0)
2402 GMX_RELEASE_ASSERT(false, "dd_choose_grid() should return a grid that satisfies the cell size limits");
2406 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2407 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
2408 acs, systemInfo.cellsizeLimit);
2412 const int numPPRanks = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2413 if (cr->nnodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2415 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2416 "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
2417 numPPRanks, cr->nnodes - ddGridSetup.numPmeOnlyRanks, cr->nnodes);
2419 if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2421 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2422 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", ddGridSetup.numPmeOnlyRanks, numPPRanks);
2425 ddGridSetup.numDDDimensions = set_dd_dim(ddGridSetup.numDomains, ddSettings,
2426 ddGridSetup.ddDimensions);
2431 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2433 getDDRankSetup(const gmx::MDLogger &mdlog,
2435 const DDGridSetup &ddGridSetup,
2436 const t_inputrec &ir)
2438 GMX_LOG(mdlog.info).appendTextFormatted(
2439 "Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2440 ddGridSetup.numDomains[XX], ddGridSetup.numDomains[YY], ddGridSetup.numDomains[ZZ],
2441 ddGridSetup.numPmeOnlyRanks);
2443 DDRankSetup ddRankSetup;
2445 ddRankSetup.numPPRanks = cr->nnodes - ddGridSetup.numPmeOnlyRanks;
2446 copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2448 ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2449 if (ddRankSetup.usePmeOnlyRanks)
2451 ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2455 ddRankSetup.numRanksDoingPme = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2458 if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2460 /* The following choices should match those
2461 * in comm_cost_est in domdec_setup.c.
2462 * Note that here the checks have to take into account
2463 * that the decomposition might occur in a different order than xyz
2464 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2465 * in which case they will not match those in comm_cost_est,
2466 * but since that is mainly for testing purposes that's fine.
2468 if (ddGridSetup.numDDDimensions >= 2 &&
2469 ddGridSetup.ddDimensions[0] == XX &&
2470 ddGridSetup.ddDimensions[1] == YY &&
2471 ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX] &&
2472 ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0 &&
2473 getenv("GMX_PMEONEDD") == nullptr)
2475 ddRankSetup.npmedecompdim = 2;
2476 ddRankSetup.npmenodes_x = ddGridSetup.numDomains[XX];
2477 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme/ddRankSetup.npmenodes_x;
2481 /* In case nc is 1 in both x and y we could still choose to
2482 * decompose pme in y instead of x, but we use x for simplicity.
2484 ddRankSetup.npmedecompdim = 1;
2485 if (ddGridSetup.ddDimensions[0] == YY)
2487 ddRankSetup.npmenodes_x = 1;
2488 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2492 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2493 ddRankSetup.npmenodes_y = 1;
2496 GMX_LOG(mdlog.info).appendTextFormatted(
2497 "PME domain decomposition: %d x %d x %d",
2498 ddRankSetup.npmenodes_x, ddRankSetup.npmenodes_y, 1);
2502 ddRankSetup.npmedecompdim = 0;
2503 ddRankSetup.npmenodes_x = 0;
2504 ddRankSetup.npmenodes_y = 0;
2510 /*! \brief Set the cell size and interaction limits */
2511 static void set_dd_limits(const gmx::MDLogger &mdlog,
2512 t_commrec *cr, gmx_domdec_t *dd,
2513 const DomdecOptions &options,
2514 const DDSettings &ddSettings,
2515 const DDSystemInfo &systemInfo,
2516 const DDGridSetup &ddGridSetup,
2517 const gmx_mtop_t *mtop,
2518 const t_inputrec *ir,
2519 const gmx_ddbox_t &ddbox)
2521 gmx_domdec_comm_t *comm = dd->comm;
2522 comm->ddSettings = ddSettings;
2524 /* Initialize to GPU share count to 0, might change later */
2525 comm->nrank_gpu_shared = 0;
2527 comm->dlbState = comm->ddSettings.initialDlbState;
2528 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2529 /* To consider turning DLB on after 2*nstlist steps we need to check
2530 * at partitioning count 3. Thus we need to increase the first count by 2.
2532 comm->ddPartioningCountFirstDlbOff += 2;
2534 comm->bPMELoadBalDLBLimits = FALSE;
2536 /* Allocate the charge group/atom sorting struct */
2537 comm->sort = std::make_unique<gmx_domdec_sort_t>();
2539 comm->systemInfo = systemInfo;
2541 if (systemInfo.useUpdateGroups)
2543 /* Note: We would like to use dd->nnodes for the atom count estimate,
2544 * but that is not yet available here. But this anyhow only
2545 * affect performance up to the second dd_partition_system call.
2547 const int homeAtomCountEstimate = mtop->natoms/cr->nnodes;
2548 comm->updateGroupsCog =
2549 std::make_unique<gmx::UpdateGroupsCog>(*mtop,
2550 systemInfo.updateGroupingPerMoleculetype,
2551 maxReferenceTemperature(*ir),
2552 homeAtomCountEstimate);
2555 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
2557 /* Set the DD setup given by ddGridSetup */
2558 copy_ivec(ddGridSetup.numDomains, dd->nc);
2559 dd->ndim = ddGridSetup.numDDDimensions;
2560 copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2562 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
2564 snew(comm->slb_frac, DIM);
2565 if (isDlbDisabled(comm))
2567 comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
2568 comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
2569 comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
2572 /* Set the multi-body cut-off and cellsize limit for DLB */
2573 comm->cutoff_mbody = systemInfo.minCutoffForMultiBody;
2574 comm->cellsize_limit = systemInfo.cellsizeLimit;
2575 if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2577 if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2579 /* Set the bonded communication distance to halfway
2580 * the minimum and the maximum,
2581 * since the extra communication cost is nearly zero.
2583 real acs = average_cellsize_min(ddbox, dd->nc);
2584 comm->cutoff_mbody = 0.5*(systemInfo.minCutoffForMultiBody + acs);
2585 if (!isDlbDisabled(comm))
2587 /* Check if this does not limit the scaling */
2588 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
2589 options.dlbScaling*acs);
2591 if (!systemInfo.filterBondedCommunication)
2593 /* Without bBondComm do not go beyond the n.b. cut-off */
2594 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2595 if (comm->cellsize_limit >= systemInfo.cutoff)
2597 /* We don't loose a lot of efficieny
2598 * when increasing it to the n.b. cut-off.
2599 * It can even be slightly faster, because we need
2600 * less checks for the communication setup.
2602 comm->cutoff_mbody = systemInfo.cutoff;
2605 /* Check if we did not end up below our original limit */
2606 comm->cutoff_mbody = std::max(comm->cutoff_mbody,
2607 systemInfo.minCutoffForMultiBody);
2609 if (comm->cutoff_mbody > comm->cellsize_limit)
2611 comm->cellsize_limit = comm->cutoff_mbody;
2614 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2619 fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
2620 "cellsize limit %f\n",
2621 gmx::boolToString(systemInfo.filterBondedCommunication),
2622 comm->cellsize_limit);
2627 check_dd_restrictions(dd, ir, mdlog);
2631 static char *init_bLocalCG(const gmx_mtop_t *mtop)
2636 ncg = ncg_mtop(mtop);
2637 snew(bLocalCG, ncg);
2638 for (cg = 0; cg < ncg; cg++)
2640 bLocalCG[cg] = FALSE;
2646 void dd_init_bondeds(FILE *fplog,
2648 const gmx_mtop_t *mtop,
2649 const gmx_vsite_t *vsite,
2650 const t_inputrec *ir,
2651 gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
2653 gmx_domdec_comm_t *comm;
2655 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
2659 if (comm->systemInfo.filterBondedCommunication)
2661 /* Communicate atoms beyond the cut-off for bonded interactions */
2664 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
2666 comm->bLocalCG = init_bLocalCG(mtop);
2670 /* Only communicate atoms based on cut-off */
2671 comm->cglink = nullptr;
2672 comm->bLocalCG = nullptr;
2676 static void writeSettings(gmx::TextWriter *log,
2678 const gmx_mtop_t *mtop,
2679 const t_inputrec *ir,
2680 gmx_bool bDynLoadBal,
2682 const gmx_ddbox_t *ddbox)
2684 gmx_domdec_comm_t *comm;
2693 log->writeString("The maximum number of communication pulses is:");
2694 for (d = 0; d < dd->ndim; d++)
2696 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2698 log->ensureLineBreak();
2699 log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm", comm->cellsize_limit);
2700 log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2701 log->writeString("The allowed shrink of domain decomposition cells is:");
2702 for (d = 0; d < DIM; d++)
2706 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
2713 comm->cellsize_min_dlb[d]/
2714 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
2716 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2719 log->ensureLineBreak();
2723 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2724 log->writeString("The initial number of communication pulses is:");
2725 for (d = 0; d < dd->ndim; d++)
2727 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2729 log->ensureLineBreak();
2730 log->writeString("The initial domain decomposition cell size is:");
2731 for (d = 0; d < DIM; d++)
2735 log->writeStringFormatted(" %c %.2f nm",
2736 dim2char(d), dd->comm->cellsize_min[d]);
2739 log->ensureLineBreak();
2743 const bool haveInterDomainVsites =
2744 (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
2746 if (comm->systemInfo.haveInterDomainBondeds ||
2747 haveInterDomainVsites ||
2748 comm->systemInfo.haveSplitConstraints ||
2749 comm->systemInfo.haveSplitSettles)
2751 std::string decompUnits;
2752 if (comm->systemInfo.useUpdateGroups)
2754 decompUnits = "atom groups";
2758 decompUnits = "atoms";
2761 log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:", decompUnits.c_str());
2762 log->writeLineFormatted("%40s %-7s %6.3f nm", "non-bonded interactions", "", comm->systemInfo.cutoff);
2766 limit = dd->comm->cellsize_limit;
2770 if (dd->unitCellInfo.ddBoxIsDynamic)
2772 log->writeLine("(the following are initial values, they could change due to box deformation)");
2774 limit = dd->comm->cellsize_min[XX];
2775 for (d = 1; d < DIM; d++)
2777 limit = std::min(limit, dd->comm->cellsize_min[d]);
2781 if (comm->systemInfo.haveInterDomainBondeds)
2783 log->writeLineFormatted("%40s %-7s %6.3f nm",
2784 "two-body bonded interactions", "(-rdd)",
2785 std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2786 log->writeLineFormatted("%40s %-7s %6.3f nm",
2787 "multi-body bonded interactions", "(-rdd)",
2788 (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->systemInfo.cutoff, limit));
2790 if (haveInterDomainVsites)
2792 log->writeLineFormatted("%40s %-7s %6.3f nm",
2793 "virtual site constructions", "(-rcon)", limit);
2795 if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2797 std::string separation = gmx::formatString("atoms separated by up to %d constraints",
2799 log->writeLineFormatted("%40s %-7s %6.3f nm\n",
2800 separation.c_str(), "(-rcon)", limit);
2802 log->ensureLineBreak();
2806 static void logSettings(const gmx::MDLogger &mdlog,
2808 const gmx_mtop_t *mtop,
2809 const t_inputrec *ir,
2811 const gmx_ddbox_t *ddbox)
2813 gmx::StringOutputStream stream;
2814 gmx::TextWriter log(&stream);
2815 writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2816 if (dd->comm->dlbState == DlbState::offCanTurnOn)
2819 log.ensureEmptyLine();
2820 log.writeLine("When dynamic load balancing gets turned on, these settings will change to:");
2822 writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2824 GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2827 static void set_cell_limits_dlb(const gmx::MDLogger &mdlog,
2830 const t_inputrec *ir,
2831 const gmx_ddbox_t *ddbox)
2833 gmx_domdec_comm_t *comm;
2834 int d, dim, npulse, npulse_d_max, npulse_d;
2839 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2841 /* Determine the maximum number of comm. pulses in one dimension */
2843 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2845 /* Determine the maximum required number of grid pulses */
2846 if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2848 /* Only a single pulse is required */
2851 else if (!bNoCutOff && comm->cellsize_limit > 0)
2853 /* We round down slightly here to avoid overhead due to the latency
2854 * of extra communication calls when the cut-off
2855 * would be only slightly longer than the cell size.
2856 * Later cellsize_limit is redetermined,
2857 * so we can not miss interactions due to this rounding.
2859 npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff/comm->cellsize_limit);
2863 /* There is no cell size limit */
2864 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
2867 if (!bNoCutOff && npulse > 1)
2869 /* See if we can do with less pulses, based on dlb_scale */
2871 for (d = 0; d < dd->ndim; d++)
2874 npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->systemInfo.cutoff
2875 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
2876 npulse_d_max = std::max(npulse_d_max, npulse_d);
2878 npulse = std::min(npulse, npulse_d_max);
2881 /* This env var can override npulse */
2882 d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2889 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
2890 for (d = 0; d < dd->ndim; d++)
2892 if (comm->ddSettings.request1DAnd1Pulse)
2894 comm->cd[d].np_dlb = 1;
2898 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
2899 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2901 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
2903 comm->bVacDLBNoLimit = FALSE;
2907 /* cellsize_limit is set for LINCS in init_domain_decomposition */
2908 if (!comm->bVacDLBNoLimit)
2910 comm->cellsize_limit = std::max(comm->cellsize_limit,
2911 comm->systemInfo.cutoff/comm->maxpulse);
2913 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2914 /* Set the minimum cell size for each DD dimension */
2915 for (d = 0; d < dd->ndim; d++)
2917 if (comm->bVacDLBNoLimit ||
2918 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->systemInfo.cutoff)
2920 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2924 comm->cellsize_min_dlb[dd->dim[d]] =
2925 comm->systemInfo.cutoff/comm->cd[d].np_dlb;
2928 if (comm->cutoff_mbody <= 0)
2930 comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2938 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
2940 /* If each molecule is a single charge group
2941 * or we use domain decomposition for each periodic dimension,
2942 * we do not need to take pbc into account for the bonded interactions.
2944 return (ePBC != epbcNONE && dd->comm->systemInfo.haveInterDomainBondeds &&
2947 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
2950 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2951 static void set_ddgrid_parameters(const gmx::MDLogger &mdlog,
2952 gmx_domdec_t *dd, real dlb_scale,
2953 const gmx_mtop_t *mtop, const t_inputrec *ir,
2954 const gmx_ddbox_t *ddbox)
2956 gmx_domdec_comm_t *comm = dd->comm;
2957 DDRankSetup &ddRankSetup = comm->ddRankSetup;
2959 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2961 init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2962 if (ddRankSetup.npmedecompdim >= 2)
2964 init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2969 ddRankSetup.numRanksDoingPme = 0;
2970 if (dd->pme_nodeid >= 0)
2972 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2973 "Can not have separate PME ranks without PME electrostatics");
2979 fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2981 if (!isDlbDisabled(comm))
2983 set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2986 logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
2989 if (ir->ePBC == epbcNONE)
2991 vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
2996 (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, ddbox))/static_cast<double>(dd->nnodes);
3000 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
3002 int natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
3004 dd->ga2la = new gmx_ga2la_t(natoms_tot,
3005 static_cast<int>(vol_frac*natoms_tot));
3008 /*! \brief Get some important DD parameters which can be modified by env.vars */
3010 getDDSettings(const gmx::MDLogger &mdlog,
3011 const DomdecOptions &options,
3012 const gmx::MdrunOptions &mdrunOptions,
3013 const t_inputrec &ir)
3015 DDSettings ddSettings;
3017 ddSettings.useSendRecv2 = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
3018 ddSettings.dlb_scale_lim = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
3019 ddSettings.request1DAnd1Pulse = bool(dd_getenv(mdlog, "GMX_DD_1D_1PULSE", 0));
3020 ddSettings.useDDOrderZYX = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
3021 ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
3022 ddSettings.eFlop = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
3023 const int recload = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
3024 ddSettings.nstDDDump = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
3025 ddSettings.nstDDDumpGrid = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
3026 ddSettings.DD_debug = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
3028 if (ddSettings.useSendRecv2)
3030 GMX_LOG(mdlog.info).appendText("Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication");
3033 if (ddSettings.eFlop)
3035 GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
3036 ddSettings.recordLoad = true;
3040 ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
3043 ddSettings.initialDlbState =
3044 determineInitialDlbState(mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, &ir);
3045 GMX_LOG(mdlog.info).appendTextFormatted("Dynamic load balancing: %s",
3046 edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);
3051 gmx_domdec_t::gmx_domdec_t(const t_inputrec &ir) :
3056 gmx_domdec_t *init_domain_decomposition(const gmx::MDLogger &mdlog,
3058 const DomdecOptions &options,
3059 const gmx::MdrunOptions &mdrunOptions,
3060 const gmx_mtop_t *mtop,
3061 const t_inputrec *ir,
3063 gmx::ArrayRef<const gmx::RVec> xGlobal,
3064 gmx::LocalAtomSetManager *atomSets)
3066 GMX_LOG(mdlog.info).appendTextFormatted(
3067 "\nInitializing Domain Decomposition on %d ranks", cr->nnodes);
3069 DDSettings ddSettings = getDDSettings(mdlog, options, mdrunOptions, *ir);
3070 if (ddSettings.eFlop > 1)
3072 /* Ensure that we have different random flop counts on different ranks */
3073 srand(1 + cr->nodeid);
3076 DDSystemInfo systemInfo = getSystemInfo(mdlog, cr, options, mtop, ir, box, xGlobal);
3078 gmx_ddbox_t ddbox = {0};
3079 DDGridSetup ddGridSetup = getDDGridSetup(mdlog, cr, options, ddSettings, systemInfo,
3080 mtop, ir, box, xGlobal, &ddbox);
3082 cr->npmenodes = ddGridSetup.numPmeOnlyRanks;
3084 DDRankSetup ddRankSetup = getDDRankSetup(mdlog, cr, ddGridSetup, *ir);
3086 /* Generate the group communicator, also decides the duty of each rank */
3087 ivec ddCellIndex = { 0, 0, 0 };
3088 std::vector<int> pmeRanks;
3089 CartesianRankSetup cartSetup =
3090 makeGroupCommunicators(mdlog, ddSettings, options.rankOrder,
3092 ddCellIndex, &pmeRanks);
3094 gmx_domdec_t *dd = new gmx_domdec_t(*ir);
3096 copy_ivec(ddCellIndex, dd->ci);
3098 dd->comm = init_dd_comm();
3100 dd->comm->ddRankSetup = ddRankSetup;
3101 dd->comm->cartesianRankSetup = cartSetup;
3103 set_dd_limits(mdlog, cr, dd, options,
3104 ddSettings, systemInfo, ddGridSetup,
3108 setupGroupCommunication(mdlog, ddSettings, pmeRanks, cr, dd);
3110 if (thisRankHasDuty(cr, DUTY_PP))
3112 set_ddgrid_parameters(mdlog, dd, options.dlbScaling, mtop, ir, &ddbox);
3114 setup_neighbor_relations(dd);
3117 /* Set overallocation to avoid frequent reallocation of arrays */
3118 set_over_alloc_dd(TRUE);
3120 dd->atomSets = atomSets;
3125 static gmx_bool test_dd_cutoff(t_commrec *cr,
3127 gmx::ArrayRef<const gmx::RVec> x,
3128 real cutoffRequested)
3138 set_ddbox(*dd, false, box, true, x, &ddbox);
3142 for (d = 0; d < dd->ndim; d++)
3146 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
3147 if (dd->unitCellInfo.ddBoxIsDynamic)
3149 inv_cell_size *= DD_PRES_SCALE_MARGIN;
3152 np = 1 + static_cast<int>(cutoffRequested*inv_cell_size*ddbox.skew_fac[dim]);
3154 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3156 if (np > dd->comm->cd[d].np_dlb)
3161 /* If a current local cell size is smaller than the requested
3162 * cut-off, we could still fix it, but this gets very complicated.
3163 * Without fixing here, we might actually need more checks.
3165 real cellSizeAlongDim = (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim];
3166 if (cellSizeAlongDim*dd->comm->cd[d].np_dlb < cutoffRequested)
3173 if (!isDlbDisabled(dd->comm))
3175 /* If DLB is not active yet, we don't need to check the grid jumps.
3176 * Actually we shouldn't, because then the grid jump data is not set.
3178 if (isDlbOn(dd->comm) &&
3179 check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3184 gmx_sumi(1, &LocallyLimited, cr);
3186 if (LocallyLimited > 0)
3195 gmx_bool change_dd_cutoff(t_commrec *cr,
3197 gmx::ArrayRef<const gmx::RVec> x,
3198 real cutoffRequested)
3200 gmx_bool bCutoffAllowed;
3202 bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3206 cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3209 return bCutoffAllowed;