2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
53 #include "gromacs/domdec/builder.h"
54 #include "gromacs/domdec/collect.h"
55 #include "gromacs/domdec/dlb.h"
56 #include "gromacs/domdec/dlbtiming.h"
57 #include "gromacs/domdec/domdec_network.h"
58 #include "gromacs/domdec/ga2la.h"
59 #include "gromacs/domdec/gpuhaloexchange.h"
60 #include "gromacs/domdec/options.h"
61 #include "gromacs/domdec/partition.h"
62 #include "gromacs/gmxlib/network.h"
63 #include "gromacs/gmxlib/nrnb.h"
64 #include "gromacs/gpu_utils/gpu_utils.h"
65 #include "gromacs/hardware/hw_info.h"
66 #include "gromacs/listed_forces/manage_threading.h"
67 #include "gromacs/math/vec.h"
68 #include "gromacs/math/vectypes.h"
69 #include "gromacs/mdlib/calc_verletbuf.h"
70 #include "gromacs/mdlib/constr.h"
71 #include "gromacs/mdlib/constraintrange.h"
72 #include "gromacs/mdlib/updategroups.h"
73 #include "gromacs/mdlib/vsite.h"
74 #include "gromacs/mdtypes/commrec.h"
75 #include "gromacs/mdtypes/forceoutput.h"
76 #include "gromacs/mdtypes/inputrec.h"
77 #include "gromacs/mdtypes/mdrunoptions.h"
78 #include "gromacs/mdtypes/state.h"
79 #include "gromacs/pbcutil/ishift.h"
80 #include "gromacs/pbcutil/pbc.h"
81 #include "gromacs/pulling/pull.h"
82 #include "gromacs/timing/wallcycle.h"
83 #include "gromacs/topology/block.h"
84 #include "gromacs/topology/idef.h"
85 #include "gromacs/topology/ifunc.h"
86 #include "gromacs/topology/mtop_lookup.h"
87 #include "gromacs/topology/mtop_util.h"
88 #include "gromacs/topology/topology.h"
89 #include "gromacs/utility/basedefinitions.h"
90 #include "gromacs/utility/basenetwork.h"
91 #include "gromacs/utility/cstringutil.h"
92 #include "gromacs/utility/exceptions.h"
93 #include "gromacs/utility/fatalerror.h"
94 #include "gromacs/utility/gmxmpi.h"
95 #include "gromacs/utility/logger.h"
96 #include "gromacs/utility/real.h"
97 #include "gromacs/utility/smalloc.h"
98 #include "gromacs/utility/strconvert.h"
99 #include "gromacs/utility/stringstream.h"
100 #include "gromacs/utility/stringutil.h"
101 #include "gromacs/utility/textwriter.h"
103 #include "atomdistribution.h"
105 #include "cellsizes.h"
106 #include "distribute.h"
107 #include "domdec_constraints.h"
108 #include "domdec_internal.h"
109 #include "domdec_setup.h"
110 #include "domdec_vsite.h"
111 #include "redistribute.h"
114 // TODO remove this when moving domdec into gmx namespace
115 using gmx::DdRankOrder;
116 using gmx::DlbOption;
117 using gmx::DomdecOptions;
/* Human-readable names for the DlbState values; "on" appears twice,
 * presumably for two distinct enabled states - TODO confirm against DlbState. */
119 static const char *edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
121 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
124 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
/* The low 16 bits hold the atom count of the group (mask DD_FLAG_NRCG);
 * bit 16+2*d marks a forward move and bit 16+2*d+1 a backward move
 * along DD dimension index d. */
125 #define DD_FLAG_NRCG 65535
126 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
127 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
129 /* The DD zone order */
130 static const ivec dd_zo[DD_MAXZONE] =
131 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
133 /* The non-bonded zone-pair setup for domain decomposition
134 * The first number is the i-zone, the second number the first j-zone seen by
135 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
136 * As is, this is for 3D decomposition, where there are 4 i-zones.
137 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
138 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
141 ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
/* Maps DD grid coordinates i on grid n to a linear index:
 * x varies fastest, z slowest. */
148 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
150 static void index2xyz(ivec nc,int ind,ivec xyz)
152 xyz[XX] = ind % nc[XX];
153 xyz[YY] = (ind / nc[XX]) % nc[YY];
154 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
158 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
160 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
161 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
162 xyz[ZZ] = ind % nc[ZZ];
/* Returns the DD rank id for the domain with DD grid coordinates c.
 * With a Cartesian PP-PME setup the id comes from the precomputed
 * ddindex2ddnodeid table; with a plain Cartesian PP setup it is
 * obtained from MPI. */
165 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
169 const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
170 const int ddindex = dd_index(dd->nc, c);
171 if (cartSetup.bCartesianPP_PME)
173 ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
175 else if (cartSetup.bCartesianPP)
178 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
/* Returns the 1-based global atom number for local atom index i,
 * after range-checking i against the total local atom count. */
189 int ddglatnr(const gmx_domdec_t *dd, int i)
199 if (i >= dd->comm->atomRanges.numAtomsTotal())
201 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal())
203 atnr = dd->globalAtomIndices[i] + 1;
209 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t &dd)
211 GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
212 return dd.comm->systemInfo.updateGroupingPerMoleculetype;
215 void dd_store_state(gmx_domdec_t *dd, t_state *state)
219 if (state->ddp_count != dd->ddp_count)
221 gmx_incons("The MD state does not match the domain decomposition state");
224 state->cg_gl.resize(dd->ncg_home);
225 for (i = 0; i < dd->ncg_home; i++)
227 state->cg_gl[i] = dd->globalAtomGroupIndices[i];
230 state->ddp_count_cg_gl = dd->ddp_count;
233 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
235 return &dd->comm->zones;
238 int dd_numHomeAtoms(const gmx_domdec_t &dd)
240 return dd.comm->atomRanges.numHomeAtoms();
243 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
245 /* We currently set mdatoms entries for all atoms:
246 * local + non-local + communicated for vsite + constraints
249 return dd->comm->atomRanges.numAtomsTotal();
252 int dd_natoms_vsite(const gmx_domdec_t *dd)
254 return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
257 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
259 *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
260 *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
/* Communicates the coordinates x of the home atoms to the neighboring
 * domains, looping forward over the DD dimensions. For domains at a
 * periodic boundary the coordinates are shifted by the box vector;
 * with screw PBC along x, y and z are additionally mirrored in the box.
 * Timed under the ewcMOVEX cycle counter. */
263 void dd_move_x(gmx_domdec_t *dd,
265 gmx::ArrayRef<gmx::RVec> x,
266 gmx_wallcycle *wcycle)
268 wallcycle_start(wcycle, ewcMOVEX);
271 gmx_domdec_comm_t *comm;
272 gmx_domdec_comm_dim_t *cd;
273 rvec shift = {0, 0, 0};
274 gmx_bool bPBC, bScrew;
279 nat_tot = comm->atomRanges.numHomeAtoms();
280 for (int d = 0; d < dd->ndim; d++)
/* bPBC: this domain is at the lower edge along dimension d, so the pulse
 * crosses a periodic boundary and the sent coordinates need shifting */
282 bPBC = (dd->ci[dd->dim[d]] == 0);
283 bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
286 copy_rvec(box[dd->dim[d]], shift);
/* Loop over the communication pulses along this dimension */
289 for (const gmx_domdec_ind_t &ind : cd->ind)
291 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
292 gmx::ArrayRef<gmx::RVec> &sendBuffer = sendBufferAccess.buffer;
296 for (int j : ind.index)
298 sendBuffer[n] = x[j];
304 for (int j : ind.index)
306 /* We need to shift the coordinates */
307 for (int d = 0; d < DIM; d++)
309 sendBuffer[n][d] = x[j][d] + shift[d];
/* Screw PBC case: shift x, mirror y and z in the box */
316 for (int j : ind.index)
319 sendBuffer[n][XX] = x[j][XX] + shift[XX];
321 * This operation requires a special shift force
322 * treatment, which is performed in calc_vir.
324 sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
325 sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
330 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
332 gmx::ArrayRef<gmx::RVec> receiveBuffer;
333 if (cd->receiveInPlace)
/* Receive directly into x after the atoms received so far */
335 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
339 receiveBuffer = receiveBufferAccess.buffer;
341 /* Send and receive the coordinates */
342 ddSendrecv(dd, d, dddirBackward,
343 sendBuffer, receiveBuffer);
345 if (!cd->receiveInPlace)
/* Copy the received coordinates to their per-zone locations in x */
348 for (int zone = 0; zone < nzone; zone++)
350 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
352 x[i] = receiveBuffer[j++];
356 nat_tot += ind.nrecv[nzone+1];
361 wallcycle_stop(wcycle, ewcMOVEX);
/* Communicates forces on non-home atoms back to their home domains and
 * adds them to the local forces, looping backward over the DD dimensions
 * (the reverse of dd_move_x). When the virial is computed, forces crossing
 * a periodic boundary are also accumulated into the shift forces; with
 * screw PBC the y and z force components are sign-flipped.
 * Timed under the ewcMOVEF cycle counter. */
364 void dd_move_f(gmx_domdec_t *dd,
365 gmx::ForceWithShiftForces *forceWithShiftForces,
366 gmx_wallcycle *wcycle)
368 wallcycle_start(wcycle, ewcMOVEF);
370 gmx::ArrayRef<gmx::RVec> f = forceWithShiftForces->force();
371 gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();
373 gmx_domdec_comm_t &comm = *dd->comm;
374 int nzone = comm.zones.n/2;
375 int nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
376 for (int d = dd->ndim-1; d >= 0; d--)
378 /* Only forces in domains near the PBC boundaries need to
379 consider PBC in the treatment of fshift */
380 const bool shiftForcesNeedPbc = (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
381 const bool applyScrewPbc = (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
382 /* Determine which shift vector we need */
383 ivec vis = { 0, 0, 0 };
385 const int is = IVEC2IS(vis);
387 /* Loop over the pulses */
388 const gmx_domdec_comm_dim_t &cd = comm.cd[d];
389 for (int p = cd.numPulses() - 1; p >= 0; p--)
391 const gmx_domdec_ind_t &ind = cd.ind[p];
392 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
393 gmx::ArrayRef<gmx::RVec> &receiveBuffer = receiveBufferAccess.buffer;
395 nat_tot -= ind.nrecv[nzone+1];
397 DDBufferAccess<gmx::RVec> sendBufferAccess(comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
399 gmx::ArrayRef<gmx::RVec> sendBuffer;
400 if (cd.receiveInPlace)
/* Send directly out of f; the forces to return live past nat_tot */
402 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
406 sendBuffer = sendBufferAccess.buffer;
/* Gather the forces per zone into the send buffer */
408 for (int zone = 0; zone < nzone; zone++)
410 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
412 sendBuffer[j++] = f[i];
416 /* Communicate the forces */
417 ddSendrecv(dd, d, dddirForward,
418 sendBuffer, receiveBuffer);
419 /* Add the received forces */
421 if (!shiftForcesNeedPbc)
423 for (int j : ind.index)
425 for (int d = 0; d < DIM; d++)
427 f[j][d] += receiveBuffer[n][d];
/* Plain PBC: accumulate into f and into the shift force for shift index is */
432 else if (!applyScrewPbc)
434 for (int j : ind.index)
436 for (int d = 0; d < DIM; d++)
438 f[j][d] += receiveBuffer[n][d];
440 /* Add this force to the shift force */
441 for (int d = 0; d < DIM; d++)
443 fshift[is][d] += receiveBuffer[n][d];
/* Screw PBC: the mirrored y/z coordinates require sign-flipped y/z forces */
450 for (int j : ind.index)
452 /* Rotate the force */
453 f[j][XX] += receiveBuffer[n][XX];
454 f[j][YY] -= receiveBuffer[n][YY];
455 f[j][ZZ] -= receiveBuffer[n][ZZ];
456 if (shiftForcesNeedPbc)
458 /* Add this force to the shift force */
459 for (int d = 0; d < DIM; d++)
461 fshift[is][d] += receiveBuffer[n][d];
470 wallcycle_stop(wcycle, ewcMOVEF);
473 /* Convenience function for extracting a real buffer from an rvec buffer
475 * To reduce the number of temporary communication buffers and avoid
476 * cache polution, we reuse gmx::RVec buffers for storing reals.
477 * This functions return a real buffer reference with the same number
478 * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
480 static gmx::ArrayRef<real>
481 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
/* Reinterpret the RVec storage as a flat real array; the returned view
 * aliases arrayRef and is only valid while arrayRef's buffer lives. */
483 return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
/* Spreads the per-atom real values v of the home atoms to the neighboring
 * domains, looping forward over the DD dimensions (same communication
 * pattern as dd_move_x, but for a scalar per atom and without PBC shifts). */
487 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
490 gmx_domdec_comm_t *comm;
491 gmx_domdec_comm_dim_t *cd;
496 nat_tot = comm->atomRanges.numHomeAtoms();
497 for (int d = 0; d < dd->ndim; d++)
/* Loop over the communication pulses along this dimension */
500 for (const gmx_domdec_ind_t &ind : cd->ind)
502 /* Note: We provision for RVec instead of real, so a factor of 3
503 * more than needed. The buffer actually already has this size
504 * and we pass a plain pointer below, so this does not matter.
506 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
507 gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
509 for (int j : ind.index)
511 sendBuffer[n++] = v[j];
514 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
516 gmx::ArrayRef<real> receiveBuffer;
517 if (cd->receiveInPlace)
/* Receive directly into v after the values received so far */
519 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
523 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
525 /* Send and receive the data */
526 ddSendrecv(dd, d, dddirBackward,
527 sendBuffer, receiveBuffer);
528 if (!cd->receiveInPlace)
/* Copy the received values to their per-zone locations in v */
531 for (int zone = 0; zone < nzone; zone++)
533 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
535 v[i] = receiveBuffer[j++];
539 nat_tot += ind.nrecv[nzone+1];
/* Sums the per-atom real values v of non-home atoms back onto their home
 * domains, looping backward over the DD dimensions (same communication
 * pattern as dd_move_f, but for a scalar per atom and without shift forces). */
545 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
548 gmx_domdec_comm_t *comm;
549 gmx_domdec_comm_dim_t *cd;
553 nzone = comm->zones.n/2;
554 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
555 for (int d = dd->ndim-1; d >= 0; d--)
/* Loop over the pulses in reverse order */
558 for (int p = cd->numPulses() - 1; p >= 0; p--)
560 const gmx_domdec_ind_t &ind = cd->ind[p];
562 /* Note: We provision for RVec instead of real, so a factor of 3
563 * more than needed. The buffer actually already has this size
564 * and we typecast, so this works as intended.
566 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
567 gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
568 nat_tot -= ind.nrecv[nzone + 1];
570 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
572 gmx::ArrayRef<real> sendBuffer;
573 if (cd->receiveInPlace)
/* Send directly out of v; the values to return live past nat_tot */
575 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
579 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
/* Gather the values per zone into the send buffer */
581 for (int zone = 0; zone < nzone; zone++)
583 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
585 sendBuffer[j++] = v[i];
589 /* Communicate the forces */
590 ddSendrecv(dd, d, dddirForward,
591 sendBuffer, receiveBuffer);
592 /* Add the received forces */
594 for (int j : ind.index)
596 v[j] += receiveBuffer[n];
/* Returns the cut-off for multi-body (more than two atoms) bonded
 * interactions. When inter-domain multi-body bondeds are present, this is
 * either the explicit cutoff_mbody, or (when cutoff_mbody is 0, i.e. no DLB)
 * the smallest cell size over the DD dimensions, optionally raised to
 * cutoff_mbody when bonded communication filtering is active, and capped
 * at the non-bonded cut-off. */
604 real dd_cutoff_multibody(const gmx_domdec_t *dd)
606 const gmx_domdec_comm_t &comm = *dd->comm;
607 const DDSystemInfo &systemInfo = comm.systemInfo;
610 if (systemInfo.haveInterDomainMultiBodyBondeds)
612 if (comm.cutoff_mbody > 0)
614 r = comm.cutoff_mbody;
618 /* cutoff_mbody=0 means we do not have DLB */
619 r = comm.cellsize_min[dd->dim[0]];
620 for (int di = 1; di < dd->ndim; di++)
622 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
624 if (comm.systemInfo.filterBondedCommunication)
626 r = std::max(r, comm.cutoff_mbody);
630 r = std::min(r, systemInfo.cutoff);
638 real dd_cutoff_twobody(const gmx_domdec_t *dd)
642 r_mb = dd_cutoff_multibody(dd);
644 return std::max(dd->comm->systemInfo.cutoff, r_mb);
/* Maps Cartesian rank coordinates to the coordinates of the PME rank that
 * serves them: along the PME decomposition dimension the PP cell index is
 * rescaled onto the (ntot - nc) PME slots placed after the nc PP cells;
 * the other dimensions are copied unchanged. */
648 static void dd_cart_coord2pmecoord(const DDRankSetup &ddRankSetup,
649 const CartesianRankSetup &cartSetup,
653 const int nc = ddRankSetup.numPPCells[cartSetup.cartpmedim];
654 const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
655 copy_ivec(coord, coord_pme);
656 coord_pme[cartSetup.cartpmedim] =
657 nc + (coord[cartSetup.cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
660 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
661 static int ddindex2pmeindex(const DDRankSetup &ddRankSetup,
662 const int ddCellIndex)
664 const int npp = ddRankSetup.numPPRanks;
665 const int npme = ddRankSetup.numRanksDoingPme;
667 /* Here we assign a PME node to communicate with this DD node
668 * by assuming that the major index of both is x.
669 * We add npme/2 to obtain an even distribution.
671 return (ddCellIndex*npme + npme/2)/npp;
/* Builds the list of PME-only rank ids for an interleaved PP/PME rank
 * layout: a PME rank id is emitted after each run of PP ranks that map to
 * the same PME index (via ddindex2pmeindex). */
674 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup &ddRankSetup)
676 std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
679 for (int i = 0; i < ddRankSetup.numPPRanks; i++)
681 const int p0 = ddindex2pmeindex(ddRankSetup, i);
682 const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
/* A PME rank follows the last PP rank of each group mapping to one PME index */
683 if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
687 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
689 pmeRanks[n] = i + 1 + n;
/* Returns the PME rank index for the PP cell with DD coordinates x,y,z.
 * NOTE(review): the bCartesian branch below uses older helper names
 * (gmx_ddindex2xyz, dd_coords2pmecoords) and fields accessed directly on
 * dd rather than on the rank-setup structs - looks like a legacy code
 * path; confirm which branch is actually compiled. */
697 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
705 if (dd->comm->bCartesian) {
706 gmx_ddindex2xyz(dd->nc,ddindex,coords);
707 dd_coords2pmecoords(dd,coords,coords_pme);
708 copy_ivec(dd->ntot,nc);
709 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
710 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
712 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
714 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
720 slab = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->nc, coords));
/* Returns the simulation rank id for the PP cell with DD coordinates x,y,z.
 * With Cartesian PP-PME the id is asked from MPI; with Cartesian PP it comes
 * from the ddindex2simnodeid table; otherwise the DD index is used directly,
 * offset by the number of interleaved PME ranks preceding it when PME-only
 * ranks are in use. */
725 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
727 const CartesianRankSetup &cartSetup = cr->dd->comm->cartesianRankSetup;
728 ivec coords = { x, y, z };
731 if (cartSetup.bCartesianPP_PME)
734 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
739 int ddindex = dd_index(cr->dd->nc, coords);
740 if (cartSetup.bCartesianPP)
742 nodeid = cartSetup.ddindex2simnodeid[ddindex];
746 if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
748 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
/* Returns the simulation rank id of the PME-only rank that serves the PP
 * rank sim_nodeid. Handles the three layouts: Cartesian PP-PME (via MPI
 * coordinates), Cartesian PP (via ddindex2pmeindex), and interleaved
 * (by scanning the sorted pmeRanks list). */
760 static int dd_simnode2pmenode(const DDRankSetup &ddRankSetup,
761 const CartesianRankSetup &cartSetup,
762 gmx::ArrayRef<const int> pmeRanks,
763 const t_commrec gmx_unused *cr,
764 const int sim_nodeid)
768 /* This assumes a uniform x domain decomposition grid cell size */
769 if (cartSetup.bCartesianPP_PME)
772 ivec coord, coord_pme;
773 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
774 if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
776 /* This is a PP rank */
777 dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
778 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
782 else if (cartSetup.bCartesianPP)
784 if (sim_nodeid < ddRankSetup.numPPRanks)
786 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
791 /* This assumes DD cells with identical x coordinates
792 * are numbered sequentially.
794 if (pmeRanks.empty())
796 if (sim_nodeid < ddRankSetup.numPPRanks)
798 /* The DD index equals the nodeid */
799 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
/* Interleaved layout: find the first PME rank id above sim_nodeid */
805 while (sim_nodeid > pmeRanks[i])
809 if (sim_nodeid < pmeRanks[i])
811 pmenode = pmeRanks[i];
/* Returns the number of PME domains in x and y, taken from the DD rank
 * setup. */
819 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
824 dd->comm->ddRankSetup.npmenodes_x,
825 dd->comm->ddRankSetup.npmenodes_y
/* Returns the simulation rank ids of all PP ranks that communicate with
 * the PME-only rank pmenodeid. Loops over the full PP cell grid and
 * selects cells whose PME partner matches, either by Cartesian PME
 * coordinates or by PME slab index. */
836 std::vector<int> get_pme_ddranks(const t_commrec *cr,
839 const DDRankSetup &ddRankSetup = cr->dd->comm->ddRankSetup;
840 const CartesianRankSetup &cartSetup = cr->dd->comm->cartesianRankSetup;
841 GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks, "This function should only be called when PME-only ranks are in use");
842 std::vector<int> ddranks;
/* Upper bound on PP ranks per PME rank, to avoid reallocation */
843 ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1)/ddRankSetup.numRanksDoingPme);
845 for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
847 for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
849 for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
851 if (cartSetup.bCartesianPP_PME)
853 ivec coord = { x, y, z };
855 dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
856 if (cr->dd->ci[XX] == coord_pme[XX] &&
857 cr->dd->ci[YY] == coord_pme[YY] &&
858 cr->dd->ci[ZZ] == coord_pme[ZZ])
860 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
865 /* The slab corresponds to the nodeid in the PME group */
866 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
868 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
/* Returns whether this PP rank should receive the virial and energy from
 * its PME rank: only the last PP rank sharing a given PME rank receives.
 * Defaults to TRUE when no PME-only ranks are in use. */
877 static gmx_bool receive_vir_ener(const gmx_domdec_t *dd,
878 gmx::ArrayRef<const int> pmeRanks,
881 gmx_bool bReceive = TRUE;
883 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
884 if (ddRankSetup.usePmeOnlyRanks)
886 const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
887 if (cartSetup.bCartesianPP_PME)
890 int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
/* Check whether the next rank along the PME dimension shares our PME rank */
892 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
893 coords[cartSetup.cartpmedim]++;
894 if (coords[cartSetup.cartpmedim] < dd->nc[cartSetup.cartpmedim])
897 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
898 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
900 /* This is not the last PP node for pmenode */
905 GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
910 int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
911 if (cr->sim_nodeid+1 < cr->nnodes &&
912 dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
914 /* This is not the last PP node for pmenode */
/* Allocates and fills *dim_f with the nc[dim]+1 cumulative cell-boundary
 * fractions along dimension dim, using the static load-balancing fractions
 * slb_frac when present, otherwise a uniform grid. The last entry is 1. */
923 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
925 gmx_domdec_comm_t *comm;
930 snew(*dim_f, dd->nc[dim]+1);
932 for (i = 1; i < dd->nc[dim]; i++)
934 if (comm->slb_frac[dim])
936 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
940 (*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
943 (*dim_f)[dd->nc[dim]] = 1;
/* Initializes the PME communication setup ddpme for PME decomposition
 * dimension index dimind: chooses the slab count, determines per-slab
 * min/max PP cell coordinates, and sets the slab boundary fractions. */
946 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
948 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
950 if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
958 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
960 ddpme->nslab = (ddpme->dim == 0 ?
961 ddRankSetup.npmenodes_x :
962 ddRankSetup.npmenodes_y);
964 if (ddpme->nslab <= 1)
969 const int nso = ddRankSetup.numRanksDoingPme/ddpme->nslab;
970 /* Determine for each PME slab the PP location range for dimension dim */
971 snew(ddpme->pp_min, ddpme->nslab);
972 snew(ddpme->pp_max, ddpme->nslab);
/* Initialize the ranges to empty (min above max) before scanning */
973 for (int slab = 0; slab < ddpme->nslab; slab++)
975 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
976 ddpme->pp_max[slab] = 0;
978 for (int i = 0; i < dd->nnodes; i++)
981 ddindex2xyz(dd->nc, i, xyz);
982 /* For y only use our y/z slab.
983 * This assumes that the PME x grid size matches the DD grid size.
985 if (dimind == 0 || xyz[XX] == dd->ci[XX])
987 const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
995 slab = pmeindex % ddpme->nslab;
997 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
998 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1002 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
/* Returns the maximum PME grid-line shift in x, when the first PME
 * decomposition dimension is x. */
1005 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1007 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1009 if (ddRankSetup.ddpme[0].dim == XX)
1011 return ddRankSetup.ddpme[0].maxshift;
/* Returns the maximum PME grid-line shift in y, checking whether y is the
 * first or (with 2D PME decomposition) the second PME dimension. */
1019 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1021 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1023 if (ddRankSetup.ddpme[0].dim == YY)
1025 return ddRankSetup.ddpme[0].maxshift;
1027 else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
1029 return ddRankSetup.ddpme[1].maxshift;
1037 bool ddHaveSplitConstraints(const gmx_domdec_t &dd)
1039 return dd.comm->systemInfo.haveSplitConstraints;
1042 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
1044 /* Note that the cycles value can be incorrect, either 0 or some
1045 * extremely large value, when our thread migrated to another core
1046 * with an unsynchronized cycle counter. If this happens less often
1047 * that once per nstlist steps, this will not cause issues, since
1048 * we later subtract the maximum value from the sum over nstlist steps.
1049 * A zero count will slightly lower the total, but that's a small effect.
1050 * Note that the main purpose of the subtraction of the maximum value
1051 * is to avoid throwing off the load balancing when stalls occur due
1052 * e.g. system activity or network congestion.
1054 dd->comm->cycl[ddCycl] += cycles;
1055 dd->comm->cycl_n[ddCycl]++;
1056 if (cycles > dd->comm->cycl_max[ddCycl])
1058 dd->comm->cycl_max[ddCycl] = cycles;
/* Creates the MPI communicator for load measurements along DD dimension
 * index dim_ind for the row of domains through loc, by splitting
 * mpi_comm_all. With DLB enabled, also allocates the per-row cell-size
 * buffers (full RowMaster state on the row root, only the fraction-row
 * buffer elsewhere). */
1063 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
1068 gmx_bool bPartOfGroup = FALSE;
1070 dim = dd->dim[dim_ind];
1071 copy_ivec(loc, loc_c);
/* Walk along the row to see whether this rank belongs to it */
1072 for (i = 0; i < dd->nc[dim]; i++)
1075 rank = dd_index(dd->nc, loc_c);
1076 if (rank == dd->rank)
1078 /* This process is part of the group */
1079 bPartOfGroup = TRUE;
/* Ranks not in the group pass MPI_UNDEFINED and get no communicator */
1082 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
1086 dd->comm->mpi_comm_load[dim_ind] = c_row;
1087 if (!isDlbDisabled(dd->comm))
1089 DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1091 if (dd->ci[dim] == dd->master_ci[dim])
1093 /* This is the root process of this row */
1094 cellsizes.rowMaster = std::make_unique<RowMaster>();
1096 RowMaster &rowMaster = *cellsizes.rowMaster;
1097 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1098 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
1099 rowMaster.isCellMin.resize(dd->nc[dim]);
1102 rowMaster.bounds.resize(dd->nc[dim]);
1104 rowMaster.buf_ncd.resize(dd->nc[dim]);
1108 /* This is not a root process, we only need to receive cell_f */
1109 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1112 if (dd->ci[dim] == dd->master_ci[dim])
1114 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
/* Sets up mpi_comm_gpu_shared, the communicator over PP ranks on the same
 * physical node that share the same GPU (gpu_id), and stores its size in
 * nrank_gpu_shared. Returns early for ranks without PP duty or without an
 * assigned GPU; frees the communicator again when no sharing occurs. */
1120 void dd_setup_dlb_resource_sharing(const t_commrec *cr,
1124 int physicalnode_id_hash;
1126 MPI_Comm mpi_comm_pp_physicalnode;
1128 if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1130 /* Only ranks with short-ranged tasks (currently) use GPUs.
1131 * If we don't have GPUs assigned, there are no resources to share.
1136 physicalnode_id_hash = gmx_physicalnode_id_hash();
1142 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1143 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
1144 dd->rank, physicalnode_id_hash, gpu_id);
1146 /* Split the PP communicator over the physical nodes */
1147 /* TODO: See if we should store this (before), as it's also used for
1148 * for the nodecomm summation.
1150 // TODO PhysicalNodeCommunicator could be extended/used to handle
1151 // the need for per-node per-group communicators.
1152 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
1153 &mpi_comm_pp_physicalnode);
/* Within a node, split further by GPU id; the temporary per-node
 * communicator is freed right after. */
1154 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
1155 &dd->comm->mpi_comm_gpu_shared);
1156 MPI_Comm_free(&mpi_comm_pp_physicalnode);
1157 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1161 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1164 /* Note that some ranks could share a GPU, while others don't */
1166 if (dd->comm->nrank_gpu_shared == 1)
1168 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1171 GMX_UNUSED_VALUE(cr);
1172 GMX_UNUSED_VALUE(gpu_id);
/* Allocates the per-dimension load structures and creates a load
 * communicator for every row of domains along each DD dimension
 * (one call for dim 0, a loop over dim0 cells for dim 1, and a double
 * loop for dim 2). */
1176 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
1179 int dim0, dim1, i, j;
1184 fprintf(debug, "Making load communicators\n");
1187 dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1188 snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1196 make_load_communicator(dd, 0, loc);
1200 for (i = 0; i < dd->nc[dim0]; i++)
1203 make_load_communicator(dd, 1, loc);
1209 for (i = 0; i < dd->nc[dim0]; i++)
1213 for (j = 0; j < dd->nc[dim1]; j++)
1216 make_load_communicator(dd, 2, loc);
1223 fprintf(debug, "Finished making load communicators\n");
1228 /*! \brief Sets up the relation between neighboring domains and zones */
1229 static void setup_neighbor_relations(gmx_domdec_t *dd)
1231 int d, dim, i, j, m;
1233 gmx_domdec_zones_t *zones;
1234 gmx_domdec_ns_ranges_t *izone;
1235 GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
/* Determine the forward ([d][0]) and backward ([d][1]) neighbor rank
 * along each DD dimension, with periodic wrapping of the coordinate */
1237 for (d = 0; d < dd->ndim; d++)
1240 copy_ivec(dd->ci, tmp);
1241 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
1242 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1243 copy_ivec(dd->ci, tmp);
1244 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
1245 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1248 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
1251 dd->neighbor[d][1]);
/* 2^ndim zones in total, 2^(ndim-1) of which are i-zones */
1255 int nzone = (1 << dd->ndim);
1256 int nizone = (1 << std::max(dd->ndim - 1, 0));
1257 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1259 zones = &dd->comm->zones;
/* Assign each zone its shift from the dd_zo zone order */
1261 for (i = 0; i < nzone; i++)
1264 clear_ivec(zones->shift[i]);
1265 for (d = 0; d < dd->ndim; d++)
1267 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1272 for (i = 0; i < nzone; i++)
1274 for (d = 0; d < DIM; d++)
1276 s[d] = dd->ci[d] - zones->shift[i][d];
1281 else if (s[d] >= dd->nc[d])
/* Fill in the i-zone j-ranges and the allowed j-zone shift ranges */
1287 zones->nizone = nizone;
1288 for (i = 0; i < zones->nizone; i++)
1290 assert(ddNonbondedZonePairRanges[i][0] == i);
1292 izone = &zones->izone[i];
1293 /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
1294 * j-zones up to nzone.
1296 izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
1297 izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
1298 for (dim = 0; dim < DIM; dim++)
1300 if (dd->nc[dim] == 1)
1302 /* All shifts should be allowed */
1303 izone->shift0[dim] = -1;
1304 izone->shift1[dim] = 1;
1308 /* Determine the min/max j-zone shift wrt the i-zone */
1309 izone->shift0[dim] = 1;
1310 izone->shift1[dim] = -1;
1311 for (j = izone->j0; j < izone->j1; j++)
1313 int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
1314 if (shift_diff < izone->shift0[dim])
1316 izone->shift0[dim] = shift_diff;
1318 if (shift_diff > izone->shift1[dim])
1320 izone->shift1[dim] = shift_diff;
1327 if (!isDlbDisabled(dd->comm))
1329 dd->comm->cellsizesWithDlb.resize(dd->ndim);
1332 if (dd->comm->ddSettings.recordLoad)
1334 make_load_communicators(dd);
/* Creates the particle-particle MPI communicator, determines this rank's
 * id, DD coordinates and the master rank. With a Cartesian PP setup a
 * Cartesian communicator replaces mpi_comm_mygroup; the ddindex-to-rank
 * tables needed by the different layouts are built via all-reduce. */
1338 static void make_pp_communicator(const gmx::MDLogger &mdlog,
1340 t_commrec gmx_unused *cr,
1341 bool gmx_unused reorder)
1344 gmx_domdec_comm_t *comm = dd->comm;
1345 CartesianRankSetup &cartSetup = comm->cartesianRankSetup;
1347 if (cartSetup.bCartesianPP)
1349 /* Set up cartesian communication for the particle-particle part */
1350 GMX_LOG(mdlog.info).appendTextFormatted(
1351 "Will use a Cartesian communicator: %d x %d x %d",
1352 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
1355 for (int i = 0; i < DIM; i++)
1360 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
1362 /* We overwrite the old communicator with the new cartesian one */
1363 cr->mpi_comm_mygroup = comm_cart;
1366 dd->mpi_comm_all = cr->mpi_comm_mygroup;
1367 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1369 if (cartSetup.bCartesianPP_PME)
1371 /* Since we want to use the original cartesian setup for sim,
1372 * and not the one after split, we need to make an index.
1374 cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
/* Each rank fills its own slot; the sum over all ranks completes the table */
1375 cartSetup.ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
1376 gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1377 /* Get the rank of the DD master,
1378 * above we made sure that the master node is a PP node.
1389 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1391 else if (cartSetup.bCartesianPP)
1393 if (!comm->ddRankSetup.usePmeOnlyRanks)
1395 /* The PP communicator is also
1396 * the communicator for this simulation
1398 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1400 cr->nodeid = dd->rank;
1402 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1404 /* We need to make an index to go from the coordinates
1405 * to the nodeid of this simulation.
1407 cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1408 std::vector<int> buf(dd->nnodes);
1409 if (thisRankHasDuty(cr, DUTY_PP))
1411 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1413 /* Communicate the ddindex to simulation nodeid index */
1414 MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1415 cr->mpi_comm_mysim);
1417 /* Determine the master coordinates and rank.
1418 * The DD master should be the same node as the master of this sim.
1420 for (int i = 0; i < dd->nnodes; i++)
1422 if (cartSetup.ddindex2simnodeid[i] == 0)
1424 ddindex2xyz(dd->nc, i, dd->master_ci);
1425 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1430 fprintf(debug, "The master rank is %d\n", dd->masterrank);
1435 /* No Cartesian communicators */
1436 /* We use the rank in dd->comm->all as DD index */
1437 ddindex2xyz(dd->nc, dd->rank, dd->ci);
1438 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1440 clear_ivec(dd->master_ci);
1444 GMX_LOG(mdlog.info).appendTextFormatted(
1445 "Domain decomposition rank %d, coordinates %d %d %d\n",
1446 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1450 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
1451 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Builds the DD-index -> simulation-nodeid lookup table on this rank.
 *
 * Only needed when PP ranks are Cartesian but there is no combined
 * Cartesian PP+PME setup: each PP rank contributes its sim_nodeid at its
 * DD index and an MPI_SUM Allreduce fills in the full table.
 * The GMX_UNUSED_VALUE lines cover the non-MPI build. */
1455 static void receive_ddindex2simnodeid(gmx_domdec_t *dd,
1459 CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
1461 if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1463 cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1464 std::vector<int> buf(dd->nnodes);
1465 if (thisRankHasDuty(cr, DUTY_PP))
1467 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1469 /* Communicate the ddindex to simulation nodeid index */
1470 MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1471 cr->mpi_comm_mysim);
1474 GMX_UNUSED_VALUE(dd);
1475 GMX_UNUSED_VALUE(cr);
/*! \brief Splits the simulation communicator into a PP part and a PME part.
 *
 * When the rank order is Cartesian and the number of PME-only ranks divides
 * evenly into an extra slab of the DD grid, a Cartesian communicator over
 * the extended grid (ntot) is created and ranks get PP or PME duty from
 * their Cartesian coordinate. Otherwise duties are assigned from the
 * requested DdRankOrder (pp_pme or interleave) and MPI_Comm_split is used.
 *
 * Returns the resulting CartesianRankSetup; fills \p pmeRanks for
 * interleaved ordering and \p ddCellIndex for Cartesian PP ranks.
 */
1479 static CartesianRankSetup
1480 split_communicator(const gmx::MDLogger &mdlog,
1482 const DdRankOrder ddRankOrder,
1483 bool gmx_unused reorder,
1484 const DDRankSetup &ddRankSetup,
1486 std::vector<int> *pmeRanks)
1488 CartesianRankSetup cartSetup;
1490 cartSetup.bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
1491 cartSetup.bCartesianPP_PME = false;
1493 const ivec &numDDCells = ddRankSetup.numPPCells;
1494 /* Initially we set ntot to the number of PP cells */
1495 copy_ivec(numDDCells, cartSetup.ntot);
1497 if (cartSetup.bCartesianPP)
1499 const int numDDCellsTot = ddRankSetup.numPPRanks;
1501 for (int i = 1; i < DIM; i++)
1503 bDiv[i] = ((ddRankSetup.numRanksDoingPme*numDDCells[i]) % numDDCellsTot == 0);
1505 if (bDiv[YY] || bDiv[ZZ])
1507 cartSetup.bCartesianPP_PME = TRUE;
1508 /* If we have 2D PME decomposition, which is always in x+y,
1509 * we stack the PME only nodes in z.
1510 * Otherwise we choose the direction that provides the thinnest slab
1511 * of PME only nodes as this will have the least effect
1512 * on the PP communication.
1513 * But for the PME communication the opposite might be better.
1515 if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 ||
1517 numDDCells[YY] > numDDCells[ZZ]))
1519 cartSetup.cartpmedim = ZZ;
1523 cartSetup.cartpmedim = YY;
/* Extend the grid along cartpmedim so PME-only ranks form an extra slab */
1525 cartSetup.ntot[cartSetup.cartpmedim]
1526 += (ddRankSetup.numRanksDoingPme*numDDCells[cartSetup.cartpmedim])/numDDCellsTot;
1530 GMX_LOG(mdlog.info).appendTextFormatted(
1531 "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)",
1532 ddRankSetup.numRanksDoingPme,
1533 numDDCells[XX], numDDCells[YY],
1534 numDDCells[XX], numDDCells[ZZ]);
1535 GMX_LOG(mdlog.info).appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1539 if (cartSetup.bCartesianPP_PME)
1545 GMX_LOG(mdlog.info).appendTextFormatted(
1546 "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1547 cartSetup.ntot[XX], cartSetup.ntot[YY], cartSetup.ntot[ZZ]);
1549 for (int i = 0; i < DIM; i++)
1554 MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder),
1556 MPI_Comm_rank(comm_cart, &rank);
1557 if (MASTER(cr) && rank != 0)
1559 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1562 /* With this assignment we lose the link to the original communicator,
1563 * which will usually be MPI_COMM_WORLD, unless we have multisim.
1565 cr->mpi_comm_mysim = comm_cart;
1566 cr->sim_nodeid = rank;
1568 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1570 GMX_LOG(mdlog.info).appendTextFormatted(
1571 "Cartesian rank %d, coordinates %d %d %d\n",
1572 cr->sim_nodeid, ddCellIndex[XX], ddCellIndex[YY], ddCellIndex[ZZ]);
/* Ranks inside the original PP grid do PP; ranks in the extra slab do PME */
1574 if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1578 if (!ddRankSetup.usePmeOnlyRanks ||
1579 ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1581 cr->duty = DUTY_PME;
1584 /* Split the sim communicator into PP and PME only nodes */
1585 MPI_Comm_split(cr->mpi_comm_mysim,
1586 getThisRankDuties(cr),
1587 dd_index(cartSetup.ntot, ddCellIndex),
1588 &cr->mpi_comm_mygroup);
1590 GMX_UNUSED_VALUE(ddCellIndex);
1595 switch (ddRankOrder)
1597 case DdRankOrder::pp_pme:
1598 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1600 case DdRankOrder::interleave:
1601 /* Interleave the PP-only and PME-only ranks */
1602 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1603 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1605 case DdRankOrder::cartesian:
1608 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
/* A rank with no assigned PME partner must itself be a PME-only rank */
1611 if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1613 cr->duty = DUTY_PME;
1620 /* Split the sim communicator into PP and PME only nodes */
1621 MPI_Comm_split(cr->mpi_comm_mysim,
1622 getThisRankDuties(cr),
1624 &cr->mpi_comm_mygroup);
1625 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1629 GMX_LOG(mdlog.info).appendTextFormatted(
1630 "This rank does only %s work.\n",
1631 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1636 /*! \brief Makes the PP communicator and the PME communicator, when needed
1638 * Returns the Cartesian rank setup.
1639 * Sets \p cr->mpi_comm_mygroup.
1640 * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1641 * With separate PME ranks in interleaved order, sets the PME ranks in \p pmeRanks.
1643 static CartesianRankSetup
1644 makeGroupCommunicators(const gmx::MDLogger &mdlog,
1645 const DDSettings &ddSettings,
1646 const DdRankOrder ddRankOrder,
1647 const DDRankSetup &ddRankSetup,
1650 std::vector<int> *pmeRanks)
1652 CartesianRankSetup cartSetup;
1654 if (ddRankSetup.usePmeOnlyRanks)
1656 /* Split the communicator into a PP and PME part */
1658 split_communicator(mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder,
1659 ddRankSetup, ddCellIndex, pmeRanks);
1663 /* All nodes do PP and PME */
1664 /* We do not require separate communicators */
1665 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
/* Without PME-only ranks there is never a Cartesian PP(+PME) setup */
1667 cartSetup.bCartesianPP = false;
1668 cartSetup.bCartesianPP_PME = false;
1674 /*! \brief For PP ranks, sets or makes the communicator
1676 * For PME ranks get the rank id.
1677 * For PP only ranks, sets the PME-only rank.
1679 static void setupGroupCommunication(const gmx::MDLogger &mdlog,
1680 const DDSettings &ddSettings,
1681 gmx::ArrayRef<const int> pmeRanks,
1683 const int numAtomsInSystem,
1686 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1687 const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
1689 if (thisRankHasDuty(cr, DUTY_PP))
1691 /* Copy or make a new PP communicator */
1693 /* We (possibly) reordered the nodes in split_communicator,
1694 * so it is no longer required in make_pp_communicator.
1696 const bool useCartesianReorder =
1697 (ddSettings.useCartesianReorder &&
1698 !cartSetup.bCartesianPP_PME);
1700 make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1704 receive_ddindex2simnodeid(dd, cr);
1707 if (!thisRankHasDuty(cr, DUTY_PME))
1709 /* Set up the communication to our PME node */
1710 dd->pme_nodeid = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1711 dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1714 fprintf(debug, "My pme_nodeid %d receive ener %s\n",
1715 dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
/* No separate PME rank assigned to this rank */
1720 dd->pme_nodeid = -1;
1723 /* We can not use DDMASTER(dd), because dd->masterrank is set later */
/* Master rank allocates the structure used to distribute/collect atoms */
1726 dd->ma = std::make_unique<AtomDistribution>(dd->nc,
/* Parses the user-supplied static load balancing cell-size string for one
 * dimension (\p dir, e.g. "x") with \p nc cells. Returns an snew-allocated
 * array of nc relative cell fractions, normalized so they sum to 1
 * (normalization lines are elided in this excerpt), or presumably nullptr
 * when nc <= 1 or no string was given — verify against the full source. */
1732 static real *get_slb_frac(const gmx::MDLogger &mdlog,
1733 const char *dir, int nc, const char *size_string)
1735 real *slb_frac, tot;
1740 if (nc > 1 && size_string != nullptr)
1742 GMX_LOG(mdlog.info).appendTextFormatted(
1743 "Using static load balancing for the %s direction", dir);
1746 for (i = 0; i < nc; i++)
/* %n records how many chars were consumed so we can advance size_string */
1749 sscanf(size_string, "%20lf%n", &dbl, &n);
1752 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
1759 std::string relativeCellSizes = "Relative cell sizes:";
1760 for (i = 0; i < nc; i++)
1763 relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1765 GMX_LOG(mdlog.info).appendText(relativeCellSizes);
/* Counts the number of bonded interactions in mtop that involve more than
 * two atoms (multi-body bondeds), summed over all molecules. */
1771 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
1774 gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1776 while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1778 for (auto &ilist : extractILists(*ilists, IF_BOND))
1780 if (NRAL(ilist.functionType) > 2)
1782 n += nmol*(ilist.iatoms.size()/ilistStride(ilist));
/* Reads an integer from environment variable \p env_var; returns \p def
 * when the variable is unset (the early-return path is elided in this
 * excerpt). Logs the value when the variable is found. */
1790 static int dd_getenv(const gmx::MDLogger &mdlog,
1791 const char *env_var, int def)
1797 val = getenv(env_var);
1800 if (sscanf(val, "%20d", &nst) <= 0)
1804 GMX_LOG(mdlog.info).appendTextFormatted(
1805 "Found env.var. %s = %s, using value %d",
/* Checks input-record settings that are incompatible with domain
 * decomposition and exits with a fatal error (or warns) when violated:
 * screw PBC restricted to x-only decomposition, nstlist=0 unsupported,
 * and angular COM removal with PBC only warned about. */
1812 static void check_dd_restrictions(const gmx_domdec_t *dd,
1813 const t_inputrec *ir,
1814 const gmx::MDLogger &mdlog)
1816 if (ir->ePBC == epbcSCREW &&
1817 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
1819 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
1822 if (ir->nstlist == 0)
1824 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1827 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
1829 GMX_LOG(mdlog.warning).appendText("comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
/* Returns the smallest average cell size over all decomposed dimensions,
 * using the initial box size corrected by the skew factor. Dimensions with
 * a single domain are skipped; the x box size is the starting upper bound. */
1833 static real average_cellsize_min(const gmx_ddbox_t &ddbox,
1834 const ivec numDomains)
1836 real r = ddbox.box_size[XX];
1837 for (int d = 0; d < DIM; d++)
1839 if (numDomains[d] > 1)
1841 /* Check using the initial average cell size */
1842 r = std::min(r, ddbox.box_size[d]*ddbox.skew_fac[d]/numDomains[d]);
1849 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
 *
 * If the user explicitly requested DLB (onUser), exits with a fatal error
 * containing \p reasonStr; if DLB was in auto mode (offCanTurnOn), logs a
 * note and returns DlbState::offForever.
1851 static DlbState forceDlbOffOrBail(DlbState cmdlineDlbState,
1852 const std::string &reasonStr,
1853 const gmx::MDLogger &mdlog)
1855 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
1856 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
1858 if (cmdlineDlbState == DlbState::onUser)
1860 gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1862 else if (cmdlineDlbState == DlbState::offCanTurnOn)
1864 GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1866 return DlbState::offForever;
1869 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1871 * This function parses the parameters of "-dlb" command line option setting
1872 * corresponding state values. Then it checks the consistency of the determined
1873 * state with other run parameters and settings. As a result, the initial state
1874 * may be altered or an error may be thrown if incompatibility of options is detected.
1876 * \param [in] mdlog Logger.
1877 * \param [in] dlbOption Enum value for the DLB option.
1878 * \param [in] bRecordLoad True if the load balancer is recording load information.
1879 * \param [in] mdrunOptions Options for mdrun.
1880 * \param [in] ir Pointer to the mdrun input parameters.
1881 * \returns DLB initial/startup state.
1883 static DlbState determineInitialDlbState(const gmx::MDLogger &mdlog,
1884 DlbOption dlbOption, gmx_bool bRecordLoad,
1885 const gmx::MdrunOptions &mdrunOptions,
1886 const t_inputrec *ir)
1888 DlbState dlbState = DlbState::offCanTurnOn;
1892 case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1893 case DlbOption::no: dlbState = DlbState::offUser; break;
1894 case DlbOption::yes: dlbState = DlbState::onUser; break;
1895 default: gmx_incons("Invalid dlbOption enum value");
1898 /* Reruns don't support DLB: bail or override auto mode */
1899 if (mdrunOptions.rerun)
1901 std::string reasonStr = "it is not supported in reruns.";
1902 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1905 /* Unsupported integrators */
1906 if (!EI_DYNAMICS(ir->eI))
1908 auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1909 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1912 /* Without cycle counters we can't time work to balance on */
1915 std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
1916 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1919 if (mdrunOptions.reproducible)
1921 std::string reasonStr = "you started a reproducible run.";
1924 case DlbState::offUser:
1926 case DlbState::offForever:
1927 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1929 case DlbState::offCanTurnOn:
1930 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1931 case DlbState::onCanTurnOff:
/* NOTE(review): the assertion message below says "offCanTurnOff" but this
 * case handles onCanTurnOff — the string looks like a typo (fixing it
 * would be a code change, so it is only flagged here). */
1932 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1934 case DlbState::onUser:
1935 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", mdlog);
1937 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", static_cast<int>(dlbState));
/* Allocates and zero-initializes a new gmx_domdec_comm_t: load-balancing
 * counters, per-range atom counts, load accounting, and the DLB balance
 * region. Ownership is transferred to the caller. */
1944 static gmx_domdec_comm_t *init_dd_comm()
1946 gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
1948 comm->n_load_have = 0;
1949 comm->n_load_collect = 0;
1951 comm->haveTurnedOffDlb = false;
1953 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
1955 comm->sum_nat[i] = 0;
1959 comm->load_step = 0;
1962 clear_ivec(comm->load_lim);
1966 /* This should be replaced by a unique pointer */
1967 comm->balanceRegion = ddBalanceRegionAllocate();
1972 /* Returns whether mtop contains constraints and/or vsites */
1973 static bool systemHasConstraintsOrVsites(const gmx_mtop_t &mtop)
1975 auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
/* Scan every molecule type; any non-empty constraint/vsite ilist suffices */
1977 while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
1979 if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
/* Decides whether update groups can be used and fills the corresponding
 * fields of \p systemInfo: the per-moleculetype grouping, the use flag,
 * and the maximum update-group radius. Update groups are rejected when the
 * system has no constraints/vsites, when GMX_NO_UPDATEGROUPS is set, or
 * when the group radius is incompatible with the box (cutoffMargin). */
1988 static void setupUpdateGroups(const gmx::MDLogger &mdlog,
1989 const gmx_mtop_t &mtop,
1990 const t_inputrec &inputrec,
1991 const real cutoffMargin,
1992 DDSystemInfo *systemInfo)
1994 /* When we have constraints and/or vsites, it is beneficial to use
1995 * update groups (when possible) to allow independent update of groups.
1997 if (!systemHasConstraintsOrVsites(mtop))
1999 /* No constraints or vsites, atoms can be updated independently */
2003 systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
2004 systemInfo->useUpdateGroups =
2005 (!systemInfo->updateGroupingPerMoleculetype.empty() &&
2006 getenv("GMX_NO_UPDATEGROUPS") == nullptr);
2008 if (systemInfo->useUpdateGroups)
2010 int numUpdateGroups = 0;
2011 for (const auto &molblock : mtop.molblock)
2013 numUpdateGroups += molblock.nmol*systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
2016 systemInfo->maxUpdateGroupRadius =
2017 computeMaxUpdateGroupRadius(mtop,
2018 systemInfo->updateGroupingPerMoleculetype,
2019 maxReferenceTemperature(inputrec));
2021 /* To use update groups, the large domain-to-domain cutoff distance
2022 * should be compatible with the box size.
2024 systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
2026 if (systemInfo->useUpdateGroups)
2028 GMX_LOG(mdlog.info).appendTextFormatted(
2029 "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f nm\n",
2031 mtop.natoms/static_cast<double>(numUpdateGroups),
2032 systemInfo->maxUpdateGroupRadius);
2036 GMX_LOG(mdlog.info).appendTextFormatted("The combination of rlist and box size prohibits the use of update groups\n");
2037 systemInfo->updateGroupingPerMoleculetype.clear();
/* Constructor: derives unit-cell properties from the input record —
 * number of PBC dimensions, number of bounded dimensions, whether the
 * DD box can change dynamically, and whether screw PBC is used. */
2042 UnitCellInfo::UnitCellInfo(const t_inputrec &ir) :
2043 npbcdim(ePBC2npbcdim(ir.ePBC)),
2044 numBoundedDimensions(inputrec2nboundeddim(&ir)),
2045 ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
2046 haveScrewPBC(ir.ePBC == epbcSCREW)
2050 /* Returns whether molecules are always whole, i.e. not broken by PBC */
/* With update groups: whole iff every molecule type forms a single group.
 * Without: whole iff no molecule type has more than one atom (the
 * multi-atom early-exit path is elided in this excerpt). */
2052 moleculesAreAlwaysWhole(const gmx_mtop_t &mtop,
2053 const bool useUpdateGroups,
2054 gmx::ArrayRef<const gmx::RangePartitioning> updateGroupingPerMoleculetype)
2056 if (useUpdateGroups)
2058 GMX_RELEASE_ASSERT(updateGroupingPerMoleculetype.size() == mtop.moltype.size(),
2059 "Need one grouping per moltype");
2060 for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
2062 if (updateGroupingPerMoleculetype[mol].numBlocks() > 1)
2070 for (const auto &moltype : mtop.moltype)
2072 if (moltype.atoms.nr > 1)
2082 /*! \brief Generate the simulation system information */
/* Fills a DDSystemInfo from the topology, input record, box and global
 * coordinates: update-group usage, whether bondeds cross domains, split
 * constraints/settles, the DD cut-off, the minimum multi-body cut-off,
 * the cell-size limit, and the P-LINCS constraint communication range. */
2084 getSystemInfo(const gmx::MDLogger &mdlog,
2085 const t_commrec *cr,
2086 const DomdecOptions &options,
2087 const gmx_mtop_t &mtop,
2088 const t_inputrec &ir,
2090 gmx::ArrayRef<const gmx::RVec> xGlobal)
/* 10% safety margin applied to measured bonded distances below */
2092 const real tenPercentMargin = 1.1;
2094 DDSystemInfo systemInfo;
2096 /* We need to decide on update groups early, as this affects communication distances */
2097 systemInfo.useUpdateGroups = false;
2098 if (ir.cutoff_scheme == ecutsVERLET)
2100 real cutoffMargin = std::sqrt(max_cutoff2(ir.ePBC, box)) - ir.rlist;
2101 setupUpdateGroups(mdlog, mtop, ir, cutoffMargin, &systemInfo);
2104 systemInfo.moleculesAreAlwaysWhole =
2105 moleculesAreAlwaysWhole(mtop,
2106 systemInfo.useUpdateGroups,
2107 systemInfo.updateGroupingPerMoleculetype);
2108 systemInfo.haveInterDomainBondeds = (!systemInfo.moleculesAreAlwaysWhole ||
2109 mtop.bIntermolecularInteractions);
2110 systemInfo.haveInterDomainMultiBodyBondeds = (systemInfo.haveInterDomainBondeds &&
2111 multi_body_bondeds_count(&mtop) > 0);
2113 if (systemInfo.useUpdateGroups)
2115 systemInfo.haveSplitConstraints = false;
2116 systemInfo.haveSplitSettles = false;
2120 systemInfo.haveSplitConstraints = gmx::inter_charge_group_constraints(mtop);
2121 systemInfo.haveSplitSettles = gmx::inter_charge_group_settles(mtop);
2126 /* Set the cut-off to some very large value,
2127 * so we don't need if statements everywhere in the code.
2128 * We use sqrt, since the cut-off is squared in some places.
2130 systemInfo.cutoff = GMX_CUTOFF_INF;
2134 systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir.rlist);
2136 systemInfo.minCutoffForMultiBody = 0;
2138 /* Determine the minimum cell size limit, affected by many factors */
2139 systemInfo.cellsizeLimit = 0;
2140 systemInfo.filterBondedCommunication = false;
2142 /* We do not allow home atoms to move beyond the neighboring domain
2143 * between domain decomposition steps, which limits the cell size.
2144 * Get an estimate of cell size limit due to atom displacement.
2145 * In most cases this is a large overestimate, because it assumes
2146 * non-interaction atoms.
2147 * We set the chance to 1 in a trillion steps.
2149 constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2150 const real limitForAtomDisplacement =
2151 minCellSizeForAtomDisplacement(mtop, ir,
2152 systemInfo.updateGroupingPerMoleculetype,
2153 c_chanceThatAtomMovesBeyondDomain);
2154 GMX_LOG(mdlog.info).appendTextFormatted(
2155 "Minimum cell size due to atom displacement: %.3f nm",
2156 limitForAtomDisplacement);
2158 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2159 limitForAtomDisplacement);
2161 /* TODO: PME decomposition currently requires atoms not to be more than
2162 * 2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2163 * In nearly all cases, limitForAtomDisplacement will be smaller
2164 * than 2/3*rlist, so the PME requirement is satisfied.
2165 * But it would be better for both correctness and performance
2166 * to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2167 * Note that we would need to improve the pairlist buffer case.
2170 if (systemInfo.haveInterDomainBondeds)
2172 if (options.minimumCommunicationRange > 0)
/* User supplied the bonded communication range via -rdd */
2174 systemInfo.minCutoffForMultiBody =
2175 atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2176 if (options.useBondedCommunication)
2178 systemInfo.filterBondedCommunication = (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2182 systemInfo.cutoff = std::max(systemInfo.cutoff,
2183 systemInfo.minCutoffForMultiBody);
2186 else if (ir.bPeriodicMols)
2188 /* Can not easily determine the required cut-off */
2189 GMX_LOG(mdlog.warning).appendText("NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.");
2190 systemInfo.minCutoffForMultiBody = systemInfo.cutoff/2;
/* Otherwise: measure the actual maximum two-body (r_2b) and multi-body
 * (r_mb) bonded distances on the master and broadcast them */
2198 dd_bonded_cg_distance(mdlog, &mtop, &ir, as_rvec_array(xGlobal.data()), box,
2199 options.checkBondedInteractions,
2202 gmx_bcast(sizeof(r_2b), &r_2b, cr);
2203 gmx_bcast(sizeof(r_mb), &r_mb, cr);
2205 /* We use an initial margin of 10% for the minimum cell size,
2206 * except when we are just below the non-bonded cut-off.
2208 if (options.useBondedCommunication)
2210 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2212 const real r_bonded = std::max(r_2b, r_mb);
2213 systemInfo.minCutoffForMultiBody = tenPercentMargin*r_bonded;
2214 /* This is the (only) place where we turn on the filtering */
2215 systemInfo.filterBondedCommunication = true;
2219 const real r_bonded = r_mb;
2220 systemInfo.minCutoffForMultiBody = std::min(tenPercentMargin*r_bonded,
2223 /* We determine cutoff_mbody later */
2224 systemInfo.increaseMultiBodyCutoff = true;
2228 /* No special bonded communication,
2229 * simply increase the DD cut-off.
2231 systemInfo.minCutoffForMultiBody = tenPercentMargin*std::max(r_2b, r_mb);
2232 systemInfo.cutoff = std::max(systemInfo.cutoff,
2233 systemInfo.minCutoffForMultiBody);
2236 GMX_LOG(mdlog.info).appendTextFormatted(
2237 "Minimum cell size due to bonded interactions: %.3f nm",
2238 systemInfo.minCutoffForMultiBody);
2240 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2241 systemInfo.minCutoffForMultiBody);
2244 systemInfo.constraintCommunicationRange = 0;
2245 if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
2247 /* There is a cell size limit due to the constraints (P-LINCS) */
2248 systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, &mtop, &ir);
2249 GMX_LOG(mdlog.info).appendTextFormatted(
2250 "Estimated maximum distance required for P-LINCS: %.3f nm",
2251 systemInfo.constraintCommunicationRange);
2252 if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2254 GMX_LOG(mdlog.info).appendText("This distance will limit the DD cell size, you can override this with -rcon");
2257 else if (options.constraintCommunicationRange > 0)
2259 /* Here we do not check for dd->splitConstraints.
2260 * because one can also set a cell size limit for virtual sites only
2261 * and at this point we don't know yet if there are intercg v-sites.
2263 GMX_LOG(mdlog.info).appendTextFormatted(
2264 "User supplied maximum distance required for P-LINCS: %.3f nm",
2265 options.constraintCommunicationRange);
2266 systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2268 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2269 systemInfo.constraintCommunicationRange);
2274 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
 * used with the given number of ranks, box and cell-size limit.
 *
 * Checks: a valid grid was found at all; the average cell size meets the
 * limit; PP + PME-only rank counts add up; and there are not more PME-only
 * ranks than PP ranks.
2277 checkDDGridSetup(const DDGridSetup &ddGridSetup,
2278 const t_commrec *cr,
2279 const DomdecOptions &options,
2280 const DDSettings &ddSettings,
2281 const DDSystemInfo &systemInfo,
2282 const real cellsizeLimit,
2283 const gmx_ddbox_t &ddbox)
2285 if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
/* Automated grid search failed: suggest the option that is limiting */
2288 gmx_bool bC = (systemInfo.haveSplitConstraints &&
2289 systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2290 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
2291 !bC ? "-rdd" : "-rcon",
2292 ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2293 bC ? " or your LINCS settings" : "");
2295 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2296 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
2298 "Look in the log file for details on the domain decomposition",
2299 cr->nnodes - ddGridSetup.numPmeOnlyRanks,
2304 const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
2305 if (acs < cellsizeLimit)
2307 if (options.numCells[XX] <= 0)
2309 GMX_RELEASE_ASSERT(false, "dd_choose_grid() should return a grid that satisfies the cell size limits");
2313 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2314 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
2315 acs, cellsizeLimit);
2319 const int numPPRanks = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2320 if (cr->nnodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2322 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2323 "The size of the domain decomposition grid (%d) does not match the number of PP ranks (%d). The total number of ranks is %d",
2324 numPPRanks, cr->nnodes - ddGridSetup.numPmeOnlyRanks, cr->nnodes);
2326 if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2328 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2329 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", ddGridSetup.numPmeOnlyRanks, numPPRanks);
2333 /*! \brief Set up the PP/PME rank division and the PME grid decomposition */
2335 getDDRankSetup(const gmx::MDLogger &mdlog,
2337 const DDGridSetup &ddGridSetup,
2338 const t_inputrec &ir)
2340 GMX_LOG(mdlog.info).appendTextFormatted(
2341 "Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2342 ddGridSetup.numDomains[XX], ddGridSetup.numDomains[YY], ddGridSetup.numDomains[ZZ],
2343 ddGridSetup.numPmeOnlyRanks);
2345 DDRankSetup ddRankSetup;
2347 ddRankSetup.numPPRanks = cr->nnodes - ddGridSetup.numPmeOnlyRanks;
2348 copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2350 ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2351 if (ddRankSetup.usePmeOnlyRanks)
2353 ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
/* No separate PME ranks: every PP rank also does PME */
2357 ddRankSetup.numRanksDoingPme = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2360 if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2362 /* The following choices should match those
2363 * in comm_cost_est in domdec_setup.c.
2364 * Note that here the checks have to take into account
2365 * that the decomposition might occur in a different order than xyz
2366 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2367 * in which case they will not match those in comm_cost_est,
2368 * but since that is mainly for testing purposes that's fine.
2370 if (ddGridSetup.numDDDimensions >= 2 &&
2371 ddGridSetup.ddDimensions[0] == XX &&
2372 ddGridSetup.ddDimensions[1] == YY &&
2373 ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX] &&
2374 ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0 &&
2375 getenv("GMX_PMEONEDD") == nullptr)
/* 2D PME decomposition in x+y */
2377 ddRankSetup.npmedecompdim = 2;
2378 ddRankSetup.npmenodes_x = ddGridSetup.numDomains[XX];
2379 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme/ddRankSetup.npmenodes_x;
2383 /* In case nc is 1 in both x and y we could still choose to
2384 * decompose pme in y instead of x, but we use x for simplicity.
2386 ddRankSetup.npmedecompdim = 1;
2387 if (ddGridSetup.ddDimensions[0] == YY)
2389 ddRankSetup.npmenodes_x = 1;
2390 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2394 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2395 ddRankSetup.npmenodes_y = 1;
2398 GMX_LOG(mdlog.info).appendTextFormatted(
2399 "PME domain decomposition: %d x %d x %d",
2400 ddRankSetup.npmenodes_x, ddRankSetup.npmenodes_y, 1);
/* No PME electrostatics/LJ-PME: no PME decomposition at all */
2404 ddRankSetup.npmedecompdim = 0;
2405 ddRankSetup.npmenodes_x = 0;
2406 ddRankSetup.npmenodes_y = 0;
2412 /*! \brief Set the cell size and interaction limits */
2413 static void set_dd_limits(const gmx::MDLogger &mdlog,
2414 t_commrec *cr, gmx_domdec_t *dd,
2415 const DomdecOptions &options,
2416 const DDSettings &ddSettings,
2417 const DDSystemInfo &systemInfo,
2418 const DDGridSetup &ddGridSetup,
2419 const int numPPRanks,
2420 const gmx_mtop_t *mtop,
2421 const t_inputrec *ir,
2422 const gmx_ddbox_t &ddbox)
2424 gmx_domdec_comm_t *comm = dd->comm;
2425 comm->ddSettings = ddSettings;
2427 /* Initialize to GPU share count to 0, might change later */
2428 comm->nrank_gpu_shared = 0;
2430 comm->dlbState = comm->ddSettings.initialDlbState;
2431 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2432 /* To consider turning DLB on after 2*nstlist steps we need to check
2433 * at partitioning count 3. Thus we need to increase the first count by 2.
2435 comm->ddPartioningCountFirstDlbOff += 2;
2437 comm->bPMELoadBalDLBLimits = FALSE;
2439 /* Allocate the charge group/atom sorting struct */
2440 comm->sort = std::make_unique<gmx_domdec_sort_t>();
2442 comm->systemInfo = systemInfo;
2444 if (systemInfo.useUpdateGroups)
2446 /* Note: We would like to use dd->nnodes for the atom count estimate,
2447 * but that is not yet available here. But this anyhow only
2448 * affect performance up to the second dd_partition_system call.
2450 const int homeAtomCountEstimate = mtop->natoms/numPPRanks;
2451 comm->updateGroupsCog =
2452 std::make_unique<gmx::UpdateGroupsCog>(*mtop,
2453 systemInfo.updateGroupingPerMoleculetype,
2454 maxReferenceTemperature(*ir),
2455 homeAtomCountEstimate);
2458 /* Set the DD setup given by ddGridSetup */
2459 copy_ivec(ddGridSetup.numDomains, dd->nc);
2460 dd->ndim = ddGridSetup.numDDDimensions;
2461 copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2463 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
2465 snew(comm->slb_frac, DIM);
2466 if (isDlbDisabled(comm))
/* Static load balancing: parse user-supplied relative cell sizes */
2468 comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
2469 comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
2470 comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
2473 /* Set the multi-body cut-off and cellsize limit for DLB */
2474 comm->cutoff_mbody = systemInfo.minCutoffForMultiBody;
2475 comm->cellsize_limit = systemInfo.cellsizeLimit;
2476 if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2478 if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2480 /* Set the bonded communication distance to halfway
2481 * the minimum and the maximum,
2482 * since the extra communication cost is nearly zero.
2484 real acs = average_cellsize_min(ddbox, dd->nc);
2485 comm->cutoff_mbody = 0.5*(systemInfo.minCutoffForMultiBody + acs);
2486 if (!isDlbDisabled(comm))
2488 /* Check if this does not limit the scaling */
2489 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
2490 options.dlbScaling*acs);
2492 if (!systemInfo.filterBondedCommunication)
2494 /* Without bBondComm do not go beyond the n.b. cut-off */
2495 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2496 if (comm->cellsize_limit >= systemInfo.cutoff)
2498 /* We don't lose a lot of efficiency
2499 * when increasing it to the n.b. cut-off.
2500 * It can even be slightly faster, because we need
2501 * less checks for the communication setup.
2503 comm->cutoff_mbody = systemInfo.cutoff;
2506 /* Check if we did not end up below our original limit */
2507 comm->cutoff_mbody = std::max(comm->cutoff_mbody,
2508 systemInfo.minCutoffForMultiBody);
2510 if (comm->cutoff_mbody > comm->cellsize_limit)
2512 comm->cellsize_limit = comm->cutoff_mbody;
2515 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2520 fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
2521 "cellsize limit %f\n",
2522 gmx::boolToString(systemInfo.filterBondedCommunication),
2523 comm->cellsize_limit);
2528 check_dd_restrictions(dd, ir, mdlog);
/*! \brief Initializes bonded-interaction handling for DD.
 *
 * Builds the reverse topology via dd_make_reverse_top() and, when bonded
 * communication filtering is enabled, the bonded-link structure used to
 * decide which atoms beyond the cut-off must be communicated; otherwise
 * bondedLinks stays nullptr and communication is cut-off based only. */
2532 void dd_init_bondeds(FILE *fplog,
2534 const gmx_mtop_t *mtop,
2535 const gmx_vsite_t *vsite,
2536 const t_inputrec *ir,
2538 cginfo_mb_t *cginfo_mb)
2540 gmx_domdec_comm_t *comm;
2542 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
2546 if (comm->systemInfo.filterBondedCommunication)
2548 /* Communicate atoms beyond the cut-off for bonded interactions */
2549 comm->bondedLinks = makeBondedLinks(mtop, cginfo_mb);
2553 /* Only communicate atoms based on cut-off */
2554 comm->bondedLinks = nullptr;
/*! \brief Writes the DD communication-pulse counts, cell-size limits and
 * maximum interaction distances to \p log.
 * NOTE(review): several lines and braces of this function fall outside this
 * view; only comments are added here. */
2558 static void writeSettings(gmx::TextWriter *log,
2560 const gmx_mtop_t *mtop,
2561 const t_inputrec *ir,
2562 gmx_bool bDynLoadBal,
2564 const gmx_ddbox_t *ddbox)
2566 gmx_domdec_comm_t *comm;
2575 log->writeString("The maximum number of communication pulses is:");
2576 for (d = 0; d < dd->ndim; d++)
2578 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2580 log->ensureLineBreak();
2581 log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm", comm->cellsize_limit);
2582 log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2583 log->writeString("The allowed shrink of domain decomposition cells is:");
2584 for (d = 0; d < DIM; d++)
2588 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
/* Shrink factor: ratio of the minimum DLB cell size to the current
 * (skew-corrected) cell size along this dimension. */
2595 comm->cellsize_min_dlb[d]/
2596 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
2598 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2601 log->ensureLineBreak();
2605 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2606 log->writeString("The initial number of communication pulses is:");
2607 for (d = 0; d < dd->ndim; d++)
2609 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2611 log->ensureLineBreak();
2612 log->writeString("The initial domain decomposition cell size is:");
2613 for (d = 0; d < DIM; d++)
2617 log->writeStringFormatted(" %c %.2f nm",
2618 dim2char(d), dd->comm->cellsize_min[d]);
2621 log->ensureLineBreak();
/* Report the maximum distances for the interaction kinds that can span
 * domain boundaries, when any such interactions are present. */
2625 const bool haveInterDomainVsites =
2626 (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
2628 if (comm->systemInfo.haveInterDomainBondeds ||
2629 haveInterDomainVsites ||
2630 comm->systemInfo.haveSplitConstraints ||
2631 comm->systemInfo.haveSplitSettles)
2633 std::string decompUnits;
2634 if (comm->systemInfo.useUpdateGroups)
2636 decompUnits = "atom groups";
2640 decompUnits = "atoms";
2643 log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:", decompUnits.c_str());
2644 log->writeLineFormatted("%40s %-7s %6.3f nm", "non-bonded interactions", "", comm->systemInfo.cutoff);
2648 limit = dd->comm->cellsize_limit;
2652 if (dd->unitCellInfo.ddBoxIsDynamic)
2654 log->writeLine("(the following are initial values, they could change due to box deformation)");
/* Use the smallest current cell size over all dimensions as the limit */
2656 limit = dd->comm->cellsize_min[XX];
2657 for (d = 1; d < DIM; d++)
2659 limit = std::min(limit, dd->comm->cellsize_min[d]);
2663 if (comm->systemInfo.haveInterDomainBondeds)
2665 log->writeLineFormatted("%40s %-7s %6.3f nm",
2666 "two-body bonded interactions", "(-rdd)",
2667 std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2668 log->writeLineFormatted("%40s %-7s %6.3f nm",
2669 "multi-body bonded interactions", "(-rdd)",
2670 (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->systemInfo.cutoff, limit));
2672 if (haveInterDomainVsites)
2674 log->writeLineFormatted("%40s %-7s %6.3f nm",
2675 "virtual site constructions", "(-rcon)", limit);
2677 if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2679 std::string separation = gmx::formatString("atoms separated by up to %d constraints",
2681 log->writeLineFormatted("%40s %-7s %6.3f nm\n",
2682 separation.c_str(), "(-rcon)", limit);
2684 log->ensureLineBreak();
/*! \brief Logs the DD settings as a single paragraph.
 *
 * Writes the current settings via writeSettings(); when DLB is off but may
 * still be turned on, additionally writes the settings that would apply
 * once DLB is enabled. */
2688 static void logSettings(const gmx::MDLogger &mdlog,
2690 const gmx_mtop_t *mtop,
2691 const t_inputrec *ir,
2693 const gmx_ddbox_t *ddbox)
2695 gmx::StringOutputStream stream;
2696 gmx::TextWriter log(&stream);
2697 writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2698 if (dd->comm->dlbState == DlbState::offCanTurnOn)
2701 log.ensureEmptyLine();
2702 log.writeLine("When dynamic load balancing gets turned on, these settings will change to:");
2704 writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2706 GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
/*! \brief Sets the DLB cell limits: the maximum number of communication
 * pulses per dimension and the per-dimension minimum cell sizes, derived
 * from the cut-offs, the grid and \p dlb_scale.
 * NOTE(review): some braces/lines are outside this view; comments only. */
2709 static void set_cell_limits_dlb(const gmx::MDLogger &mdlog,
2712 const t_inputrec *ir,
2713 const gmx_ddbox_t *ddbox)
2715 gmx_domdec_comm_t *comm;
2716 int d, dim, npulse, npulse_d_max, npulse_d;
/* A zero vdW or Coulomb radius means there is effectively no cut-off */
2721 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2723 /* Determine the maximum number of comm. pulses in one dimension */
2725 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2727 /* Determine the maximum required number of grid pulses */
2728 if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2730 /* Only a single pulse is required */
2733 else if (!bNoCutOff && comm->cellsize_limit > 0)
2735 /* We round down slightly here to avoid overhead due to the latency
2736 * of extra communication calls when the cut-off
2737 * would be only slightly longer than the cell size.
2738 * Later cellsize_limit is redetermined,
2739 * so we can not miss interactions due to this rounding.
2741 npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff/comm->cellsize_limit);
2745 /* There is no cell size limit */
2746 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
2749 if (!bNoCutOff && npulse > 1)
2751 /* See if we can do with less pulses, based on dlb_scale */
2753 for (d = 0; d < dd->ndim; d++)
2756 npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->systemInfo.cutoff
2757 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
2758 npulse_d_max = std::max(npulse_d_max, npulse_d);
2760 npulse = std::min(npulse, npulse_d_max);
2763 /* This env var can override npulse */
2764 d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2771 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
2772 for (d = 0; d < dd->ndim; d++)
2774 if (comm->ddSettings.request1DAnd1Pulse)
2776 comm->cd[d].np_dlb = 1;
2780 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
2781 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2783 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
2785 comm->bVacDLBNoLimit = FALSE;
2789 /* cellsize_limit is set for LINCS in init_domain_decomposition */
2790 if (!comm->bVacDLBNoLimit)
2792 comm->cellsize_limit = std::max(comm->cellsize_limit,
2793 comm->systemInfo.cutoff/comm->maxpulse);
2795 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2796 /* Set the minimum cell size for each DD dimension */
2797 for (d = 0; d < dd->ndim; d++)
2799 if (comm->bVacDLBNoLimit ||
2800 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->systemInfo.cutoff)
2802 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2806 comm->cellsize_min_dlb[dd->dim[d]] =
2807 comm->systemInfo.cutoff/comm->cd[d].np_dlb;
2810 if (comm->cutoff_mbody <= 0)
2812 comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
/*! \brief Returns whether molecules are always whole, as recorded in the
 * DD system information. */
2820 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t &dd)
2822 return dd.comm->systemInfo.moleculesAreAlwaysWhole;
/*! \brief Returns whether PBC must be taken into account for the bonded
 * interactions; only relevant with PBC and inter-domain bondeds.
 * NOTE(review): part of the condition (the XX/YY dimension checks) lies
 * outside this view — only comments are added here. */
2825 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
2827 /* If each molecule is a single charge group
2828 * or we use domain decomposition for each periodic dimension,
2829 * we do not need to take pbc into account for the bonded interactions.
2831 return (ePBC != epbcNONE && dd->comm->systemInfo.haveInterDomainBondeds &&
2834 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
2837 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2838 static void set_ddgrid_parameters(const gmx::MDLogger &mdlog,
2839 gmx_domdec_t *dd, real dlb_scale,
2840 const gmx_mtop_t *mtop, const t_inputrec *ir,
2841 const gmx_ddbox_t *ddbox)
2843 gmx_domdec_comm_t *comm = dd->comm;
2844 DDRankSetup &ddRankSetup = comm->ddRankSetup;
/* With PME electrostatics and/or LJ-PME, set up the PME decomposition(s) */
2846 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2848 init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2849 if (ddRankSetup.npmedecompdim >= 2)
2851 init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2856 ddRankSetup.numRanksDoingPme = 0;
2857 if (dd->pme_nodeid >= 0)
2859 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2860 "Can not have separate PME ranks without PME electrostatics");
2866 fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2868 if (!isDlbDisabled(comm))
2870 set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2873 logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
/* Estimate the volume fraction covered by the local zones, used below
 * to size the global-to-local atom lookup. */
2876 if (ir->ePBC == epbcNONE)
2878 vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
2883 (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, *ddbox))/static_cast<double>(dd->nnodes);
2887 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2889 int natoms_tot = mtop->natoms;
/* Size the ga2la hash with the expected local atom count estimate */
2891 dd->ga2la = new gmx_ga2la_t(natoms_tot,
2892 static_cast<int>(vol_frac*natoms_tot));
2895 /*! \brief Get some important DD parameters which can be modified by env.vars */
2897 getDDSettings(const gmx::MDLogger &mdlog,
2898 const DomdecOptions &options,
2899 const gmx::MdrunOptions &mdrunOptions,
2900 const t_inputrec &ir)
2902 DDSettings ddSettings;
/* Read the GMX_DD_* / GMX_DLB_* environment-variable overrides */
2904 ddSettings.useSendRecv2 = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2905 ddSettings.dlb_scale_lim = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2906 ddSettings.request1DAnd1Pulse = bool(dd_getenv(mdlog, "GMX_DD_1D_1PULSE", 0));
2907 ddSettings.useDDOrderZYX = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
2908 ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
2909 ddSettings.eFlop = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2910 const int recload = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2911 ddSettings.nstDDDump = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2912 ddSettings.nstDDDumpGrid = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2913 ddSettings.DD_debug = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
2915 if (ddSettings.useSendRecv2)
2917 GMX_LOG(mdlog.info).appendText("Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication");
2920 if (ddSettings.eFlop)
2922 GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2923 ddSettings.recordLoad = true;
/* Otherwise record load only when cycle counters are available */
2927 ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
2930 ddSettings.initialDlbState =
2931 determineInitialDlbState(mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, &ir);
2932 GMX_LOG(mdlog.info).appendTextFormatted("Dynamic load balancing: %s",
2933 edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);
/* Constructor of gmx_domdec_t.
 * NOTE(review): the member initializer list and body continue beyond this
 * view; only this comment is added. */
2938 gmx_domdec_t::gmx_domdec_t(const t_inputrec &ir) :
2943 /*! \brief Return whether the simulation described can run a 1D single-pulse DD.
2945 * The GPU halo exchange code requires a 1D single-pulse DD. Such a DD
2946 * generally requires a larger box than other possible decompositions
2947 * with the same rank count, so the calling code might need to decide
2948 * what is the most appropriate way to run the simulation based on
2949 * whether such a DD is possible.
2951 * This function works like init_domain_decomposition(), but will not
2952 * give a fatal error, and only uses \c cr for communicating between
2955 * It is safe to call before thread-MPI spawns ranks, so that
2956 * thread-MPI can decide whether and how to trigger the GPU halo
2957 * exchange code path. The number of PME ranks, if any, should be set
2958 * in \c options.numPmeRanks.
2961 canMake1DAnd1PulseDomainDecomposition(const DDSettings &ddSettingsOriginal,
2962 const t_commrec *cr,
2963 const int numRanksRequested,
2964 const DomdecOptions &options,
2965 const gmx_mtop_t &mtop,
2966 const t_inputrec &ir,
2968 gmx::ArrayRef<const gmx::RVec> xGlobal)
2970 // Ensure we don't write any output from this checking routine
2971 gmx::MDLogger dummyLogger;
2973 DDSystemInfo systemInfo = getSystemInfo(dummyLogger, cr, options, mtop, ir, box, xGlobal);
// Request the 1D single-pulse setup and see whether a grid can be found
2975 DDSettings ddSettings = ddSettingsOriginal;
2976 ddSettings.request1DAnd1Pulse = true;
2977 const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(dummyLogger, ddSettings.request1DAnd1Pulse,
2978 !isDlbDisabled(ddSettings.initialDlbState),
2979 options.dlbScaling, ir,
2980 systemInfo.cellsizeLimit);
2981 gmx_ddbox_t ddbox = {0};
2982 DDGridSetup ddGridSetup = getDDGridSetup(dummyLogger, cr, numRanksRequested, options,
2983 ddSettings, systemInfo, gridSetupCellsizeLimit,
2984 mtop, ir, box, xGlobal, &ddbox);
// A zero domain count along XX signals that no valid grid was found
2986 const bool canMakeDDWith1DAnd1Pulse = (ddGridSetup.numDomains[XX] != 0);
2988 return canMakeDDWith1DAnd1Pulse;
2991 bool is1DAnd1PulseDD(const gmx_domdec_t &dd)
2993 const int maxDimensionSize = std::max(dd.nc[XX], std::max(dd.nc[YY], dd.nc[ZZ]));
2994 const int productOfDimensionSizes = dd.nc[XX]*dd.nc[YY]*dd.nc[ZZ];
2995 const bool decompositionHasOneDimension = (maxDimensionSize == productOfDimensionSizes);
2997 const bool hasMax1Pulse =
2998 ((isDlbDisabled(dd.comm) && dd.comm->cellsize_limit >= dd.comm->systemInfo.cutoff) ||
2999 (!isDlbDisabled(dd.comm) && dd.comm->maxpulse == 1));
3001 return decompositionHasOneDimension && hasMax1Pulse;
3008 // TODO once the functionality stabilizes, move this class and
3009 // supporting functionality into builder.cpp
3010 /*! \brief Impl class for DD builder */
3011 class DomainDecompositionBuilder::Impl
3015 Impl(const MDLogger &mdlog,
3017 const DomdecOptions &options,
3018 const MdrunOptions &mdrunOptions,
3019 bool prefer1DAnd1Pulse,
3020 const gmx_mtop_t &mtop,
3021 const t_inputrec &ir,
3023 ArrayRef<const RVec> xGlobal);
3025 //! Build the resulting DD manager
3026 gmx_domdec_t *build(LocalAtomSetManager *atomSets);
3028 //! Objects used in constructing and configuring DD
3031 const MDLogger &mdlog_;
3032 //! Communication object
3034 //! User-supplied options configuring DD behavior
3035 const DomdecOptions options_;
3036 //! Global system topology
3037 const gmx_mtop_t &mtop_;
3038 //! User input values from the tpr file
3039 const t_inputrec &ir_;
3042 //! Internal objects used in constructing DD
3044 //! Settings combined from the user input
3045 DDSettings ddSettings_;
3046 //! Information derived from the simulation system
3047 DDSystemInfo systemInfo_;
//! Box description used during DD setup
3049 gmx_ddbox_t ddbox_ = { 0 };
3050 //! Organization of the DD grids
3051 DDGridSetup ddGridSetup_;
3052 //! Organization of the DD ranks
3053 DDRankSetup ddRankSetup_;
3054 //! Number of DD cells in each dimension
3055 ivec ddCellIndex_ = { 0, 0, 0 };
3056 //! IDs of PME-only ranks
3057 std::vector<int> pmeRanks_;
3058 //! Contains a valid Cartesian-communicator-based setup, or defaults.
3059 CartesianRankSetup cartSetup_;
/*! \brief Impl constructor: collects the DD settings, system information,
 * grid and rank setup, and creates the group communicators.
 * NOTE(review): the member initializer list is outside this view. */
3064 DomainDecompositionBuilder::Impl::Impl(const MDLogger &mdlog,
3066 const DomdecOptions &options,
3067 const MdrunOptions &mdrunOptions,
3068 const bool prefer1DAnd1Pulse,
3069 const gmx_mtop_t &mtop,
3070 const t_inputrec &ir,
3072 ArrayRef<const RVec> xGlobal)
3079 GMX_LOG(mdlog_.info).appendTextFormatted(
3080 "\nInitializing Domain Decomposition on %d ranks", cr_->nnodes);
3082 ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_);
/* Prefer a 1D single-pulse DD (for GPU halo exchange) when possible */
3084 if (prefer1DAnd1Pulse &&
3085 canMake1DAnd1PulseDomainDecomposition(ddSettings_, cr_, cr_->nnodes, options_,
3086 mtop_, ir_, box, xGlobal))
3088 ddSettings_.request1DAnd1Pulse = true;
3091 if (ddSettings_.eFlop > 1)
3093 /* Ensure that we have different random flop counts on different ranks */
3094 srand(1 + cr_->nodeid);
3097 systemInfo_ = getSystemInfo(mdlog_, cr_, options_, mtop_, ir_, box, xGlobal);
3099 const int numRanksRequested = cr_->nnodes;
3100 checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir_.coulombtype), options_.numPmeRanks);
3102 // DD grid setup uses a different cell size limit for
3103 // automated setup than the one in systemInfo_. The latter is used
3104 // in set_dd_limits() to configure DLB, for example.
3105 const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(mdlog_, ddSettings_.request1DAnd1Pulse,
3106 !isDlbDisabled(ddSettings_.initialDlbState),
3107 options_.dlbScaling, ir_,
3108 systemInfo_.cellsizeLimit);
3109 ddGridSetup_ = getDDGridSetup(mdlog_, cr_, numRanksRequested, options_,
3110 ddSettings_, systemInfo_, gridSetupCellsizeLimit,
3111 mtop_, ir_, box, xGlobal, &ddbox_);
3112 checkDDGridSetup(ddGridSetup_, cr_, options_, ddSettings_, systemInfo_, gridSetupCellsizeLimit, ddbox_);
3114 cr_->npmenodes = ddGridSetup_.numPmeOnlyRanks;
3116 ddRankSetup_ = getDDRankSetup(mdlog_, cr_, ddGridSetup_, ir_);
3118 /* Generate the group communicator, also decides the duty of each rank */
3120 makeGroupCommunicators(mdlog_, ddSettings_, options_.rankOrder,
3122 ddCellIndex_, &pmeRanks_);
/*! \brief Builds and returns the configured gmx_domdec_t, owned by the
 * caller.
 * NOTE(review): the closing lines (including the return statement) fall
 * outside this view; only comments are added here. */
3125 gmx_domdec_t *DomainDecompositionBuilder::Impl::build(LocalAtomSetManager *atomSets)
3127 gmx_domdec_t *dd = new gmx_domdec_t(ir_);
3129 copy_ivec(ddCellIndex_, dd->ci);
3131 dd->comm = init_dd_comm();
3133 dd->comm->ddRankSetup = ddRankSetup_;
3134 dd->comm->cartesianRankSetup = cartSetup_;
3136 set_dd_limits(mdlog_, cr_, dd, options_,
3137 ddSettings_, systemInfo_,
3139 ddRankSetup_.numPPRanks,
3143 setupGroupCommunication(mdlog_, ddSettings_, pmeRanks_, cr_, mtop_.natoms, dd);
/* Grid parameters and neighbor relations are only needed on PP ranks */
3145 if (thisRankHasDuty(cr_, DUTY_PP))
3147 set_ddgrid_parameters(mdlog_, dd, options_.dlbScaling, &mtop_, &ir_, &ddbox_);
3149 setup_neighbor_relations(dd);
3152 /* Set overallocation to avoid frequent reallocation of arrays */
3153 set_over_alloc_dd(TRUE);
3155 dd->atomSets = atomSets;
/*! \brief Public builder constructor; delegates all work to Impl. */
3160 DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger &mdlog,
3162 const DomdecOptions &options,
3163 const MdrunOptions &mdrunOptions,
3164 const bool prefer1DAnd1Pulse,
3165 const gmx_mtop_t &mtop,
3166 const t_inputrec &ir,
3168 ArrayRef<const RVec> xGlobal)
3169 : impl_(new Impl(mdlog, cr, options, mdrunOptions, prefer1DAnd1Pulse, mtop, ir, box, xGlobal))
/*! \brief Forwards to Impl::build(); the returned DD manager is owned by
 * the caller. */
3173 gmx_domdec_t *DomainDecompositionBuilder::build(LocalAtomSetManager *atomSets)
3175 return impl_->build(atomSets);
// Defined out of line, where Impl is a complete type
3178 DomainDecompositionBuilder::~DomainDecompositionBuilder() = default;
/*! \brief Returns whether the current DD grid supports \p cutoffRequested.
 *
 * Computes the required number of communication pulses per dimension and,
 * with DLB, checks them against the allowed pulse counts and the current
 * cell sizes; the local verdict is summed over all ranks.
 * NOTE(review): several lines/braces are outside this view; comments only. */
3182 static gmx_bool test_dd_cutoff(t_commrec *cr,
3184 gmx::ArrayRef<const gmx::RVec> x,
3185 real cutoffRequested)
3195 set_ddbox(*dd, false, box, true, x, &ddbox);
3199 for (d = 0; d < dd->ndim; d++)
3203 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
3204 if (dd->unitCellInfo.ddBoxIsDynamic)
3206 inv_cell_size *= DD_PRES_SCALE_MARGIN;
/* Number of pulses needed along this dimension for the requested cut-off */
3209 np = 1 + static_cast<int>(cutoffRequested*inv_cell_size*ddbox.skew_fac[dim]);
3211 if (dd->comm->ddSettings.request1DAnd1Pulse && np > 1)
3216 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3218 if (np > dd->comm->cd[d].np_dlb)
3223 /* If a current local cell size is smaller than the requested
3224 * cut-off, we could still fix it, but this gets very complicated.
3225 * Without fixing here, we might actually need more checks.
3227 real cellSizeAlongDim = (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim];
3228 if (cellSizeAlongDim*dd->comm->cd[d].np_dlb < cutoffRequested)
3235 if (!isDlbDisabled(dd->comm))
3237 /* If DLB is not active yet, we don't need to check the grid jumps.
3238 * Actually we shouldn't, because then the grid jump data is not set.
3240 if (isDlbOn(dd->comm) &&
3241 check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
/* Combine the local verdicts from all ranks */
3246 gmx_sumi(1, &LocallyLimited, cr);
3248 if (LocallyLimited > 0)
/*! \brief Tests whether the DD grid supports \p cutoffRequested and, when
 * allowed, stores it as the new DD cut-off; returns whether it was allowed.
 * NOTE(review): the conditional guarding the assignment appears to fall
 * outside this view; comments only are added. */
3257 gmx_bool change_dd_cutoff(t_commrec *cr,
3259 gmx::ArrayRef<const gmx::RVec> x,
3260 real cutoffRequested)
3262 gmx_bool bCutoffAllowed;
3264 bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3268 cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3271 return bCutoffAllowed;