2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
53 #include "gromacs/domdec/builder.h"
54 #include "gromacs/domdec/collect.h"
55 #include "gromacs/domdec/dlb.h"
56 #include "gromacs/domdec/dlbtiming.h"
57 #include "gromacs/domdec/domdec_network.h"
58 #include "gromacs/domdec/ga2la.h"
59 #include "gromacs/domdec/gpuhaloexchange.h"
60 #include "gromacs/domdec/options.h"
61 #include "gromacs/domdec/partition.h"
62 #include "gromacs/gmxlib/network.h"
63 #include "gromacs/gmxlib/nrnb.h"
64 #include "gromacs/gpu_utils/gpu_utils.h"
65 #include "gromacs/hardware/hw_info.h"
66 #include "gromacs/listed_forces/manage_threading.h"
67 #include "gromacs/math/vec.h"
68 #include "gromacs/math/vectypes.h"
69 #include "gromacs/mdlib/calc_verletbuf.h"
70 #include "gromacs/mdlib/constr.h"
71 #include "gromacs/mdlib/constraintrange.h"
72 #include "gromacs/mdlib/updategroups.h"
73 #include "gromacs/mdlib/vsite.h"
74 #include "gromacs/mdtypes/commrec.h"
75 #include "gromacs/mdtypes/forceoutput.h"
76 #include "gromacs/mdtypes/inputrec.h"
77 #include "gromacs/mdtypes/mdrunoptions.h"
78 #include "gromacs/mdtypes/state.h"
79 #include "gromacs/pbcutil/ishift.h"
80 #include "gromacs/pbcutil/pbc.h"
81 #include "gromacs/pulling/pull.h"
82 #include "gromacs/timing/wallcycle.h"
83 #include "gromacs/topology/block.h"
84 #include "gromacs/topology/idef.h"
85 #include "gromacs/topology/ifunc.h"
86 #include "gromacs/topology/mtop_lookup.h"
87 #include "gromacs/topology/mtop_util.h"
88 #include "gromacs/topology/topology.h"
89 #include "gromacs/utility/basedefinitions.h"
90 #include "gromacs/utility/basenetwork.h"
91 #include "gromacs/utility/cstringutil.h"
92 #include "gromacs/utility/exceptions.h"
93 #include "gromacs/utility/fatalerror.h"
94 #include "gromacs/utility/gmxmpi.h"
95 #include "gromacs/utility/logger.h"
96 #include "gromacs/utility/real.h"
97 #include "gromacs/utility/smalloc.h"
98 #include "gromacs/utility/strconvert.h"
99 #include "gromacs/utility/stringstream.h"
100 #include "gromacs/utility/stringutil.h"
101 #include "gromacs/utility/textwriter.h"
103 #include "atomdistribution.h"
105 #include "cellsizes.h"
106 #include "distribute.h"
107 #include "domdec_constraints.h"
108 #include "domdec_internal.h"
109 #include "domdec_setup.h"
110 #include "domdec_vsite.h"
111 #include "redistribute.h"
114 // TODO remove this when moving domdec into gmx namespace
115 using gmx::DdRankOrder;
116 using gmx::DlbOption;
117 using gmx::DomdecOptions;
119 static const char *edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
121 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
124 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
125 #define DD_FLAG_NRCG 65535
126 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
127 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
129 /* The DD zone order */
130 static const ivec dd_zo[DD_MAXZONE] =
131 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
133 /* The non-bonded zone-pair setup for domain decomposition
134 * The first number is the i-zone, the second number the first j-zone seen by
135 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
136 * As is, this is for 3D decomposition, where there are 4 i-zones.
137 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
138 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
141 ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
148 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
150 static void index2xyz(ivec nc,int ind,ivec xyz)
152 xyz[XX] = ind % nc[XX];
153 xyz[YY] = (ind / nc[XX]) % nc[YY];
154 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
158 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
160 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
161 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
162 xyz[ZZ] = ind % nc[ZZ];
165 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
169 const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
170 const int ddindex = dd_index(dd->nc, c);
171 if (cartSetup.bCartesianPP_PME)
173 ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
175 else if (cartSetup.bCartesianPP)
178 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
189 int ddglatnr(const gmx_domdec_t *dd, int i)
199 if (i >= dd->comm->atomRanges.numAtomsTotal())
201 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
203 atnr = dd->globalAtomIndices[i] + 1;
209 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t &dd)
211 GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
212 return dd.comm->systemInfo.updateGroupingPerMoleculetype;
215 void dd_store_state(gmx_domdec_t *dd, t_state *state)
219 if (state->ddp_count != dd->ddp_count)
221 gmx_incons("The MD state does not match the domain decomposition state");
224 state->cg_gl.resize(dd->ncg_home);
225 for (i = 0; i < dd->ncg_home; i++)
227 state->cg_gl[i] = dd->globalAtomGroupIndices[i];
230 state->ddp_count_cg_gl = dd->ddp_count;
233 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
235 return &dd->comm->zones;
238 int dd_numHomeAtoms(const gmx_domdec_t &dd)
240 return dd.comm->atomRanges.numHomeAtoms();
243 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
245 /* We currently set mdatoms entries for all atoms:
246 * local + non-local + communicated for vsite + constraints
249 return dd->comm->atomRanges.numAtomsTotal();
252 int dd_natoms_vsite(const gmx_domdec_t *dd)
254 return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
257 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
259 *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
260 *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
263 void dd_move_x(gmx_domdec_t *dd,
265 gmx::ArrayRef<gmx::RVec> x,
266 gmx_wallcycle *wcycle)
268 wallcycle_start(wcycle, ewcMOVEX);
271 gmx_domdec_comm_t *comm;
272 gmx_domdec_comm_dim_t *cd;
273 rvec shift = {0, 0, 0};
274 gmx_bool bPBC, bScrew;
279 nat_tot = comm->atomRanges.numHomeAtoms();
280 for (int d = 0; d < dd->ndim; d++)
282 bPBC = (dd->ci[dd->dim[d]] == 0);
283 bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
286 copy_rvec(box[dd->dim[d]], shift);
289 for (const gmx_domdec_ind_t &ind : cd->ind)
291 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
292 gmx::ArrayRef<gmx::RVec> &sendBuffer = sendBufferAccess.buffer;
296 for (int j : ind.index)
298 sendBuffer[n] = x[j];
304 for (int j : ind.index)
306 /* We need to shift the coordinates */
307 for (int d = 0; d < DIM; d++)
309 sendBuffer[n][d] = x[j][d] + shift[d];
316 for (int j : ind.index)
319 sendBuffer[n][XX] = x[j][XX] + shift[XX];
321 * This operation requires a special shift force
322 * treatment, which is performed in calc_vir.
324 sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
325 sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
330 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
332 gmx::ArrayRef<gmx::RVec> receiveBuffer;
333 if (cd->receiveInPlace)
335 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
339 receiveBuffer = receiveBufferAccess.buffer;
341 /* Send and receive the coordinates */
342 ddSendrecv(dd, d, dddirBackward,
343 sendBuffer, receiveBuffer);
345 if (!cd->receiveInPlace)
348 for (int zone = 0; zone < nzone; zone++)
350 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
352 x[i] = receiveBuffer[j++];
356 nat_tot += ind.nrecv[nzone+1];
361 wallcycle_stop(wcycle, ewcMOVEX);
364 void dd_move_f(gmx_domdec_t *dd,
365 gmx::ForceWithShiftForces *forceWithShiftForces,
366 gmx_wallcycle *wcycle)
368 wallcycle_start(wcycle, ewcMOVEF);
370 gmx::ArrayRef<gmx::RVec> f = forceWithShiftForces->force();
371 gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();
373 gmx_domdec_comm_t &comm = *dd->comm;
374 int nzone = comm.zones.n/2;
375 int nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
376 for (int d = dd->ndim-1; d >= 0; d--)
378 /* Only forces in domains near the PBC boundaries need to
379 consider PBC in the treatment of fshift */
380 const bool shiftForcesNeedPbc = (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
381 const bool applyScrewPbc = (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
382 /* Determine which shift vector we need */
383 ivec vis = { 0, 0, 0 };
385 const int is = IVEC2IS(vis);
387 /* Loop over the pulses */
388 const gmx_domdec_comm_dim_t &cd = comm.cd[d];
389 for (int p = cd.numPulses() - 1; p >= 0; p--)
391 const gmx_domdec_ind_t &ind = cd.ind[p];
392 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
393 gmx::ArrayRef<gmx::RVec> &receiveBuffer = receiveBufferAccess.buffer;
395 nat_tot -= ind.nrecv[nzone+1];
397 DDBufferAccess<gmx::RVec> sendBufferAccess(comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
399 gmx::ArrayRef<gmx::RVec> sendBuffer;
400 if (cd.receiveInPlace)
402 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
406 sendBuffer = sendBufferAccess.buffer;
408 for (int zone = 0; zone < nzone; zone++)
410 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
412 sendBuffer[j++] = f[i];
416 /* Communicate the forces */
417 ddSendrecv(dd, d, dddirForward,
418 sendBuffer, receiveBuffer);
419 /* Add the received forces */
421 if (!shiftForcesNeedPbc)
423 for (int j : ind.index)
425 for (int d = 0; d < DIM; d++)
427 f[j][d] += receiveBuffer[n][d];
432 else if (!applyScrewPbc)
434 for (int j : ind.index)
436 for (int d = 0; d < DIM; d++)
438 f[j][d] += receiveBuffer[n][d];
440 /* Add this force to the shift force */
441 for (int d = 0; d < DIM; d++)
443 fshift[is][d] += receiveBuffer[n][d];
450 for (int j : ind.index)
452 /* Rotate the force */
453 f[j][XX] += receiveBuffer[n][XX];
454 f[j][YY] -= receiveBuffer[n][YY];
455 f[j][ZZ] -= receiveBuffer[n][ZZ];
456 if (shiftForcesNeedPbc)
458 /* Add this force to the shift force */
459 for (int d = 0; d < DIM; d++)
461 fshift[is][d] += receiveBuffer[n][d];
470 wallcycle_stop(wcycle, ewcMOVEF);
473 /* Convenience function for extracting a real buffer from an rvec buffer
475 * To reduce the number of temporary communication buffers and avoid
476 * cache polution, we reuse gmx::RVec buffers for storing reals.
477 * This functions return a real buffer reference with the same number
478 * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
480 static gmx::ArrayRef<real>
481 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
483 return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
487 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
490 gmx_domdec_comm_t *comm;
491 gmx_domdec_comm_dim_t *cd;
496 nat_tot = comm->atomRanges.numHomeAtoms();
497 for (int d = 0; d < dd->ndim; d++)
500 for (const gmx_domdec_ind_t &ind : cd->ind)
502 /* Note: We provision for RVec instead of real, so a factor of 3
503 * more than needed. The buffer actually already has this size
504 * and we pass a plain pointer below, so this does not matter.
506 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
507 gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
509 for (int j : ind.index)
511 sendBuffer[n++] = v[j];
514 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
516 gmx::ArrayRef<real> receiveBuffer;
517 if (cd->receiveInPlace)
519 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
523 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
525 /* Send and receive the data */
526 ddSendrecv(dd, d, dddirBackward,
527 sendBuffer, receiveBuffer);
528 if (!cd->receiveInPlace)
531 for (int zone = 0; zone < nzone; zone++)
533 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
535 v[i] = receiveBuffer[j++];
539 nat_tot += ind.nrecv[nzone+1];
545 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
548 gmx_domdec_comm_t *comm;
549 gmx_domdec_comm_dim_t *cd;
553 nzone = comm->zones.n/2;
554 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
555 for (int d = dd->ndim-1; d >= 0; d--)
558 for (int p = cd->numPulses() - 1; p >= 0; p--)
560 const gmx_domdec_ind_t &ind = cd->ind[p];
562 /* Note: We provision for RVec instead of real, so a factor of 3
563 * more than needed. The buffer actually already has this size
564 * and we typecast, so this works as intended.
566 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
567 gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
568 nat_tot -= ind.nrecv[nzone + 1];
570 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
572 gmx::ArrayRef<real> sendBuffer;
573 if (cd->receiveInPlace)
575 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
579 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
581 for (int zone = 0; zone < nzone; zone++)
583 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
585 sendBuffer[j++] = v[i];
589 /* Communicate the forces */
590 ddSendrecv(dd, d, dddirForward,
591 sendBuffer, receiveBuffer);
592 /* Add the received forces */
594 for (int j : ind.index)
596 v[j] += receiveBuffer[n];
604 real dd_cutoff_multibody(const gmx_domdec_t *dd)
606 const gmx_domdec_comm_t &comm = *dd->comm;
607 const DDSystemInfo &systemInfo = comm.systemInfo;
610 if (systemInfo.haveInterDomainMultiBodyBondeds)
612 if (comm.cutoff_mbody > 0)
614 r = comm.cutoff_mbody;
618 /* cutoff_mbody=0 means we do not have DLB */
619 r = comm.cellsize_min[dd->dim[0]];
620 for (int di = 1; di < dd->ndim; di++)
622 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
624 if (comm.systemInfo.filterBondedCommunication)
626 r = std::max(r, comm.cutoff_mbody);
630 r = std::min(r, systemInfo.cutoff);
638 real dd_cutoff_twobody(const gmx_domdec_t *dd)
642 r_mb = dd_cutoff_multibody(dd);
644 return std::max(dd->comm->systemInfo.cutoff, r_mb);
648 static void dd_cart_coord2pmecoord(const DDRankSetup &ddRankSetup,
649 const CartesianRankSetup &cartSetup,
653 const int nc = ddRankSetup.numPPCells[cartSetup.cartpmedim];
654 const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
655 copy_ivec(coord, coord_pme);
656 coord_pme[cartSetup.cartpmedim] =
657 nc + (coord[cartSetup.cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
660 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
661 static int ddindex2pmeindex(const DDRankSetup &ddRankSetup,
662 const int ddCellIndex)
664 const int npp = ddRankSetup.numPPRanks;
665 const int npme = ddRankSetup.numRanksDoingPme;
667 /* Here we assign a PME node to communicate with this DD node
668 * by assuming that the major index of both is x.
669 * We add npme/2 to obtain an even distribution.
671 return (ddCellIndex*npme + npme/2)/npp;
674 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup &ddRankSetup)
676 std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
679 for (int i = 0; i < ddRankSetup.numPPRanks; i++)
681 const int p0 = ddindex2pmeindex(ddRankSetup, i);
682 const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
683 if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
687 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
689 pmeRanks[n] = i + 1 + n;
697 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
705 if (dd->comm->bCartesian) {
706 gmx_ddindex2xyz(dd->nc,ddindex,coords);
707 dd_coords2pmecoords(dd,coords,coords_pme);
708 copy_ivec(dd->ntot,nc);
709 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
710 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
712 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
714 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
720 slab = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->nc, coords));
725 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
727 const CartesianRankSetup &cartSetup = cr->dd->comm->cartesianRankSetup;
728 ivec coords = { x, y, z };
731 if (cartSetup.bCartesianPP_PME)
734 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
739 int ddindex = dd_index(cr->dd->nc, coords);
740 if (cartSetup.bCartesianPP)
742 nodeid = cartSetup.ddindex2simnodeid[ddindex];
746 if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
748 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
760 static int dd_simnode2pmenode(const DDRankSetup &ddRankSetup,
761 const CartesianRankSetup &cartSetup,
762 gmx::ArrayRef<const int> pmeRanks,
763 const t_commrec gmx_unused *cr,
764 const int sim_nodeid)
768 /* This assumes a uniform x domain decomposition grid cell size */
769 if (cartSetup.bCartesianPP_PME)
772 ivec coord, coord_pme;
773 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
774 if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
776 /* This is a PP rank */
777 dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
778 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
782 else if (cartSetup.bCartesianPP)
784 if (sim_nodeid < ddRankSetup.numPPRanks)
786 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
791 /* This assumes DD cells with identical x coordinates
792 * are numbered sequentially.
794 if (pmeRanks.empty())
796 if (sim_nodeid < ddRankSetup.numPPRanks)
798 /* The DD index equals the nodeid */
799 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
805 while (sim_nodeid > pmeRanks[i])
809 if (sim_nodeid < pmeRanks[i])
811 pmenode = pmeRanks[i];
819 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
824 dd->comm->ddRankSetup.npmenodes_x,
825 dd->comm->ddRankSetup.npmenodes_y
836 std::vector<int> get_pme_ddranks(const t_commrec *cr,
839 const DDRankSetup &ddRankSetup = cr->dd->comm->ddRankSetup;
840 const CartesianRankSetup &cartSetup = cr->dd->comm->cartesianRankSetup;
841 GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks, "This function should only be called when PME-only ranks are in use");
842 std::vector<int> ddranks;
843 ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1)/ddRankSetup.numRanksDoingPme);
845 for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
847 for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
849 for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
851 if (cartSetup.bCartesianPP_PME)
853 ivec coord = { x, y, z };
855 dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
856 if (cr->dd->ci[XX] == coord_pme[XX] &&
857 cr->dd->ci[YY] == coord_pme[YY] &&
858 cr->dd->ci[ZZ] == coord_pme[ZZ])
860 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
865 /* The slab corresponds to the nodeid in the PME group */
866 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
868 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
877 static gmx_bool receive_vir_ener(const gmx_domdec_t *dd,
878 gmx::ArrayRef<const int> pmeRanks,
881 gmx_bool bReceive = TRUE;
883 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
884 if (ddRankSetup.usePmeOnlyRanks)
886 const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
887 if (cartSetup.bCartesianPP_PME)
890 int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
892 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
893 coords[cartSetup.cartpmedim]++;
894 if (coords[cartSetup.cartpmedim] < dd->nc[cartSetup.cartpmedim])
897 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
898 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
900 /* This is not the last PP node for pmenode */
905 GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
910 int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
911 if (cr->sim_nodeid+1 < cr->nnodes &&
912 dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
914 /* This is not the last PP node for pmenode */
923 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
925 gmx_domdec_comm_t *comm;
930 snew(*dim_f, dd->nc[dim]+1);
932 for (i = 1; i < dd->nc[dim]; i++)
934 if (comm->slb_frac[dim])
936 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
940 (*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
943 (*dim_f)[dd->nc[dim]] = 1;
946 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
948 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
950 if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
958 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
960 ddpme->nslab = (ddpme->dim == 0 ?
961 ddRankSetup.npmenodes_x :
962 ddRankSetup.npmenodes_y);
964 if (ddpme->nslab <= 1)
969 const int nso = ddRankSetup.numRanksDoingPme/ddpme->nslab;
970 /* Determine for each PME slab the PP location range for dimension dim */
971 snew(ddpme->pp_min, ddpme->nslab);
972 snew(ddpme->pp_max, ddpme->nslab);
973 for (int slab = 0; slab < ddpme->nslab; slab++)
975 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
976 ddpme->pp_max[slab] = 0;
978 for (int i = 0; i < dd->nnodes; i++)
981 ddindex2xyz(dd->nc, i, xyz);
982 /* For y only use our y/z slab.
983 * This assumes that the PME x grid size matches the DD grid size.
985 if (dimind == 0 || xyz[XX] == dd->ci[XX])
987 const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
995 slab = pmeindex % ddpme->nslab;
997 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
998 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1002 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1005 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1007 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1009 if (ddRankSetup.ddpme[0].dim == XX)
1011 return ddRankSetup.ddpme[0].maxshift;
1019 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1021 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1023 if (ddRankSetup.ddpme[0].dim == YY)
1025 return ddRankSetup.ddpme[0].maxshift;
1027 else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
1029 return ddRankSetup.ddpme[1].maxshift;
1037 bool ddHaveSplitConstraints(const gmx_domdec_t &dd)
1039 return dd.comm->systemInfo.haveSplitConstraints;
1042 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
1044 /* Note that the cycles value can be incorrect, either 0 or some
1045 * extremely large value, when our thread migrated to another core
1046 * with an unsynchronized cycle counter. If this happens less often
1047 * that once per nstlist steps, this will not cause issues, since
1048 * we later subtract the maximum value from the sum over nstlist steps.
1049 * A zero count will slightly lower the total, but that's a small effect.
1050 * Note that the main purpose of the subtraction of the maximum value
1051 * is to avoid throwing off the load balancing when stalls occur due
1052 * e.g. system activity or network congestion.
1054 dd->comm->cycl[ddCycl] += cycles;
1055 dd->comm->cycl_n[ddCycl]++;
1056 if (cycles > dd->comm->cycl_max[ddCycl])
1058 dd->comm->cycl_max[ddCycl] = cycles;
1063 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
1068 gmx_bool bPartOfGroup = FALSE;
1070 dim = dd->dim[dim_ind];
1071 copy_ivec(loc, loc_c);
1072 for (i = 0; i < dd->nc[dim]; i++)
1075 rank = dd_index(dd->nc, loc_c);
1076 if (rank == dd->rank)
1078 /* This process is part of the group */
1079 bPartOfGroup = TRUE;
1082 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
1086 dd->comm->mpi_comm_load[dim_ind] = c_row;
1087 if (!isDlbDisabled(dd->comm))
1089 DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1091 if (dd->ci[dim] == dd->master_ci[dim])
1093 /* This is the root process of this row */
1094 cellsizes.rowMaster = std::make_unique<RowMaster>();
1096 RowMaster &rowMaster = *cellsizes.rowMaster;
1097 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1098 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
1099 rowMaster.isCellMin.resize(dd->nc[dim]);
1102 rowMaster.bounds.resize(dd->nc[dim]);
1104 rowMaster.buf_ncd.resize(dd->nc[dim]);
1108 /* This is not a root process, we only need to receive cell_f */
1109 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1112 if (dd->ci[dim] == dd->master_ci[dim])
1114 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
1120 void dd_setup_dlb_resource_sharing(const t_commrec *cr,
1124 int physicalnode_id_hash;
1126 MPI_Comm mpi_comm_pp_physicalnode;
1128 if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1130 /* Only ranks with short-ranged tasks (currently) use GPUs.
1131 * If we don't have GPUs assigned, there are no resources to share.
1136 physicalnode_id_hash = gmx_physicalnode_id_hash();
1142 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1143 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
1144 dd->rank, physicalnode_id_hash, gpu_id);
1146 /* Split the PP communicator over the physical nodes */
1147 /* TODO: See if we should store this (before), as it's also used for
1148 * for the nodecomm summation.
1150 // TODO PhysicalNodeCommunicator could be extended/used to handle
1151 // the need for per-node per-group communicators.
1152 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
1153 &mpi_comm_pp_physicalnode);
1154 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
1155 &dd->comm->mpi_comm_gpu_shared);
1156 MPI_Comm_free(&mpi_comm_pp_physicalnode);
1157 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1161 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1164 /* Note that some ranks could share a GPU, while others don't */
1166 if (dd->comm->nrank_gpu_shared == 1)
1168 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1171 GMX_UNUSED_VALUE(cr);
1172 GMX_UNUSED_VALUE(gpu_id);
1176 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
1179 int dim0, dim1, i, j;
1184 fprintf(debug, "Making load communicators\n");
1187 dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1188 snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1196 make_load_communicator(dd, 0, loc);
1200 for (i = 0; i < dd->nc[dim0]; i++)
1203 make_load_communicator(dd, 1, loc);
1209 for (i = 0; i < dd->nc[dim0]; i++)
1213 for (j = 0; j < dd->nc[dim1]; j++)
1216 make_load_communicator(dd, 2, loc);
1223 fprintf(debug, "Finished making load communicators\n");
1228 /*! \brief Sets up the relation between neighboring domains and zones */
1229 static void setup_neighbor_relations(gmx_domdec_t *dd)
1231 int d, dim, i, j, m;
1233 gmx_domdec_zones_t *zones;
1234 gmx_domdec_ns_ranges_t *izone;
1235 GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
1237 for (d = 0; d < dd->ndim; d++)
1240 copy_ivec(dd->ci, tmp);
1241 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
1242 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1243 copy_ivec(dd->ci, tmp);
1244 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
1245 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1248 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
1251 dd->neighbor[d][1]);
1255 int nzone = (1 << dd->ndim);
1256 int nizone = (1 << std::max(dd->ndim - 1, 0));
1257 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1259 zones = &dd->comm->zones;
1261 for (i = 0; i < nzone; i++)
1264 clear_ivec(zones->shift[i]);
1265 for (d = 0; d < dd->ndim; d++)
1267 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1272 for (i = 0; i < nzone; i++)
1274 for (d = 0; d < DIM; d++)
1276 s[d] = dd->ci[d] - zones->shift[i][d];
1281 else if (s[d] >= dd->nc[d])
1287 zones->nizone = nizone;
1288 for (i = 0; i < zones->nizone; i++)
1290 assert(ddNonbondedZonePairRanges[i][0] == i);
1292 izone = &zones->izone[i];
1293 /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
1294 * j-zones up to nzone.
1296 izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
1297 izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
1298 for (dim = 0; dim < DIM; dim++)
1300 if (dd->nc[dim] == 1)
1302 /* All shifts should be allowed */
1303 izone->shift0[dim] = -1;
1304 izone->shift1[dim] = 1;
1308 /* Determine the min/max j-zone shift wrt the i-zone */
1309 izone->shift0[dim] = 1;
1310 izone->shift1[dim] = -1;
1311 for (j = izone->j0; j < izone->j1; j++)
1313 int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
1314 if (shift_diff < izone->shift0[dim])
1316 izone->shift0[dim] = shift_diff;
1318 if (shift_diff > izone->shift1[dim])
1320 izone->shift1[dim] = shift_diff;
1327 if (!isDlbDisabled(dd->comm))
1329 dd->comm->cellsizesWithDlb.resize(dd->ndim);
1332 if (dd->comm->ddSettings.recordLoad)
1334 make_load_communicators(dd);
1338 static void make_pp_communicator(const gmx::MDLogger &mdlog,
1340 t_commrec gmx_unused *cr,
1341 bool gmx_unused reorder)
1344 gmx_domdec_comm_t *comm = dd->comm;
1345 CartesianRankSetup &cartSetup = comm->cartesianRankSetup;
1347 if (cartSetup.bCartesianPP)
1349 /* Set up cartesian communication for the particle-particle part */
1350 GMX_LOG(mdlog.info).appendTextFormatted(
1351 "Will use a Cartesian communicator: %d x %d x %d",
1352 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
1355 for (int i = 0; i < DIM; i++)
1360 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
1362 /* We overwrite the old communicator with the new cartesian one */
1363 cr->mpi_comm_mygroup = comm_cart;
1366 dd->mpi_comm_all = cr->mpi_comm_mygroup;
1367 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1369 if (cartSetup.bCartesianPP_PME)
1371 /* Since we want to use the original cartesian setup for sim,
1372 * and not the one after split, we need to make an index.
1374 cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1375 cartSetup.ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
1376 gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1377 /* Get the rank of the DD master,
1378 * above we made sure that the master node is a PP node.
1389 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1391 else if (cartSetup.bCartesianPP)
1393 if (!comm->ddRankSetup.usePmeOnlyRanks)
1395 /* The PP communicator is also
1396 * the communicator for this simulation
1398 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1400 cr->nodeid = dd->rank;
1402 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1404 /* We need to make an index to go from the coordinates
1405 * to the nodeid of this simulation.
1407 cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1408 std::vector<int> buf(dd->nnodes);
1409 if (thisRankHasDuty(cr, DUTY_PP))
1411 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1413 /* Communicate the ddindex to simulation nodeid index */
1414 MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1415 cr->mpi_comm_mysim);
1417 /* Determine the master coordinates and rank.
1418 * The DD master should be the same node as the master of this sim.
1420 for (int i = 0; i < dd->nnodes; i++)
1422 if (cartSetup.ddindex2simnodeid[i] == 0)
1424 ddindex2xyz(dd->nc, i, dd->master_ci);
1425 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1430 fprintf(debug, "The master rank is %d\n", dd->masterrank);
1435 /* No Cartesian communicators */
1436 /* We use the rank in dd->comm->all as DD index */
1437 ddindex2xyz(dd->nc, dd->rank, dd->ci);
1438 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1440 clear_ivec(dd->master_ci);
1444 GMX_LOG(mdlog.info).appendTextFormatted(
1445 "Domain decomposition rank %d, coordinates %d %d %d\n",
1446 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1450 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
1451 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1455 static void receive_ddindex2simnodeid(gmx_domdec_t *dd,
1459 CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
1461 if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1463 cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1464 std::vector<int> buf(dd->nnodes);
1465 if (thisRankHasDuty(cr, DUTY_PP))
1467 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1469 /* Communicate the ddindex to simulation nodeid index */
1470 MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1471 cr->mpi_comm_mysim);
1474 GMX_UNUSED_VALUE(dd);
1475 GMX_UNUSED_VALUE(cr);
1479 static CartesianRankSetup
1480 split_communicator(const gmx::MDLogger &mdlog,
1482 const DdRankOrder ddRankOrder,
1483 bool gmx_unused reorder,
1484 const DDRankSetup &ddRankSetup,
1486 std::vector<int> *pmeRanks)
1488 CartesianRankSetup cartSetup;
1490 cartSetup.bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
1491 cartSetup.bCartesianPP_PME = false;
1493 const ivec &numDDCells = ddRankSetup.numPPCells;
1494 /* Initially we set ntot to the number of PP cells */
1495 copy_ivec(numDDCells, cartSetup.ntot);
1497 if (cartSetup.bCartesianPP)
1499 const int numDDCellsTot = ddRankSetup.numPPRanks;
1501 for (int i = 1; i < DIM; i++)
1503 bDiv[i] = ((ddRankSetup.numRanksDoingPme*numDDCells[i]) % numDDCellsTot == 0);
1505 if (bDiv[YY] || bDiv[ZZ])
1507 cartSetup.bCartesianPP_PME = TRUE;
1508 /* If we have 2D PME decomposition, which is always in x+y,
1509 * we stack the PME only nodes in z.
1510 * Otherwise we choose the direction that provides the thinnest slab
1511 * of PME only nodes as this will have the least effect
1512 * on the PP communication.
1513 * But for the PME communication the opposite might be better.
1515 if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 ||
1517 numDDCells[YY] > numDDCells[ZZ]))
1519 cartSetup.cartpmedim = ZZ;
1523 cartSetup.cartpmedim = YY;
1525 cartSetup.ntot[cartSetup.cartpmedim]
1526 += (ddRankSetup.numRanksDoingPme*numDDCells[cartSetup.cartpmedim])/numDDCellsTot;
1530 GMX_LOG(mdlog.info).appendTextFormatted(
1531 "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)",
1532 ddRankSetup.numRanksDoingPme,
1533 numDDCells[XX], numDDCells[YY],
1534 numDDCells[XX], numDDCells[ZZ]);
1535 GMX_LOG(mdlog.info).appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1539 if (cartSetup.bCartesianPP_PME)
1545 GMX_LOG(mdlog.info).appendTextFormatted(
1546 "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1547 cartSetup.ntot[XX], cartSetup.ntot[YY], cartSetup.ntot[ZZ]);
1549 for (int i = 0; i < DIM; i++)
1554 MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder),
1556 MPI_Comm_rank(comm_cart, &rank);
1557 if (MASTER(cr) && rank != 0)
1559 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1562 /* With this assigment we loose the link to the original communicator
1563 * which will usually be MPI_COMM_WORLD, unless have multisim.
1565 cr->mpi_comm_mysim = comm_cart;
1566 cr->sim_nodeid = rank;
1568 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1570 GMX_LOG(mdlog.info).appendTextFormatted(
1571 "Cartesian rank %d, coordinates %d %d %d\n",
1572 cr->sim_nodeid, ddCellIndex[XX], ddCellIndex[YY], ddCellIndex[ZZ]);
1574 if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1578 if (!ddRankSetup.usePmeOnlyRanks ||
1579 ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1581 cr->duty = DUTY_PME;
1584 /* Split the sim communicator into PP and PME only nodes */
1585 MPI_Comm_split(cr->mpi_comm_mysim,
1586 getThisRankDuties(cr),
1587 dd_index(cartSetup.ntot, ddCellIndex),
1588 &cr->mpi_comm_mygroup);
1590 GMX_UNUSED_VALUE(ddCellIndex);
1595 switch (ddRankOrder)
1597 case DdRankOrder::pp_pme:
1598 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1600 case DdRankOrder::interleave:
1601 /* Interleave the PP-only and PME-only ranks */
1602 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1603 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1605 case DdRankOrder::cartesian:
1608 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1611 if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1613 cr->duty = DUTY_PME;
1620 /* Split the sim communicator into PP and PME only nodes */
1621 MPI_Comm_split(cr->mpi_comm_mysim,
1622 getThisRankDuties(cr),
1624 &cr->mpi_comm_mygroup);
1625 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1629 GMX_LOG(mdlog.info).appendTextFormatted(
1630 "This rank does only %s work.\n",
1631 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1636 /*! \brief Makes the PP communicator and the PME communicator, when needed
1638 * Returns the Cartesian rank setup.
1639 * Sets \p cr->mpi_comm_mygroup
1640 * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1641 * With separate PME ranks in interleaved order, set the PME ranks in \p pmeRanks.
1643 static CartesianRankSetup
1644 makeGroupCommunicators(const gmx::MDLogger &mdlog,
1645 const DDSettings &ddSettings,
1646 const DdRankOrder ddRankOrder,
1647 const DDRankSetup &ddRankSetup,
1650 std::vector<int> *pmeRanks)
1652 CartesianRankSetup cartSetup;
1654 if (ddRankSetup.usePmeOnlyRanks)
1656 /* Split the communicator into a PP and PME part */
1658 split_communicator(mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder,
1659 ddRankSetup, ddCellIndex, pmeRanks);
1663 /* All nodes do PP and PME */
1664 /* We do not require separate communicators */
1665 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
1667 cartSetup.bCartesianPP = false;
1668 cartSetup.bCartesianPP_PME = false;
1674 /*! \brief For PP ranks, sets or makes the communicator
1676 * For PME ranks get the rank id.
1677 * For PP only ranks, sets the PME-only rank.
1679 static void setupGroupCommunication(const gmx::MDLogger &mdlog,
1680 const DDSettings &ddSettings,
1681 gmx::ArrayRef<const int> pmeRanks,
1683 const int numAtomsInSystem,
1686 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1687 const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
1689 if (thisRankHasDuty(cr, DUTY_PP))
1691 /* Copy or make a new PP communicator */
1693 /* We (possibly) reordered the nodes in split_communicator,
1694 * so it is no longer required in make_pp_communicator.
1696 const bool useCartesianReorder =
1697 (ddSettings.useCartesianReorder &&
1698 !cartSetup.bCartesianPP_PME);
1700 make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1704 receive_ddindex2simnodeid(dd, cr);
1707 if (!thisRankHasDuty(cr, DUTY_PME))
1709 /* Set up the commnuication to our PME node */
1710 dd->pme_nodeid = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1711 dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1714 fprintf(debug, "My pme_nodeid %d receive ener %s\n",
1715 dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
1720 dd->pme_nodeid = -1;
1723 /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1726 dd->ma = std::make_unique<AtomDistribution>(dd->nc,
1732 static real *get_slb_frac(const gmx::MDLogger &mdlog,
1733 const char *dir, int nc, const char *size_string)
1735 real *slb_frac, tot;
1740 if (nc > 1 && size_string != nullptr)
1742 GMX_LOG(mdlog.info).appendTextFormatted(
1743 "Using static load balancing for the %s direction", dir);
1746 for (i = 0; i < nc; i++)
1749 sscanf(size_string, "%20lf%n", &dbl, &n);
1752 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
1759 std::string relativeCellSizes = "Relative cell sizes:";
1760 for (i = 0; i < nc; i++)
1763 relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1765 GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1771 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
1774 gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1776 while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1778 for (auto &ilist : extractILists(*ilists, IF_BOND))
1780 if (NRAL(ilist.functionType) > 2)
1782 n += nmol*(ilist.iatoms.size()/ilistStride(ilist));
1790 static int dd_getenv(const gmx::MDLogger &mdlog,
1791 const char *env_var, int def)
1797 val = getenv(env_var);
1800 if (sscanf(val, "%20d", &nst) <= 0)
1804 GMX_LOG(mdlog.info).appendTextFormatted(
1805 "Found env.var. %s = %s, using value %d",
1812 static void check_dd_restrictions(const gmx_domdec_t *dd,
1813 const t_inputrec *ir,
1814 const gmx::MDLogger &mdlog)
1816 if (ir->ePBC == epbcSCREW &&
1817 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
1819 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
1822 if (ir->nstlist == 0)
1824 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1827 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
1829 GMX_LOG(mdlog.warning).appendText("comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
1833 static real average_cellsize_min(const gmx_ddbox_t &ddbox,
1834 const ivec numDomains)
1836 real r = ddbox.box_size[XX];
1837 for (int d = 0; d < DIM; d++)
1839 if (numDomains[d] > 1)
1841 /* Check using the initial average cell size */
1842 r = std::min(r, ddbox.box_size[d]*ddbox.skew_fac[d]/numDomains[d]);
1849 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
1851 static DlbState forceDlbOffOrBail(DlbState cmdlineDlbState,
1852 const std::string &reasonStr,
1853 const gmx::MDLogger &mdlog)
1855 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
1856 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
1858 if (cmdlineDlbState == DlbState::onUser)
1860 gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1862 else if (cmdlineDlbState == DlbState::offCanTurnOn)
1864 GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1866 return DlbState::offForever;
1869 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1871 * This function parses the parameters of "-dlb" command line option setting
1872 * corresponding state values. Then it checks the consistency of the determined
1873 * state with other run parameters and settings. As a result, the initial state
1874 * may be altered or an error may be thrown if incompatibility of options is detected.
1876 * \param [in] mdlog Logger.
1877 * \param [in] dlbOption Enum value for the DLB option.
1878 * \param [in] bRecordLoad True if the load balancer is recording load information.
1879 * \param [in] mdrunOptions Options for mdrun.
1880 * \param [in] ir Pointer mdrun to input parameters.
1881 * \returns DLB initial/startup state.
1883 static DlbState determineInitialDlbState(const gmx::MDLogger &mdlog,
1884 DlbOption dlbOption, gmx_bool bRecordLoad,
1885 const gmx::MdrunOptions &mdrunOptions,
1886 const t_inputrec *ir)
1888 DlbState dlbState = DlbState::offCanTurnOn;
1892 case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1893 case DlbOption::no: dlbState = DlbState::offUser; break;
1894 case DlbOption::yes: dlbState = DlbState::onUser; break;
1895 default: gmx_incons("Invalid dlbOption enum value");
1898 /* Reruns don't support DLB: bail or override auto mode */
1899 if (mdrunOptions.rerun)
1901 std::string reasonStr = "it is not supported in reruns.";
1902 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1905 /* Unsupported integrators */
1906 if (!EI_DYNAMICS(ir->eI))
1908 auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1909 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1912 /* Without cycle counters we can't time work to balance on */
1915 std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
1916 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1919 if (mdrunOptions.reproducible)
1921 std::string reasonStr = "you started a reproducible run.";
1924 case DlbState::offUser:
1926 case DlbState::offForever:
1927 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1929 case DlbState::offCanTurnOn:
1930 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1931 case DlbState::onCanTurnOff:
1932 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1934 case DlbState::onUser:
1935 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", mdlog);
1937 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", static_cast<int>(dlbState));
1944 static gmx_domdec_comm_t *init_dd_comm()
1946 gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
1948 comm->n_load_have = 0;
1949 comm->n_load_collect = 0;
1951 comm->haveTurnedOffDlb = false;
1953 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
1955 comm->sum_nat[i] = 0;
1959 comm->load_step = 0;
1962 clear_ivec(comm->load_lim);
1966 /* This should be replaced by a unique pointer */
1967 comm->balanceRegion = ddBalanceRegionAllocate();
1972 /* Returns whether mtop contains constraints and/or vsites */
1973 static bool systemHasConstraintsOrVsites(const gmx_mtop_t &mtop)
1975 auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
1977 while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
1979 if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
1988 static void setupUpdateGroups(const gmx::MDLogger &mdlog,
1989 const gmx_mtop_t &mtop,
1990 const t_inputrec &inputrec,
1991 const real cutoffMargin,
1992 DDSystemInfo *systemInfo)
1994 /* When we have constraints and/or vsites, it is beneficial to use
1995 * update groups (when possible) to allow independent update of groups.
1997 if (!systemHasConstraintsOrVsites(mtop))
1999 /* No constraints or vsites, atoms can be updated independently */
2003 systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
2004 systemInfo->useUpdateGroups =
2005 (!systemInfo->updateGroupingPerMoleculetype.empty() &&
2006 getenv("GMX_NO_UPDATEGROUPS") == nullptr);
2008 if (systemInfo->useUpdateGroups)
2010 int numUpdateGroups = 0;
2011 for (const auto &molblock : mtop.molblock)
2013 numUpdateGroups += molblock.nmol*systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
2016 systemInfo->maxUpdateGroupRadius =
2017 computeMaxUpdateGroupRadius(mtop,
2018 systemInfo->updateGroupingPerMoleculetype,
2019 maxReferenceTemperature(inputrec));
2021 /* To use update groups, the large domain-to-domain cutoff distance
2022 * should be compatible with the box size.
2024 systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
2026 if (systemInfo->useUpdateGroups)
2028 GMX_LOG(mdlog.info).appendTextFormatted(
2029 "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f nm\n",
2031 mtop.natoms/static_cast<double>(numUpdateGroups),
2032 systemInfo->maxUpdateGroupRadius);
2036 GMX_LOG(mdlog.info).appendTextFormatted("The combination of rlist and box size prohibits the use of update groups\n");
2037 systemInfo->updateGroupingPerMoleculetype.clear();
2042 UnitCellInfo::UnitCellInfo(const t_inputrec &ir) :
2043 npbcdim(ePBC2npbcdim(ir.ePBC)),
2044 numBoundedDimensions(inputrec2nboundeddim(&ir)),
2045 ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
2046 haveScrewPBC(ir.ePBC == epbcSCREW)
2050 /* Returns whether molecules are always whole, i.e. not broken by PBC */
2052 moleculesAreAlwaysWhole(const gmx_mtop_t &mtop,
2053 const bool useUpdateGroups,
2054 gmx::ArrayRef<const gmx::RangePartitioning> updateGroupingPerMoleculetype)
2056 if (useUpdateGroups)
2058 GMX_RELEASE_ASSERT(updateGroupingPerMoleculetype.size() == mtop.moltype.size(),
2059 "Need one grouping per moltype");
2060 for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
2062 if (updateGroupingPerMoleculetype[mol].numBlocks() > 1)
2070 for (const auto &moltype : mtop.moltype)
2072 if (moltype.atoms.nr > 1)
2082 /*! \brief Generate the simulation system information */
2084 getSystemInfo(const gmx::MDLogger &mdlog,
2085 const t_commrec *cr,
2086 const DomdecOptions &options,
2087 const gmx_mtop_t &mtop,
2088 const t_inputrec &ir,
2090 gmx::ArrayRef<const gmx::RVec> xGlobal)
2092 const real tenPercentMargin = 1.1;
2094 DDSystemInfo systemInfo;
2096 /* We need to decide on update groups early, as this affects communication distances */
2097 systemInfo.useUpdateGroups = false;
2098 if (ir.cutoff_scheme == ecutsVERLET)
2100 real cutoffMargin = std::sqrt(max_cutoff2(ir.ePBC, box)) - ir.rlist;
2101 setupUpdateGroups(mdlog, mtop, ir, cutoffMargin, &systemInfo);
2104 systemInfo.moleculesAreAlwaysWhole =
2105 moleculesAreAlwaysWhole(mtop,
2106 systemInfo.useUpdateGroups,
2107 systemInfo.updateGroupingPerMoleculetype);
2108 systemInfo.haveInterDomainBondeds = (!systemInfo.moleculesAreAlwaysWhole ||
2109 mtop.bIntermolecularInteractions);
2110 systemInfo.haveInterDomainMultiBodyBondeds = (systemInfo.haveInterDomainBondeds &&
2111 multi_body_bondeds_count(&mtop) > 0);
2113 if (systemInfo.useUpdateGroups)
2115 systemInfo.haveSplitConstraints = false;
2116 systemInfo.haveSplitSettles = false;
2120 systemInfo.haveSplitConstraints = (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2121 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0);
2122 systemInfo.haveSplitSettles = (gmx_mtop_ftype_count(mtop, F_SETTLE) > 0);
2127 /* Set the cut-off to some very large value,
2128 * so we don't need if statements everywhere in the code.
2129 * We use sqrt, since the cut-off is squared in some places.
2131 systemInfo.cutoff = GMX_CUTOFF_INF;
2135 systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir.rlist);
2137 systemInfo.minCutoffForMultiBody = 0;
2139 /* Determine the minimum cell size limit, affected by many factors */
2140 systemInfo.cellsizeLimit = 0;
2141 systemInfo.filterBondedCommunication = false;
2143 /* We do not allow home atoms to move beyond the neighboring domain
2144 * between domain decomposition steps, which limits the cell size.
2145 * Get an estimate of cell size limit due to atom displacement.
2146 * In most cases this is a large overestimate, because it assumes
2147 * non-interaction atoms.
2148 * We set the chance to 1 in a trillion steps.
2150 constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2151 const real limitForAtomDisplacement =
2152 minCellSizeForAtomDisplacement(mtop, ir,
2153 systemInfo.updateGroupingPerMoleculetype,
2154 c_chanceThatAtomMovesBeyondDomain);
2155 GMX_LOG(mdlog.info).appendTextFormatted(
2156 "Minimum cell size due to atom displacement: %.3f nm",
2157 limitForAtomDisplacement);
2159 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2160 limitForAtomDisplacement);
2162 /* TODO: PME decomposition currently requires atoms not to be more than
2163 * 2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2164 * In nearly all cases, limitForAtomDisplacement will be smaller
2165 * than 2/3*rlist, so the PME requirement is satisfied.
2166 * But it would be better for both correctness and performance
2167 * to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2168 * Note that we would need to improve the pairlist buffer case.
2171 if (systemInfo.haveInterDomainBondeds)
2173 if (options.minimumCommunicationRange > 0)
2175 systemInfo.minCutoffForMultiBody =
2176 atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2177 if (options.useBondedCommunication)
2179 systemInfo.filterBondedCommunication = (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2183 systemInfo.cutoff = std::max(systemInfo.cutoff,
2184 systemInfo.minCutoffForMultiBody);
2187 else if (ir.bPeriodicMols)
2189 /* Can not easily determine the required cut-off */
2190 GMX_LOG(mdlog.warning).appendText("NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.");
2191 systemInfo.minCutoffForMultiBody = systemInfo.cutoff/2;
2199 dd_bonded_cg_distance(mdlog, &mtop, &ir, as_rvec_array(xGlobal.data()), box,
2200 options.checkBondedInteractions,
2203 gmx_bcast(sizeof(r_2b), &r_2b, cr);
2204 gmx_bcast(sizeof(r_mb), &r_mb, cr);
2206 /* We use an initial margin of 10% for the minimum cell size,
2207 * except when we are just below the non-bonded cut-off.
2209 if (options.useBondedCommunication)
2211 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2213 const real r_bonded = std::max(r_2b, r_mb);
2214 systemInfo.minCutoffForMultiBody = tenPercentMargin*r_bonded;
2215 /* This is the (only) place where we turn on the filtering */
2216 systemInfo.filterBondedCommunication = true;
2220 const real r_bonded = r_mb;
2221 systemInfo.minCutoffForMultiBody = std::min(tenPercentMargin*r_bonded,
2224 /* We determine cutoff_mbody later */
2225 systemInfo.increaseMultiBodyCutoff = true;
2229 /* No special bonded communication,
2230 * simply increase the DD cut-off.
2232 systemInfo.minCutoffForMultiBody = tenPercentMargin*std::max(r_2b, r_mb);
2233 systemInfo.cutoff = std::max(systemInfo.cutoff,
2234 systemInfo.minCutoffForMultiBody);
2237 GMX_LOG(mdlog.info).appendTextFormatted(
2238 "Minimum cell size due to bonded interactions: %.3f nm",
2239 systemInfo.minCutoffForMultiBody);
2241 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2242 systemInfo.minCutoffForMultiBody);
2245 systemInfo.constraintCommunicationRange = 0;
2246 if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
2248 /* There is a cell size limit due to the constraints (P-LINCS) */
2249 systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, &mtop, &ir);
2250 GMX_LOG(mdlog.info).appendTextFormatted(
2251 "Estimated maximum distance required for P-LINCS: %.3f nm",
2252 systemInfo.constraintCommunicationRange);
2253 if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2255 GMX_LOG(mdlog.info).appendText("This distance will limit the DD cell size, you can override this with -rcon");
2258 else if (options.constraintCommunicationRange > 0)
2260 /* Here we do not check for dd->splitConstraints.
2261 * because one can also set a cell size limit for virtual sites only
2262 * and at this point we don't know yet if there are intercg v-sites.
2264 GMX_LOG(mdlog.info).appendTextFormatted(
2265 "User supplied maximum distance required for P-LINCS: %.3f nm",
2266 options.constraintCommunicationRange);
2267 systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2269 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2270 systemInfo.constraintCommunicationRange);
2275 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
2278 checkDDGridSetup(const DDGridSetup &ddGridSetup,
2279 const t_commrec *cr,
2280 const DomdecOptions &options,
2281 const DDSettings &ddSettings,
2282 const DDSystemInfo &systemInfo,
2283 const real cellsizeLimit,
2284 const gmx_ddbox_t &ddbox)
2286 if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
2289 gmx_bool bC = (systemInfo.haveSplitConstraints &&
2290 systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2291 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
2292 !bC ? "-rdd" : "-rcon",
2293 ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2294 bC ? " or your LINCS settings" : "");
2296 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2297 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
2299 "Look in the log file for details on the domain decomposition",
2300 cr->nnodes - ddGridSetup.numPmeOnlyRanks,
2305 const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
2306 if (acs < cellsizeLimit)
2308 if (options.numCells[XX] <= 0)
2310 GMX_RELEASE_ASSERT(false, "dd_choose_grid() should return a grid that satisfies the cell size limits");
2314 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2315 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
2316 acs, cellsizeLimit);
2320 const int numPPRanks = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2321 if (cr->nnodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2323 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2324 "The size of the domain decomposition grid (%d) does not match the number of PP ranks (%d). The total number of ranks is %d",
2325 numPPRanks, cr->nnodes - ddGridSetup.numPmeOnlyRanks, cr->nnodes);
2327 if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2329 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2330 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", ddGridSetup.numPmeOnlyRanks, numPPRanks);
2334 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2336 getDDRankSetup(const gmx::MDLogger &mdlog,
2338 const DDGridSetup &ddGridSetup,
2339 const t_inputrec &ir)
2341 GMX_LOG(mdlog.info).appendTextFormatted(
2342 "Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2343 ddGridSetup.numDomains[XX], ddGridSetup.numDomains[YY], ddGridSetup.numDomains[ZZ],
2344 ddGridSetup.numPmeOnlyRanks);
2346 DDRankSetup ddRankSetup;
2348 ddRankSetup.numPPRanks = cr->nnodes - ddGridSetup.numPmeOnlyRanks;
2349 copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2351 ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2352 if (ddRankSetup.usePmeOnlyRanks)
2354 ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2358 ddRankSetup.numRanksDoingPme = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2361 if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2363 /* The following choices should match those
2364 * in comm_cost_est in domdec_setup.c.
2365 * Note that here the checks have to take into account
2366 * that the decomposition might occur in a different order than xyz
2367 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2368 * in which case they will not match those in comm_cost_est,
2369 * but since that is mainly for testing purposes that's fine.
2371 if (ddGridSetup.numDDDimensions >= 2 &&
2372 ddGridSetup.ddDimensions[0] == XX &&
2373 ddGridSetup.ddDimensions[1] == YY &&
2374 ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX] &&
2375 ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0 &&
2376 getenv("GMX_PMEONEDD") == nullptr)
2378 ddRankSetup.npmedecompdim = 2;
2379 ddRankSetup.npmenodes_x = ddGridSetup.numDomains[XX];
2380 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme/ddRankSetup.npmenodes_x;
2384 /* In case nc is 1 in both x and y we could still choose to
2385 * decompose pme in y instead of x, but we use x for simplicity.
2387 ddRankSetup.npmedecompdim = 1;
2388 if (ddGridSetup.ddDimensions[0] == YY)
2390 ddRankSetup.npmenodes_x = 1;
2391 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2395 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2396 ddRankSetup.npmenodes_y = 1;
2399 GMX_LOG(mdlog.info).appendTextFormatted(
2400 "PME domain decomposition: %d x %d x %d",
2401 ddRankSetup.npmenodes_x, ddRankSetup.npmenodes_y, 1);
2405 ddRankSetup.npmedecompdim = 0;
2406 ddRankSetup.npmenodes_x = 0;
2407 ddRankSetup.npmenodes_y = 0;
2413 /*! \brief Set the cell size and interaction limits */
2414 static void set_dd_limits(const gmx::MDLogger &mdlog,
2415 t_commrec *cr, gmx_domdec_t *dd,
2416 const DomdecOptions &options,
2417 const DDSettings &ddSettings,
2418 const DDSystemInfo &systemInfo,
2419 const DDGridSetup &ddGridSetup,
2420 const int numPPRanks,
2421 const gmx_mtop_t *mtop,
2422 const t_inputrec *ir,
2423 const gmx_ddbox_t &ddbox)
2425 gmx_domdec_comm_t *comm = dd->comm;
2426 comm->ddSettings = ddSettings;
2428 /* Initialize to GPU share count to 0, might change later */
2429 comm->nrank_gpu_shared = 0;
2431 comm->dlbState = comm->ddSettings.initialDlbState;
2432 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2433 /* To consider turning DLB on after 2*nstlist steps we need to check
2434 * at partitioning count 3. Thus we need to increase the first count by 2.
2436 comm->ddPartioningCountFirstDlbOff += 2;
2438 comm->bPMELoadBalDLBLimits = FALSE;
2440 /* Allocate the charge group/atom sorting struct */
2441 comm->sort = std::make_unique<gmx_domdec_sort_t>();
2443 comm->systemInfo = systemInfo;
2445 if (systemInfo.useUpdateGroups)
2447 /* Note: We would like to use dd->nnodes for the atom count estimate,
2448 * but that is not yet available here. But this anyhow only
2449 * affect performance up to the second dd_partition_system call.
2451 const int homeAtomCountEstimate = mtop->natoms/numPPRanks;
2452 comm->updateGroupsCog =
2453 std::make_unique<gmx::UpdateGroupsCog>(*mtop,
2454 systemInfo.updateGroupingPerMoleculetype,
2455 maxReferenceTemperature(*ir),
2456 homeAtomCountEstimate);
2459 /* Set the DD setup given by ddGridSetup */
2460 copy_ivec(ddGridSetup.numDomains, dd->nc);
2461 dd->ndim = ddGridSetup.numDDDimensions;
2462 copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2464 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
2466 snew(comm->slb_frac, DIM);
2467 if (isDlbDisabled(comm))
2469 comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
2470 comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
2471 comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
2474 /* Set the multi-body cut-off and cellsize limit for DLB */
2475 comm->cutoff_mbody = systemInfo.minCutoffForMultiBody;
2476 comm->cellsize_limit = systemInfo.cellsizeLimit;
2477 if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2479 if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2481 /* Set the bonded communication distance to halfway
2482 * the minimum and the maximum,
2483 * since the extra communication cost is nearly zero.
2485 real acs = average_cellsize_min(ddbox, dd->nc);
2486 comm->cutoff_mbody = 0.5*(systemInfo.minCutoffForMultiBody + acs);
2487 if (!isDlbDisabled(comm))
2489 /* Check if this does not limit the scaling */
2490 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
2491 options.dlbScaling*acs);
2493 if (!systemInfo.filterBondedCommunication)
2495 /* Without bBondComm do not go beyond the n.b. cut-off */
2496 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2497 if (comm->cellsize_limit >= systemInfo.cutoff)
2499 /* We don't loose a lot of efficieny
2500 * when increasing it to the n.b. cut-off.
2501 * It can even be slightly faster, because we need
2502 * less checks for the communication setup.
2504 comm->cutoff_mbody = systemInfo.cutoff;
2507 /* Check if we did not end up below our original limit */
2508 comm->cutoff_mbody = std::max(comm->cutoff_mbody,
2509 systemInfo.minCutoffForMultiBody);
2511 if (comm->cutoff_mbody > comm->cellsize_limit)
2513 comm->cellsize_limit = comm->cutoff_mbody;
2516 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2521 fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
2522 "cellsize limit %f\n",
2523 gmx::boolToString(systemInfo.filterBondedCommunication),
2524 comm->cellsize_limit);
2529 check_dd_restrictions(dd, ir, mdlog);
2533 void dd_init_bondeds(FILE *fplog,
2535 const gmx_mtop_t *mtop,
2536 const gmx_vsite_t *vsite,
2537 const t_inputrec *ir,
2539 cginfo_mb_t *cginfo_mb)
2541 gmx_domdec_comm_t *comm;
2543 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
2547 if (comm->systemInfo.filterBondedCommunication)
2549 /* Communicate atoms beyond the cut-off for bonded interactions */
2550 comm->bondedLinks = makeBondedLinks(mtop, cginfo_mb);
2554 /* Only communicate atoms based on cut-off */
2555 comm->bondedLinks = nullptr;
2559 static void writeSettings(gmx::TextWriter *log,
2561 const gmx_mtop_t *mtop,
2562 const t_inputrec *ir,
2563 gmx_bool bDynLoadBal,
2565 const gmx_ddbox_t *ddbox)
2567 gmx_domdec_comm_t *comm;
2576 log->writeString("The maximum number of communication pulses is:");
2577 for (d = 0; d < dd->ndim; d++)
2579 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2581 log->ensureLineBreak();
2582 log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm", comm->cellsize_limit);
2583 log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2584 log->writeString("The allowed shrink of domain decomposition cells is:");
2585 for (d = 0; d < DIM; d++)
2589 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
2596 comm->cellsize_min_dlb[d]/
2597 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
2599 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2602 log->ensureLineBreak();
2606 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2607 log->writeString("The initial number of communication pulses is:");
2608 for (d = 0; d < dd->ndim; d++)
2610 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2612 log->ensureLineBreak();
2613 log->writeString("The initial domain decomposition cell size is:");
2614 for (d = 0; d < DIM; d++)
2618 log->writeStringFormatted(" %c %.2f nm",
2619 dim2char(d), dd->comm->cellsize_min[d]);
2622 log->ensureLineBreak();
2626 const bool haveInterDomainVsites =
2627 (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
2629 if (comm->systemInfo.haveInterDomainBondeds ||
2630 haveInterDomainVsites ||
2631 comm->systemInfo.haveSplitConstraints ||
2632 comm->systemInfo.haveSplitSettles)
2634 std::string decompUnits;
2635 if (comm->systemInfo.useUpdateGroups)
2637 decompUnits = "atom groups";
2641 decompUnits = "atoms";
2644 log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:", decompUnits.c_str());
2645 log->writeLineFormatted("%40s %-7s %6.3f nm", "non-bonded interactions", "", comm->systemInfo.cutoff);
2649 limit = dd->comm->cellsize_limit;
2653 if (dd->unitCellInfo.ddBoxIsDynamic)
2655 log->writeLine("(the following are initial values, they could change due to box deformation)");
2657 limit = dd->comm->cellsize_min[XX];
2658 for (d = 1; d < DIM; d++)
2660 limit = std::min(limit, dd->comm->cellsize_min[d]);
2664 if (comm->systemInfo.haveInterDomainBondeds)
2666 log->writeLineFormatted("%40s %-7s %6.3f nm",
2667 "two-body bonded interactions", "(-rdd)",
2668 std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2669 log->writeLineFormatted("%40s %-7s %6.3f nm",
2670 "multi-body bonded interactions", "(-rdd)",
2671 (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->systemInfo.cutoff, limit));
2673 if (haveInterDomainVsites)
2675 log->writeLineFormatted("%40s %-7s %6.3f nm",
2676 "virtual site constructions", "(-rcon)", limit);
2678 if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2680 std::string separation = gmx::formatString("atoms separated by up to %d constraints",
2682 log->writeLineFormatted("%40s %-7s %6.3f nm\n",
2683 separation.c_str(), "(-rcon)", limit);
2685 log->ensureLineBreak();
2689 static void logSettings(const gmx::MDLogger &mdlog,
2691 const gmx_mtop_t *mtop,
2692 const t_inputrec *ir,
2694 const gmx_ddbox_t *ddbox)
2696 gmx::StringOutputStream stream;
2697 gmx::TextWriter log(&stream);
2698 writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2699 if (dd->comm->dlbState == DlbState::offCanTurnOn)
2702 log.ensureEmptyLine();
2703 log.writeLine("When dynamic load balancing gets turned on, these settings will change to:");
2705 writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2707 GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2710 static void set_cell_limits_dlb(const gmx::MDLogger &mdlog,
2713 const t_inputrec *ir,
2714 const gmx_ddbox_t *ddbox)
2716 gmx_domdec_comm_t *comm;
2717 int d, dim, npulse, npulse_d_max, npulse_d;
2722 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2724 /* Determine the maximum number of comm. pulses in one dimension */
2726 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2728 /* Determine the maximum required number of grid pulses */
2729 if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2731 /* Only a single pulse is required */
2734 else if (!bNoCutOff && comm->cellsize_limit > 0)
2736 /* We round down slightly here to avoid overhead due to the latency
2737 * of extra communication calls when the cut-off
2738 * would be only slightly longer than the cell size.
2739 * Later cellsize_limit is redetermined,
2740 * so we can not miss interactions due to this rounding.
2742 npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff/comm->cellsize_limit);
2746 /* There is no cell size limit */
2747 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
2750 if (!bNoCutOff && npulse > 1)
2752 /* See if we can do with less pulses, based on dlb_scale */
2754 for (d = 0; d < dd->ndim; d++)
2757 npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->systemInfo.cutoff
2758 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
2759 npulse_d_max = std::max(npulse_d_max, npulse_d);
2761 npulse = std::min(npulse, npulse_d_max);
2764 /* This env var can override npulse */
2765 d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2772 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
2773 for (d = 0; d < dd->ndim; d++)
2775 if (comm->ddSettings.request1DAnd1Pulse)
2777 comm->cd[d].np_dlb = 1;
2781 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
2782 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2784 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
2786 comm->bVacDLBNoLimit = FALSE;
2790 /* cellsize_limit is set for LINCS in init_domain_decomposition */
2791 if (!comm->bVacDLBNoLimit)
2793 comm->cellsize_limit = std::max(comm->cellsize_limit,
2794 comm->systemInfo.cutoff/comm->maxpulse);
2796 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2797 /* Set the minimum cell size for each DD dimension */
2798 for (d = 0; d < dd->ndim; d++)
2800 if (comm->bVacDLBNoLimit ||
2801 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->systemInfo.cutoff)
2803 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2807 comm->cellsize_min_dlb[dd->dim[d]] =
2808 comm->systemInfo.cutoff/comm->cd[d].np_dlb;
2811 if (comm->cutoff_mbody <= 0)
2813 comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2821 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t &dd)
2823 return dd.comm->systemInfo.moleculesAreAlwaysWhole;
2826 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
2828 /* If each molecule is a single charge group
2829 * or we use domain decomposition for each periodic dimension,
2830 * we do not need to take pbc into account for the bonded interactions.
2832 return (ePBC != epbcNONE && dd->comm->systemInfo.haveInterDomainBondeds &&
2835 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
2838 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2839 static void set_ddgrid_parameters(const gmx::MDLogger &mdlog,
2840 gmx_domdec_t *dd, real dlb_scale,
2841 const gmx_mtop_t *mtop, const t_inputrec *ir,
2842 const gmx_ddbox_t *ddbox)
2844 gmx_domdec_comm_t *comm = dd->comm;
2845 DDRankSetup &ddRankSetup = comm->ddRankSetup;
2847 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2849 init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2850 if (ddRankSetup.npmedecompdim >= 2)
2852 init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2857 ddRankSetup.numRanksDoingPme = 0;
2858 if (dd->pme_nodeid >= 0)
2860 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2861 "Can not have separate PME ranks without PME electrostatics");
2867 fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2869 if (!isDlbDisabled(comm))
2871 set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2874 logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
2877 if (ir->ePBC == epbcNONE)
2879 vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
2884 (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, *ddbox))/static_cast<double>(dd->nnodes);
2888 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2890 int natoms_tot = mtop->natoms;
2892 dd->ga2la = new gmx_ga2la_t(natoms_tot,
2893 static_cast<int>(vol_frac*natoms_tot));
2896 /*! \brief Get some important DD parameters which can be modified by env.vars */
2898 getDDSettings(const gmx::MDLogger &mdlog,
2899 const DomdecOptions &options,
2900 const gmx::MdrunOptions &mdrunOptions,
2901 const t_inputrec &ir)
2903 DDSettings ddSettings;
2905 ddSettings.useSendRecv2 = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2906 ddSettings.dlb_scale_lim = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2907 ddSettings.request1DAnd1Pulse = bool(dd_getenv(mdlog, "GMX_DD_1D_1PULSE", 0));
2908 ddSettings.useDDOrderZYX = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
2909 ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
2910 ddSettings.eFlop = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2911 const int recload = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2912 ddSettings.nstDDDump = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2913 ddSettings.nstDDDumpGrid = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2914 ddSettings.DD_debug = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
2916 if (ddSettings.useSendRecv2)
2918 GMX_LOG(mdlog.info).appendText("Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication");
2921 if (ddSettings.eFlop)
2923 GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2924 ddSettings.recordLoad = true;
2928 ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
2931 ddSettings.initialDlbState =
2932 determineInitialDlbState(mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, &ir);
2933 GMX_LOG(mdlog.info).appendTextFormatted("Dynamic load balancing: %s",
2934 edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);
2939 gmx_domdec_t::gmx_domdec_t(const t_inputrec &ir) :
2944 /*! \brief Return whether the simulation described can run a 1D single-pulse DD.
2946 * The GPU halo exchange code requires a 1D single-pulse DD. Such a DD
2947 * generally requires a larger box than other possible decompositions
2948 * with the same rank count, so the calling code might need to decide
2949 * what is the most appropriate way to run the simulation based on
2950 * whether such a DD is possible.
2952 * This function works like init_domain_decomposition(), but will not
2953 * give a fatal error, and only uses \c cr for communicating between
2956 * It is safe to call before thread-MPI spawns ranks, so that
2957 * thread-MPI can decide whether and how to trigger the GPU halo
2958 * exchange code path. The number of PME ranks, if any, should be set
2959 * in \c options.numPmeRanks.
2962 canMake1DAnd1PulseDomainDecomposition(const DDSettings &ddSettingsOriginal,
2963 const t_commrec *cr,
2964 const int numRanksRequested,
2965 const DomdecOptions &options,
2966 const gmx_mtop_t &mtop,
2967 const t_inputrec &ir,
2969 gmx::ArrayRef<const gmx::RVec> xGlobal)
2971 // Ensure we don't write any output from this checking routine
2972 gmx::MDLogger dummyLogger;
2974 DDSystemInfo systemInfo = getSystemInfo(dummyLogger, cr, options, mtop, ir, box, xGlobal);
2976 DDSettings ddSettings = ddSettingsOriginal;
2977 ddSettings.request1DAnd1Pulse = true;
2978 const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(dummyLogger, ddSettings.request1DAnd1Pulse,
2979 !isDlbDisabled(ddSettings.initialDlbState),
2980 options.dlbScaling, ir,
2981 systemInfo.cellsizeLimit);
2982 gmx_ddbox_t ddbox = {0};
2983 DDGridSetup ddGridSetup = getDDGridSetup(dummyLogger, cr, numRanksRequested, options,
2984 ddSettings, systemInfo, gridSetupCellsizeLimit,
2985 mtop, ir, box, xGlobal, &ddbox);
2987 const bool canMakeDDWith1DAnd1Pulse = (ddGridSetup.numDomains[XX] != 0);
2989 return canMakeDDWith1DAnd1Pulse;
2992 bool is1DAnd1PulseDD(const gmx_domdec_t &dd)
2994 const int maxDimensionSize = std::max(dd.nc[XX], std::max(dd.nc[YY], dd.nc[ZZ]));
2995 const int productOfDimensionSizes = dd.nc[XX]*dd.nc[YY]*dd.nc[ZZ];
2996 const bool decompositionHasOneDimension = (maxDimensionSize == productOfDimensionSizes);
2998 const bool hasMax1Pulse =
2999 ((isDlbDisabled(dd.comm) && dd.comm->cellsize_limit >= dd.comm->systemInfo.cutoff) ||
3000 (!isDlbDisabled(dd.comm) && dd.comm->maxpulse == 1));
3002 return decompositionHasOneDimension && hasMax1Pulse;
3009 // TODO once the functionality stablizes, move this class and
3010 // supporting functionality into builder.cpp
3011 /*! \brief Impl class for DD builder */
3012 class DomainDecompositionBuilder::Impl
3016 Impl(const MDLogger &mdlog,
3018 const DomdecOptions &options,
3019 const MdrunOptions &mdrunOptions,
3020 bool prefer1DAnd1Pulse,
3021 const gmx_mtop_t &mtop,
3022 const t_inputrec &ir,
3024 ArrayRef<const RVec> xGlobal);
3026 //! Build the resulting DD manager
3027 gmx_domdec_t *build(LocalAtomSetManager *atomSets);
3029 //! Objects used in constructing and configuring DD
3032 const MDLogger &mdlog_;
3033 //! Communication object
3035 //! User-supplied options configuring DD behavior
3036 const DomdecOptions options_;
3037 //! Global system topology
3038 const gmx_mtop_t &mtop_;
3039 //! User input values from the tpr file
3040 const t_inputrec &ir_;
3043 //! Internal objects used in constructing DD
3045 //! Settings combined from the user input
3046 DDSettings ddSettings_;
3047 //! Information derived from the simulation system
3048 DDSystemInfo systemInfo_;
3050 gmx_ddbox_t ddbox_ = { 0 };
3051 //! Organization of the DD grids
3052 DDGridSetup ddGridSetup_;
3053 //! Organzation of the DD ranks
3054 DDRankSetup ddRankSetup_;
3055 //! Number of DD cells in each dimension
3056 ivec ddCellIndex_ = { 0, 0, 0 };
3057 //! IDs of PME-only ranks
3058 std::vector<int> pmeRanks_;
3059 //! Contains a valid Cartesian-communicator-based setup, or defaults.
3060 CartesianRankSetup cartSetup_;
3065 DomainDecompositionBuilder::Impl::Impl(const MDLogger &mdlog,
3067 const DomdecOptions &options,
3068 const MdrunOptions &mdrunOptions,
3069 const bool prefer1DAnd1Pulse,
3070 const gmx_mtop_t &mtop,
3071 const t_inputrec &ir,
3073 ArrayRef<const RVec> xGlobal)
3080 GMX_LOG(mdlog_.info).appendTextFormatted(
3081 "\nInitializing Domain Decomposition on %d ranks", cr_->nnodes);
3083 ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_);
3085 if (prefer1DAnd1Pulse &&
3086 canMake1DAnd1PulseDomainDecomposition(ddSettings_, cr_, cr_->nnodes, options_,
3087 mtop_, ir_, box, xGlobal))
3089 ddSettings_.request1DAnd1Pulse = true;
3092 if (ddSettings_.eFlop > 1)
3094 /* Ensure that we have different random flop counts on different ranks */
3095 srand(1 + cr_->nodeid);
3098 systemInfo_ = getSystemInfo(mdlog_, cr_, options_, mtop_, ir_, box, xGlobal);
3100 const int numRanksRequested = cr_->nnodes;
3101 checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir_.coulombtype), options_.numPmeRanks);
3103 // DD grid setup uses a more different cell size limit for
3104 // automated setup than the one in systemInfo_. The latter is used
3105 // in set_dd_limits() to configure DLB, for example.
3106 const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(mdlog_, ddSettings_.request1DAnd1Pulse,
3107 !isDlbDisabled(ddSettings_.initialDlbState),
3108 options_.dlbScaling, ir_,
3109 systemInfo_.cellsizeLimit);
3110 ddGridSetup_ = getDDGridSetup(mdlog_, cr_, numRanksRequested, options_,
3111 ddSettings_, systemInfo_, gridSetupCellsizeLimit,
3112 mtop_, ir_, box, xGlobal, &ddbox_);
3113 checkDDGridSetup(ddGridSetup_, cr_, options_, ddSettings_, systemInfo_, gridSetupCellsizeLimit, ddbox_);
3115 cr_->npmenodes = ddGridSetup_.numPmeOnlyRanks;
3117 ddRankSetup_ = getDDRankSetup(mdlog_, cr_, ddGridSetup_, ir_);
3119 /* Generate the group communicator, also decides the duty of each rank */
3121 makeGroupCommunicators(mdlog_, ddSettings_, options_.rankOrder,
3123 ddCellIndex_, &pmeRanks_);
3126 gmx_domdec_t *DomainDecompositionBuilder::Impl::build(LocalAtomSetManager *atomSets)
3128 gmx_domdec_t *dd = new gmx_domdec_t(ir_);
3130 copy_ivec(ddCellIndex_, dd->ci);
3132 dd->comm = init_dd_comm();
3134 dd->comm->ddRankSetup = ddRankSetup_;
3135 dd->comm->cartesianRankSetup = cartSetup_;
3137 set_dd_limits(mdlog_, cr_, dd, options_,
3138 ddSettings_, systemInfo_,
3140 ddRankSetup_.numPPRanks,
3144 setupGroupCommunication(mdlog_, ddSettings_, pmeRanks_, cr_, mtop_.natoms, dd);
3146 if (thisRankHasDuty(cr_, DUTY_PP))
3148 set_ddgrid_parameters(mdlog_, dd, options_.dlbScaling, &mtop_, &ir_, &ddbox_);
3150 setup_neighbor_relations(dd);
3153 /* Set overallocation to avoid frequent reallocation of arrays */
3154 set_over_alloc_dd(TRUE);
3156 dd->atomSets = atomSets;
3161 DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger &mdlog,
3163 const DomdecOptions &options,
3164 const MdrunOptions &mdrunOptions,
3165 const bool prefer1DAnd1Pulse,
3166 const gmx_mtop_t &mtop,
3167 const t_inputrec &ir,
3169 ArrayRef<const RVec> xGlobal)
3170 : impl_(new Impl(mdlog, cr, options, mdrunOptions, prefer1DAnd1Pulse, mtop, ir, box, xGlobal))
3174 gmx_domdec_t *DomainDecompositionBuilder::build(LocalAtomSetManager *atomSets)
3176 return impl_->build(atomSets);
3179 DomainDecompositionBuilder::~DomainDecompositionBuilder() = default;
3183 static gmx_bool test_dd_cutoff(t_commrec *cr,
3185 gmx::ArrayRef<const gmx::RVec> x,
3186 real cutoffRequested)
3196 set_ddbox(*dd, false, box, true, x, &ddbox);
3200 for (d = 0; d < dd->ndim; d++)
3204 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
3205 if (dd->unitCellInfo.ddBoxIsDynamic)
3207 inv_cell_size *= DD_PRES_SCALE_MARGIN;
3210 np = 1 + static_cast<int>(cutoffRequested*inv_cell_size*ddbox.skew_fac[dim]);
3212 if (dd->comm->ddSettings.request1DAnd1Pulse && np > 1)
3217 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3219 if (np > dd->comm->cd[d].np_dlb)
3224 /* If a current local cell size is smaller than the requested
3225 * cut-off, we could still fix it, but this gets very complicated.
3226 * Without fixing here, we might actually need more checks.
3228 real cellSizeAlongDim = (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim];
3229 if (cellSizeAlongDim*dd->comm->cd[d].np_dlb < cutoffRequested)
3236 if (!isDlbDisabled(dd->comm))
3238 /* If DLB is not active yet, we don't need to check the grid jumps.
3239 * Actually we shouldn't, because then the grid jump data is not set.
3241 if (isDlbOn(dd->comm) &&
3242 check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3247 gmx_sumi(1, &LocallyLimited, cr);
3249 if (LocallyLimited > 0)
3258 gmx_bool change_dd_cutoff(t_commrec *cr,
3260 gmx::ArrayRef<const gmx::RVec> x,
3261 real cutoffRequested)
3263 gmx_bool bCutoffAllowed;
3265 bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3269 cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3272 return bCutoffAllowed;