2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
53 #include "gromacs/domdec/collect.h"
54 #include "gromacs/domdec/dlb.h"
55 #include "gromacs/domdec/dlbtiming.h"
56 #include "gromacs/domdec/domdec_network.h"
57 #include "gromacs/domdec/ga2la.h"
58 #include "gromacs/domdec/gpuhaloexchange.h"
59 #include "gromacs/domdec/options.h"
60 #include "gromacs/domdec/partition.h"
61 #include "gromacs/gmxlib/network.h"
62 #include "gromacs/gmxlib/nrnb.h"
63 #include "gromacs/gpu_utils/gpu_utils.h"
64 #include "gromacs/hardware/hw_info.h"
65 #include "gromacs/listed_forces/manage_threading.h"
66 #include "gromacs/math/vec.h"
67 #include "gromacs/math/vectypes.h"
68 #include "gromacs/mdlib/calc_verletbuf.h"
69 #include "gromacs/mdlib/constr.h"
70 #include "gromacs/mdlib/constraintrange.h"
71 #include "gromacs/mdlib/updategroups.h"
72 #include "gromacs/mdlib/vsite.h"
73 #include "gromacs/mdtypes/commrec.h"
74 #include "gromacs/mdtypes/forceoutput.h"
75 #include "gromacs/mdtypes/inputrec.h"
76 #include "gromacs/mdtypes/mdrunoptions.h"
77 #include "gromacs/mdtypes/state.h"
78 #include "gromacs/pbcutil/ishift.h"
79 #include "gromacs/pbcutil/pbc.h"
80 #include "gromacs/pulling/pull.h"
81 #include "gromacs/timing/wallcycle.h"
82 #include "gromacs/topology/block.h"
83 #include "gromacs/topology/idef.h"
84 #include "gromacs/topology/ifunc.h"
85 #include "gromacs/topology/mtop_lookup.h"
86 #include "gromacs/topology/mtop_util.h"
87 #include "gromacs/topology/topology.h"
88 #include "gromacs/utility/basedefinitions.h"
89 #include "gromacs/utility/basenetwork.h"
90 #include "gromacs/utility/cstringutil.h"
91 #include "gromacs/utility/exceptions.h"
92 #include "gromacs/utility/fatalerror.h"
93 #include "gromacs/utility/gmxmpi.h"
94 #include "gromacs/utility/logger.h"
95 #include "gromacs/utility/real.h"
96 #include "gromacs/utility/smalloc.h"
97 #include "gromacs/utility/strconvert.h"
98 #include "gromacs/utility/stringstream.h"
99 #include "gromacs/utility/stringutil.h"
100 #include "gromacs/utility/textwriter.h"
102 #include "atomdistribution.h"
104 #include "cellsizes.h"
105 #include "distribute.h"
106 #include "domdec_constraints.h"
107 #include "domdec_internal.h"
108 #include "domdec_setup.h"
109 #include "domdec_vsite.h"
110 #include "redistribute.h"
113 // TODO remove this when moving domdec into gmx namespace
114 using gmx::DdRankOrder;
115 using gmx::DlbOption;
116 using gmx::DomdecOptions;
118 static const char *edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
120 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
123 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
124 #define DD_FLAG_NRCG 65535
125 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
126 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
128 /* The DD zone order */
129 static const ivec dd_zo[DD_MAXZONE] =
130 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
132 /* The non-bonded zone-pair setup for domain decomposition
133 * The first number is the i-zone, the second number the first j-zone seen by
134 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
135 * As is, this is for 3D decomposition, where there are 4 i-zones.
136 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
137 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
140 ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
147 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
149 static void index2xyz(ivec nc,int ind,ivec xyz)
151 xyz[XX] = ind % nc[XX];
152 xyz[YY] = (ind / nc[XX]) % nc[YY];
153 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
157 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
159 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
160 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
161 xyz[ZZ] = ind % nc[ZZ];
164 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
168 const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
169 const int ddindex = dd_index(dd->nc, c);
170 if (cartSetup.bCartesianPP_PME)
172 ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
174 else if (cartSetup.bCartesianPP)
177 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
188 int ddglatnr(const gmx_domdec_t *dd, int i)
198 if (i >= dd->comm->atomRanges.numAtomsTotal())
200 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
202 atnr = dd->globalAtomIndices[i] + 1;
208 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
210 return &dd->comm->cgs_gl;
213 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t &dd)
215 GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
216 return dd.comm->systemInfo.updateGroupingPerMoleculetype;
219 void dd_store_state(gmx_domdec_t *dd, t_state *state)
223 if (state->ddp_count != dd->ddp_count)
225 gmx_incons("The MD state does not match the domain decomposition state");
228 state->cg_gl.resize(dd->ncg_home);
229 for (i = 0; i < dd->ncg_home; i++)
231 state->cg_gl[i] = dd->globalAtomGroupIndices[i];
234 state->ddp_count_cg_gl = dd->ddp_count;
237 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
239 return &dd->comm->zones;
242 void dd_get_ns_ranges(const gmx_domdec_t *dd, int icg,
243 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
245 gmx_domdec_zones_t *zones;
248 zones = &dd->comm->zones;
251 while (icg >= zones->izone[izone].cg1)
260 else if (izone < zones->nizone)
262 *jcg0 = zones->izone[izone].jcg0;
266 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
267 icg, izone, zones->nizone);
270 *jcg1 = zones->izone[izone].jcg1;
272 for (d = 0; d < dd->ndim; d++)
275 shift0[dim] = zones->izone[izone].shift0[dim];
276 shift1[dim] = zones->izone[izone].shift1[dim];
277 if (dd->comm->tric_dir[dim] || (isDlbOn(dd->comm) && d > 0))
279 /* A conservative approach, this can be optimized */
286 int dd_numHomeAtoms(const gmx_domdec_t &dd)
288 return dd.comm->atomRanges.numHomeAtoms();
291 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
293 /* We currently set mdatoms entries for all atoms:
294 * local + non-local + communicated for vsite + constraints
297 return dd->comm->atomRanges.numAtomsTotal();
300 int dd_natoms_vsite(const gmx_domdec_t *dd)
302 return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
305 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
307 *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
308 *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
311 void dd_move_x(gmx_domdec_t *dd,
313 gmx::ArrayRef<gmx::RVec> x,
314 gmx_wallcycle *wcycle)
316 wallcycle_start(wcycle, ewcMOVEX);
319 gmx_domdec_comm_t *comm;
320 gmx_domdec_comm_dim_t *cd;
321 rvec shift = {0, 0, 0};
322 gmx_bool bPBC, bScrew;
327 nat_tot = comm->atomRanges.numHomeAtoms();
328 for (int d = 0; d < dd->ndim; d++)
330 bPBC = (dd->ci[dd->dim[d]] == 0);
331 bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
334 copy_rvec(box[dd->dim[d]], shift);
337 for (const gmx_domdec_ind_t &ind : cd->ind)
339 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
340 gmx::ArrayRef<gmx::RVec> &sendBuffer = sendBufferAccess.buffer;
344 for (int j : ind.index)
346 sendBuffer[n] = x[j];
352 for (int j : ind.index)
354 /* We need to shift the coordinates */
355 for (int d = 0; d < DIM; d++)
357 sendBuffer[n][d] = x[j][d] + shift[d];
364 for (int j : ind.index)
367 sendBuffer[n][XX] = x[j][XX] + shift[XX];
369 * This operation requires a special shift force
370 * treatment, which is performed in calc_vir.
372 sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
373 sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
378 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
380 gmx::ArrayRef<gmx::RVec> receiveBuffer;
381 if (cd->receiveInPlace)
383 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
387 receiveBuffer = receiveBufferAccess.buffer;
389 /* Send and receive the coordinates */
390 ddSendrecv(dd, d, dddirBackward,
391 sendBuffer, receiveBuffer);
393 if (!cd->receiveInPlace)
396 for (int zone = 0; zone < nzone; zone++)
398 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
400 x[i] = receiveBuffer[j++];
404 nat_tot += ind.nrecv[nzone+1];
409 wallcycle_stop(wcycle, ewcMOVEX);
412 void dd_move_f(gmx_domdec_t *dd,
413 gmx::ForceWithShiftForces *forceWithShiftForces,
414 gmx_wallcycle *wcycle)
416 wallcycle_start(wcycle, ewcMOVEF);
418 gmx::ArrayRef<gmx::RVec> f = forceWithShiftForces->force();
419 gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();
421 gmx_domdec_comm_t &comm = *dd->comm;
422 int nzone = comm.zones.n/2;
423 int nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
424 for (int d = dd->ndim-1; d >= 0; d--)
426 /* Only forces in domains near the PBC boundaries need to
427 consider PBC in the treatment of fshift */
428 const bool shiftForcesNeedPbc = (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
429 const bool applyScrewPbc = (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
430 /* Determine which shift vector we need */
431 ivec vis = { 0, 0, 0 };
433 const int is = IVEC2IS(vis);
435 /* Loop over the pulses */
436 const gmx_domdec_comm_dim_t &cd = comm.cd[d];
437 for (int p = cd.numPulses() - 1; p >= 0; p--)
439 const gmx_domdec_ind_t &ind = cd.ind[p];
440 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
441 gmx::ArrayRef<gmx::RVec> &receiveBuffer = receiveBufferAccess.buffer;
443 nat_tot -= ind.nrecv[nzone+1];
445 DDBufferAccess<gmx::RVec> sendBufferAccess(comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
447 gmx::ArrayRef<gmx::RVec> sendBuffer;
448 if (cd.receiveInPlace)
450 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
454 sendBuffer = sendBufferAccess.buffer;
456 for (int zone = 0; zone < nzone; zone++)
458 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
460 sendBuffer[j++] = f[i];
464 /* Communicate the forces */
465 ddSendrecv(dd, d, dddirForward,
466 sendBuffer, receiveBuffer);
467 /* Add the received forces */
469 if (!shiftForcesNeedPbc)
471 for (int j : ind.index)
473 for (int d = 0; d < DIM; d++)
475 f[j][d] += receiveBuffer[n][d];
480 else if (!applyScrewPbc)
482 for (int j : ind.index)
484 for (int d = 0; d < DIM; d++)
486 f[j][d] += receiveBuffer[n][d];
488 /* Add this force to the shift force */
489 for (int d = 0; d < DIM; d++)
491 fshift[is][d] += receiveBuffer[n][d];
498 for (int j : ind.index)
500 /* Rotate the force */
501 f[j][XX] += receiveBuffer[n][XX];
502 f[j][YY] -= receiveBuffer[n][YY];
503 f[j][ZZ] -= receiveBuffer[n][ZZ];
504 if (shiftForcesNeedPbc)
506 /* Add this force to the shift force */
507 for (int d = 0; d < DIM; d++)
509 fshift[is][d] += receiveBuffer[n][d];
518 wallcycle_stop(wcycle, ewcMOVEF);
521 /* Convenience function for extracting a real buffer from an rvec buffer
523 * To reduce the number of temporary communication buffers and avoid
524 * cache polution, we reuse gmx::RVec buffers for storing reals.
525 * This functions return a real buffer reference with the same number
526 * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
528 static gmx::ArrayRef<real>
529 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
531 return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
535 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
538 gmx_domdec_comm_t *comm;
539 gmx_domdec_comm_dim_t *cd;
544 nat_tot = comm->atomRanges.numHomeAtoms();
545 for (int d = 0; d < dd->ndim; d++)
548 for (const gmx_domdec_ind_t &ind : cd->ind)
550 /* Note: We provision for RVec instead of real, so a factor of 3
551 * more than needed. The buffer actually already has this size
552 * and we pass a plain pointer below, so this does not matter.
554 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
555 gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
557 for (int j : ind.index)
559 sendBuffer[n++] = v[j];
562 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
564 gmx::ArrayRef<real> receiveBuffer;
565 if (cd->receiveInPlace)
567 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
571 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
573 /* Send and receive the data */
574 ddSendrecv(dd, d, dddirBackward,
575 sendBuffer, receiveBuffer);
576 if (!cd->receiveInPlace)
579 for (int zone = 0; zone < nzone; zone++)
581 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
583 v[i] = receiveBuffer[j++];
587 nat_tot += ind.nrecv[nzone+1];
593 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
596 gmx_domdec_comm_t *comm;
597 gmx_domdec_comm_dim_t *cd;
601 nzone = comm->zones.n/2;
602 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
603 for (int d = dd->ndim-1; d >= 0; d--)
606 for (int p = cd->numPulses() - 1; p >= 0; p--)
608 const gmx_domdec_ind_t &ind = cd->ind[p];
610 /* Note: We provision for RVec instead of real, so a factor of 3
611 * more than needed. The buffer actually already has this size
612 * and we typecast, so this works as intended.
614 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
615 gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
616 nat_tot -= ind.nrecv[nzone + 1];
618 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
620 gmx::ArrayRef<real> sendBuffer;
621 if (cd->receiveInPlace)
623 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
627 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
629 for (int zone = 0; zone < nzone; zone++)
631 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
633 sendBuffer[j++] = v[i];
637 /* Communicate the forces */
638 ddSendrecv(dd, d, dddirForward,
639 sendBuffer, receiveBuffer);
640 /* Add the received forces */
642 for (int j : ind.index)
644 v[j] += receiveBuffer[n];
652 real dd_cutoff_multibody(const gmx_domdec_t *dd)
654 const gmx_domdec_comm_t &comm = *dd->comm;
655 const DDSystemInfo &systemInfo = comm.systemInfo;
658 if (systemInfo.haveInterDomainMultiBodyBondeds)
660 if (comm.cutoff_mbody > 0)
662 r = comm.cutoff_mbody;
666 /* cutoff_mbody=0 means we do not have DLB */
667 r = comm.cellsize_min[dd->dim[0]];
668 for (int di = 1; di < dd->ndim; di++)
670 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
672 if (comm.systemInfo.filterBondedCommunication)
674 r = std::max(r, comm.cutoff_mbody);
678 r = std::min(r, systemInfo.cutoff);
686 real dd_cutoff_twobody(const gmx_domdec_t *dd)
690 r_mb = dd_cutoff_multibody(dd);
692 return std::max(dd->comm->systemInfo.cutoff, r_mb);
696 static void dd_cart_coord2pmecoord(const DDRankSetup &ddRankSetup,
697 const CartesianRankSetup &cartSetup,
701 const int nc = ddRankSetup.numPPCells[cartSetup.cartpmedim];
702 const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
703 copy_ivec(coord, coord_pme);
704 coord_pme[cartSetup.cartpmedim] =
705 nc + (coord[cartSetup.cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
708 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
709 static int ddindex2pmeindex(const DDRankSetup &ddRankSetup,
710 const int ddCellIndex)
712 const int npp = ddRankSetup.numPPRanks;
713 const int npme = ddRankSetup.numRanksDoingPme;
715 /* Here we assign a PME node to communicate with this DD node
716 * by assuming that the major index of both is x.
717 * We add npme/2 to obtain an even distribution.
719 return (ddCellIndex*npme + npme/2)/npp;
722 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup &ddRankSetup)
724 std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
727 for (int i = 0; i < ddRankSetup.numPPRanks; i++)
729 const int p0 = ddindex2pmeindex(ddRankSetup, i);
730 const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
731 if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
735 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
737 pmeRanks[n] = i + 1 + n;
745 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
753 if (dd->comm->bCartesian) {
754 gmx_ddindex2xyz(dd->nc,ddindex,coords);
755 dd_coords2pmecoords(dd,coords,coords_pme);
756 copy_ivec(dd->ntot,nc);
757 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
758 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
760 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
762 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
768 slab = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->nc, coords));
773 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
775 const CartesianRankSetup &cartSetup = cr->dd->comm->cartesianRankSetup;
776 ivec coords = { x, y, z };
779 if (cartSetup.bCartesianPP_PME)
782 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
787 int ddindex = dd_index(cr->dd->nc, coords);
788 if (cartSetup.bCartesianPP)
790 nodeid = cartSetup.ddindex2simnodeid[ddindex];
794 if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
796 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
808 static int dd_simnode2pmenode(const DDRankSetup &ddRankSetup,
809 const CartesianRankSetup &cartSetup,
810 gmx::ArrayRef<const int> pmeRanks,
811 const t_commrec gmx_unused *cr,
812 const int sim_nodeid)
816 /* This assumes a uniform x domain decomposition grid cell size */
817 if (cartSetup.bCartesianPP_PME)
820 ivec coord, coord_pme;
821 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
822 if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
824 /* This is a PP rank */
825 dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
826 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
830 else if (cartSetup.bCartesianPP)
832 if (sim_nodeid < ddRankSetup.numPPRanks)
834 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
839 /* This assumes DD cells with identical x coordinates
840 * are numbered sequentially.
842 if (pmeRanks.empty())
844 if (sim_nodeid < ddRankSetup.numPPRanks)
846 /* The DD index equals the nodeid */
847 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
853 while (sim_nodeid > pmeRanks[i])
857 if (sim_nodeid < pmeRanks[i])
859 pmenode = pmeRanks[i];
867 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
872 dd->comm->ddRankSetup.npmenodes_x,
873 dd->comm->ddRankSetup.npmenodes_y
884 std::vector<int> get_pme_ddranks(const t_commrec *cr,
887 const DDRankSetup &ddRankSetup = cr->dd->comm->ddRankSetup;
888 const CartesianRankSetup &cartSetup = cr->dd->comm->cartesianRankSetup;
889 GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks, "This function should only be called when PME-only ranks are in use");
890 std::vector<int> ddranks;
891 ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1)/ddRankSetup.numRanksDoingPme);
893 for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
895 for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
897 for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
899 if (cartSetup.bCartesianPP_PME)
901 ivec coord = { x, y, z };
903 dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
904 if (cr->dd->ci[XX] == coord_pme[XX] &&
905 cr->dd->ci[YY] == coord_pme[YY] &&
906 cr->dd->ci[ZZ] == coord_pme[ZZ])
908 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
913 /* The slab corresponds to the nodeid in the PME group */
914 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
916 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
925 static gmx_bool receive_vir_ener(const gmx_domdec_t *dd,
926 gmx::ArrayRef<const int> pmeRanks,
929 gmx_bool bReceive = TRUE;
931 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
932 if (ddRankSetup.usePmeOnlyRanks)
934 const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
935 if (cartSetup.bCartesianPP_PME)
938 int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
940 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
941 coords[cartSetup.cartpmedim]++;
942 if (coords[cartSetup.cartpmedim] < dd->nc[cartSetup.cartpmedim])
945 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
946 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
948 /* This is not the last PP node for pmenode */
953 GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
958 int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
959 if (cr->sim_nodeid+1 < cr->nnodes &&
960 dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
962 /* This is not the last PP node for pmenode */
971 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
973 gmx_domdec_comm_t *comm;
978 snew(*dim_f, dd->nc[dim]+1);
980 for (i = 1; i < dd->nc[dim]; i++)
982 if (comm->slb_frac[dim])
984 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
988 (*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
991 (*dim_f)[dd->nc[dim]] = 1;
994 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
996 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
998 if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
1004 ddpme->dim = dimind;
1006 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
1008 ddpme->nslab = (ddpme->dim == 0 ?
1009 ddRankSetup.npmenodes_x :
1010 ddRankSetup.npmenodes_y);
1012 if (ddpme->nslab <= 1)
1017 const int nso = ddRankSetup.numRanksDoingPme/ddpme->nslab;
1018 /* Determine for each PME slab the PP location range for dimension dim */
1019 snew(ddpme->pp_min, ddpme->nslab);
1020 snew(ddpme->pp_max, ddpme->nslab);
1021 for (int slab = 0; slab < ddpme->nslab; slab++)
1023 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
1024 ddpme->pp_max[slab] = 0;
1026 for (int i = 0; i < dd->nnodes; i++)
1029 ddindex2xyz(dd->nc, i, xyz);
1030 /* For y only use our y/z slab.
1031 * This assumes that the PME x grid size matches the DD grid size.
1033 if (dimind == 0 || xyz[XX] == dd->ci[XX])
1035 const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
1039 slab = pmeindex/nso;
1043 slab = pmeindex % ddpme->nslab;
1045 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
1046 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1050 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1053 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1055 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1057 if (ddRankSetup.ddpme[0].dim == XX)
1059 return ddRankSetup.ddpme[0].maxshift;
1067 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1069 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1071 if (ddRankSetup.ddpme[0].dim == YY)
1073 return ddRankSetup.ddpme[0].maxshift;
1075 else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
1077 return ddRankSetup.ddpme[1].maxshift;
1085 bool ddHaveSplitConstraints(const gmx_domdec_t &dd)
1087 return dd.comm->systemInfo.haveSplitConstraints;
1090 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
1092 /* Note that the cycles value can be incorrect, either 0 or some
1093 * extremely large value, when our thread migrated to another core
1094 * with an unsynchronized cycle counter. If this happens less often
1095 * that once per nstlist steps, this will not cause issues, since
1096 * we later subtract the maximum value from the sum over nstlist steps.
1097 * A zero count will slightly lower the total, but that's a small effect.
1098 * Note that the main purpose of the subtraction of the maximum value
1099 * is to avoid throwing off the load balancing when stalls occur due
1100 * e.g. system activity or network congestion.
1102 dd->comm->cycl[ddCycl] += cycles;
1103 dd->comm->cycl_n[ddCycl]++;
1104 if (cycles > dd->comm->cycl_max[ddCycl])
1106 dd->comm->cycl_max[ddCycl] = cycles;
1111 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
1116 gmx_bool bPartOfGroup = FALSE;
1118 dim = dd->dim[dim_ind];
1119 copy_ivec(loc, loc_c);
1120 for (i = 0; i < dd->nc[dim]; i++)
1123 rank = dd_index(dd->nc, loc_c);
1124 if (rank == dd->rank)
1126 /* This process is part of the group */
1127 bPartOfGroup = TRUE;
1130 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
1134 dd->comm->mpi_comm_load[dim_ind] = c_row;
1135 if (!isDlbDisabled(dd->comm))
1137 DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1139 if (dd->ci[dim] == dd->master_ci[dim])
1141 /* This is the root process of this row */
1142 cellsizes.rowMaster = std::make_unique<RowMaster>();
1144 RowMaster &rowMaster = *cellsizes.rowMaster;
1145 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1146 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
1147 rowMaster.isCellMin.resize(dd->nc[dim]);
1150 rowMaster.bounds.resize(dd->nc[dim]);
1152 rowMaster.buf_ncd.resize(dd->nc[dim]);
1156 /* This is not a root process, we only need to receive cell_f */
1157 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1160 if (dd->ci[dim] == dd->master_ci[dim])
1162 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
1168 void dd_setup_dlb_resource_sharing(t_commrec *cr,
1172 int physicalnode_id_hash;
1174 MPI_Comm mpi_comm_pp_physicalnode;
1176 if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1178 /* Only ranks with short-ranged tasks (currently) use GPUs.
1179 * If we don't have GPUs assigned, there are no resources to share.
1184 physicalnode_id_hash = gmx_physicalnode_id_hash();
1190 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1191 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
1192 dd->rank, physicalnode_id_hash, gpu_id);
1194 /* Split the PP communicator over the physical nodes */
1195 /* TODO: See if we should store this (before), as it's also used for
1196 * for the nodecomm summation.
1198 // TODO PhysicalNodeCommunicator could be extended/used to handle
1199 // the need for per-node per-group communicators.
1200 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
1201 &mpi_comm_pp_physicalnode);
1202 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
1203 &dd->comm->mpi_comm_gpu_shared);
1204 MPI_Comm_free(&mpi_comm_pp_physicalnode);
1205 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1209 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1212 /* Note that some ranks could share a GPU, while others don't */
1214 if (dd->comm->nrank_gpu_shared == 1)
1216 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1219 GMX_UNUSED_VALUE(cr);
1220 GMX_UNUSED_VALUE(gpu_id);
1224 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
1227 int dim0, dim1, i, j;
1232 fprintf(debug, "Making load communicators\n");
1235 dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1236 snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1244 make_load_communicator(dd, 0, loc);
1248 for (i = 0; i < dd->nc[dim0]; i++)
1251 make_load_communicator(dd, 1, loc);
1257 for (i = 0; i < dd->nc[dim0]; i++)
1261 for (j = 0; j < dd->nc[dim1]; j++)
1264 make_load_communicator(dd, 2, loc);
1271 fprintf(debug, "Finished making load communicators\n");
1276 /*! \brief Sets up the relation between neighboring domains and zones */
1277 static void setup_neighbor_relations(gmx_domdec_t *dd)
1279 int d, dim, i, j, m;
1281 gmx_domdec_zones_t *zones;
1282 gmx_domdec_ns_ranges_t *izone;
1283 GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
1285 for (d = 0; d < dd->ndim; d++)
1288 copy_ivec(dd->ci, tmp);
1289 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
1290 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1291 copy_ivec(dd->ci, tmp);
1292 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
1293 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1296 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
1299 dd->neighbor[d][1]);
1303 int nzone = (1 << dd->ndim);
1304 int nizone = (1 << std::max(dd->ndim - 1, 0));
1305 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1307 zones = &dd->comm->zones;
1309 for (i = 0; i < nzone; i++)
1312 clear_ivec(zones->shift[i]);
1313 for (d = 0; d < dd->ndim; d++)
1315 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1320 for (i = 0; i < nzone; i++)
1322 for (d = 0; d < DIM; d++)
1324 s[d] = dd->ci[d] - zones->shift[i][d];
1329 else if (s[d] >= dd->nc[d])
1335 zones->nizone = nizone;
1336 for (i = 0; i < zones->nizone; i++)
1338 assert(ddNonbondedZonePairRanges[i][0] == i);
1340 izone = &zones->izone[i];
1341 /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
1342 * j-zones up to nzone.
1344 izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
1345 izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
1346 for (dim = 0; dim < DIM; dim++)
1348 if (dd->nc[dim] == 1)
1350 /* All shifts should be allowed */
1351 izone->shift0[dim] = -1;
1352 izone->shift1[dim] = 1;
1356 /* Determine the min/max j-zone shift wrt the i-zone */
1357 izone->shift0[dim] = 1;
1358 izone->shift1[dim] = -1;
1359 for (j = izone->j0; j < izone->j1; j++)
1361 int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
1362 if (shift_diff < izone->shift0[dim])
1364 izone->shift0[dim] = shift_diff;
1366 if (shift_diff > izone->shift1[dim])
1368 izone->shift1[dim] = shift_diff;
1375 if (!isDlbDisabled(dd->comm))
1377 dd->comm->cellsizesWithDlb.resize(dd->ndim);
1380 if (dd->comm->ddSettings.recordLoad)
1382 make_load_communicators(dd);
1386 static void make_pp_communicator(const gmx::MDLogger &mdlog,
1388 t_commrec gmx_unused *cr,
1389 bool gmx_unused reorder)
1392 gmx_domdec_comm_t *comm = dd->comm;
1393 CartesianRankSetup &cartSetup = comm->cartesianRankSetup;
1395 if (cartSetup.bCartesianPP)
1397 /* Set up cartesian communication for the particle-particle part */
1398 GMX_LOG(mdlog.info).appendTextFormatted(
1399 "Will use a Cartesian communicator: %d x %d x %d",
1400 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
1403 for (int i = 0; i < DIM; i++)
1408 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
1410 /* We overwrite the old communicator with the new cartesian one */
1411 cr->mpi_comm_mygroup = comm_cart;
1414 dd->mpi_comm_all = cr->mpi_comm_mygroup;
1415 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1417 if (cartSetup.bCartesianPP_PME)
1419 /* Since we want to use the original cartesian setup for sim,
1420 * and not the one after split, we need to make an index.
1422 cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1423 cartSetup.ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
1424 gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1425 /* Get the rank of the DD master,
1426 * above we made sure that the master node is a PP node.
1437 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1439 else if (cartSetup.bCartesianPP)
1441 if (!comm->ddRankSetup.usePmeOnlyRanks)
1443 /* The PP communicator is also
1444 * the communicator for this simulation
1446 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1448 cr->nodeid = dd->rank;
1450 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1452 /* We need to make an index to go from the coordinates
1453 * to the nodeid of this simulation.
1455 cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1456 std::vector<int> buf(dd->nnodes);
1457 if (thisRankHasDuty(cr, DUTY_PP))
1459 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1461 /* Communicate the ddindex to simulation nodeid index */
1462 MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1463 cr->mpi_comm_mysim);
1465 /* Determine the master coordinates and rank.
1466 * The DD master should be the same node as the master of this sim.
1468 for (int i = 0; i < dd->nnodes; i++)
1470 if (cartSetup.ddindex2simnodeid[i] == 0)
1472 ddindex2xyz(dd->nc, i, dd->master_ci);
1473 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1478 fprintf(debug, "The master rank is %d\n", dd->masterrank);
1483 /* No Cartesian communicators */
1484 /* We use the rank in dd->comm->all as DD index */
1485 ddindex2xyz(dd->nc, dd->rank, dd->ci);
1486 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1488 clear_ivec(dd->master_ci);
1492 GMX_LOG(mdlog.info).appendTextFormatted(
1493 "Domain decomposition rank %d, coordinates %d %d %d\n",
1494 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1498 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
1499 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1503 static void receive_ddindex2simnodeid(gmx_domdec_t *dd,
1507 CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
1509 if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1511 cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1512 std::vector<int> buf(dd->nnodes);
1513 if (thisRankHasDuty(cr, DUTY_PP))
1515 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1517 /* Communicate the ddindex to simulation nodeid index */
1518 MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1519 cr->mpi_comm_mysim);
1522 GMX_UNUSED_VALUE(dd);
1523 GMX_UNUSED_VALUE(cr);
1527 static CartesianRankSetup
1528 split_communicator(const gmx::MDLogger &mdlog,
1530 const DdRankOrder ddRankOrder,
1531 bool gmx_unused reorder,
1532 const DDRankSetup &ddRankSetup,
1534 std::vector<int> *pmeRanks)
1536 CartesianRankSetup cartSetup;
1538 cartSetup.bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
1539 cartSetup.bCartesianPP_PME = false;
1541 const ivec &numDDCells = ddRankSetup.numPPCells;
1542 /* Initially we set ntot to the number of PP cells */
1543 copy_ivec(numDDCells, cartSetup.ntot);
1545 if (cartSetup.bCartesianPP)
1547 const int numDDCellsTot = ddRankSetup.numPPRanks;
1549 for (int i = 1; i < DIM; i++)
1551 bDiv[i] = ((ddRankSetup.numRanksDoingPme*numDDCells[i]) % numDDCellsTot == 0);
1553 if (bDiv[YY] || bDiv[ZZ])
1555 cartSetup.bCartesianPP_PME = TRUE;
1556 /* If we have 2D PME decomposition, which is always in x+y,
1557 * we stack the PME only nodes in z.
1558 * Otherwise we choose the direction that provides the thinnest slab
1559 * of PME only nodes as this will have the least effect
1560 * on the PP communication.
1561 * But for the PME communication the opposite might be better.
1563 if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 ||
1565 numDDCells[YY] > numDDCells[ZZ]))
1567 cartSetup.cartpmedim = ZZ;
1571 cartSetup.cartpmedim = YY;
1573 cartSetup.ntot[cartSetup.cartpmedim]
1574 += (ddRankSetup.numRanksDoingPme*numDDCells[cartSetup.cartpmedim])/numDDCellsTot;
1578 GMX_LOG(mdlog.info).appendTextFormatted(
1579 "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)",
1580 ddRankSetup.numRanksDoingPme,
1581 numDDCells[XX], numDDCells[YY],
1582 numDDCells[XX], numDDCells[ZZ]);
1583 GMX_LOG(mdlog.info).appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1587 if (cartSetup.bCartesianPP_PME)
1593 GMX_LOG(mdlog.info).appendTextFormatted(
1594 "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1595 cartSetup.ntot[XX], cartSetup.ntot[YY], cartSetup.ntot[ZZ]);
1597 for (int i = 0; i < DIM; i++)
1602 MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder),
1604 MPI_Comm_rank(comm_cart, &rank);
1605 if (MASTER(cr) && rank != 0)
1607 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1610 /* With this assigment we loose the link to the original communicator
1611 * which will usually be MPI_COMM_WORLD, unless have multisim.
1613 cr->mpi_comm_mysim = comm_cart;
1614 cr->sim_nodeid = rank;
1616 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1618 GMX_LOG(mdlog.info).appendTextFormatted(
1619 "Cartesian rank %d, coordinates %d %d %d\n",
1620 cr->sim_nodeid, ddCellIndex[XX], ddCellIndex[YY], ddCellIndex[ZZ]);
1622 if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1626 if (!ddRankSetup.usePmeOnlyRanks ||
1627 ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1629 cr->duty = DUTY_PME;
1632 /* Split the sim communicator into PP and PME only nodes */
1633 MPI_Comm_split(cr->mpi_comm_mysim,
1634 getThisRankDuties(cr),
1635 dd_index(cartSetup.ntot, ddCellIndex),
1636 &cr->mpi_comm_mygroup);
1638 GMX_UNUSED_VALUE(ddCellIndex);
1643 switch (ddRankOrder)
1645 case DdRankOrder::pp_pme:
1646 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1648 case DdRankOrder::interleave:
1649 /* Interleave the PP-only and PME-only ranks */
1650 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1651 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1653 case DdRankOrder::cartesian:
1656 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1659 if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1661 cr->duty = DUTY_PME;
1668 /* Split the sim communicator into PP and PME only nodes */
1669 MPI_Comm_split(cr->mpi_comm_mysim,
1670 getThisRankDuties(cr),
1672 &cr->mpi_comm_mygroup);
1673 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1677 GMX_LOG(mdlog.info).appendTextFormatted(
1678 "This rank does only %s work.\n",
1679 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1684 /*! \brief Makes the PP communicator and the PME communicator, when needed
1686 * Returns the Cartesian rank setup.
1687 * Sets \p cr->mpi_comm_mygroup
1688 * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1689 * With separate PME ranks in interleaved order, set the PME ranks in \p pmeRanks.
1691 static CartesianRankSetup
1692 makeGroupCommunicators(const gmx::MDLogger &mdlog,
1693 const DDSettings &ddSettings,
1694 const DdRankOrder ddRankOrder,
1695 const DDRankSetup &ddRankSetup,
1698 std::vector<int> *pmeRanks)
1700 CartesianRankSetup cartSetup;
1702 if (ddRankSetup.usePmeOnlyRanks)
1704 /* Split the communicator into a PP and PME part */
1706 split_communicator(mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder,
1707 ddRankSetup, ddCellIndex, pmeRanks);
1711 /* All nodes do PP and PME */
1712 /* We do not require separate communicators */
1713 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
1715 cartSetup.bCartesianPP = false;
1716 cartSetup.bCartesianPP_PME = false;
1722 /*! \brief For PP ranks, sets or makes the communicator
1724 * For PME ranks get the rank id.
1725 * For PP only ranks, sets the PME-only rank.
1727 static void setupGroupCommunication(const gmx::MDLogger &mdlog,
1728 const DDSettings &ddSettings,
1729 gmx::ArrayRef<const int> pmeRanks,
1733 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1734 const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
1736 if (thisRankHasDuty(cr, DUTY_PP))
1738 /* Copy or make a new PP communicator */
1740 /* We (possibly) reordered the nodes in split_communicator,
1741 * so it is no longer required in make_pp_communicator.
1743 const bool useCartesianReorder =
1744 (ddSettings.useCartesianReorder &&
1745 !cartSetup.bCartesianPP_PME);
1747 make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1751 receive_ddindex2simnodeid(dd, cr);
1754 if (!thisRankHasDuty(cr, DUTY_PME))
1756 /* Set up the commnuication to our PME node */
1757 dd->pme_nodeid = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1758 dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1761 fprintf(debug, "My pme_nodeid %d receive ener %s\n",
1762 dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
1767 dd->pme_nodeid = -1;
1770 /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1773 dd->ma = std::make_unique<AtomDistribution>(dd->nc,
1774 dd->comm->cgs_gl.nr,
1775 dd->comm->cgs_gl.index[dd->comm->cgs_gl.nr]);
1779 static real *get_slb_frac(const gmx::MDLogger &mdlog,
1780 const char *dir, int nc, const char *size_string)
1782 real *slb_frac, tot;
1787 if (nc > 1 && size_string != nullptr)
1789 GMX_LOG(mdlog.info).appendTextFormatted(
1790 "Using static load balancing for the %s direction", dir);
1793 for (i = 0; i < nc; i++)
1796 sscanf(size_string, "%20lf%n", &dbl, &n);
1799 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
1806 std::string relativeCellSizes = "Relative cell sizes:";
1807 for (i = 0; i < nc; i++)
1810 relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1812 GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1818 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
1821 gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1823 while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1825 for (auto &ilist : extractILists(*ilists, IF_BOND))
1827 if (NRAL(ilist.functionType) > 2)
1829 n += nmol*(ilist.iatoms.size()/ilistStride(ilist));
1837 static int dd_getenv(const gmx::MDLogger &mdlog,
1838 const char *env_var, int def)
1844 val = getenv(env_var);
1847 if (sscanf(val, "%20d", &nst) <= 0)
1851 GMX_LOG(mdlog.info).appendTextFormatted(
1852 "Found env.var. %s = %s, using value %d",
1859 static void check_dd_restrictions(const gmx_domdec_t *dd,
1860 const t_inputrec *ir,
1861 const gmx::MDLogger &mdlog)
1863 if (ir->ePBC == epbcSCREW &&
1864 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
1866 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
1869 if (ir->nstlist == 0)
1871 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1874 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
1876 GMX_LOG(mdlog.warning).appendText("comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
1880 static real average_cellsize_min(const gmx_ddbox_t &ddbox,
1881 const ivec numDomains)
1883 real r = ddbox.box_size[XX];
1884 for (int d = 0; d < DIM; d++)
1886 if (numDomains[d] > 1)
1888 /* Check using the initial average cell size */
1889 r = std::min(r, ddbox.box_size[d]*ddbox.skew_fac[d]/numDomains[d]);
1896 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
1898 static DlbState forceDlbOffOrBail(DlbState cmdlineDlbState,
1899 const std::string &reasonStr,
1900 const gmx::MDLogger &mdlog)
1902 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
1903 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
1905 if (cmdlineDlbState == DlbState::onUser)
1907 gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1909 else if (cmdlineDlbState == DlbState::offCanTurnOn)
1911 GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1913 return DlbState::offForever;
1916 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1918 * This function parses the parameters of "-dlb" command line option setting
1919 * corresponding state values. Then it checks the consistency of the determined
1920 * state with other run parameters and settings. As a result, the initial state
1921 * may be altered or an error may be thrown if incompatibility of options is detected.
1923 * \param [in] mdlog Logger.
1924 * \param [in] dlbOption Enum value for the DLB option.
1925 * \param [in] bRecordLoad True if the load balancer is recording load information.
1926 * \param [in] mdrunOptions Options for mdrun.
1927 * \param [in] ir Pointer mdrun to input parameters.
1928 * \returns DLB initial/startup state.
1930 static DlbState determineInitialDlbState(const gmx::MDLogger &mdlog,
1931 DlbOption dlbOption, gmx_bool bRecordLoad,
1932 const gmx::MdrunOptions &mdrunOptions,
1933 const t_inputrec *ir)
1935 DlbState dlbState = DlbState::offCanTurnOn;
1939 case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1940 case DlbOption::no: dlbState = DlbState::offUser; break;
1941 case DlbOption::yes: dlbState = DlbState::onUser; break;
1942 default: gmx_incons("Invalid dlbOption enum value");
1945 /* Reruns don't support DLB: bail or override auto mode */
1946 if (mdrunOptions.rerun)
1948 std::string reasonStr = "it is not supported in reruns.";
1949 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1952 /* Unsupported integrators */
1953 if (!EI_DYNAMICS(ir->eI))
1955 auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1956 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1959 /* Without cycle counters we can't time work to balance on */
1962 std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
1963 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1966 if (mdrunOptions.reproducible)
1968 std::string reasonStr = "you started a reproducible run.";
1971 case DlbState::offUser:
1973 case DlbState::offForever:
1974 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1976 case DlbState::offCanTurnOn:
1977 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1978 case DlbState::onCanTurnOff:
1979 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1981 case DlbState::onUser:
1982 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", mdlog);
1984 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", static_cast<int>(dlbState));
1991 static gmx_domdec_comm_t *init_dd_comm()
1993 gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
1995 comm->n_load_have = 0;
1996 comm->n_load_collect = 0;
1998 comm->haveTurnedOffDlb = false;
2000 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
2002 comm->sum_nat[i] = 0;
2006 comm->load_step = 0;
2009 clear_ivec(comm->load_lim);
2013 /* This should be replaced by a unique pointer */
2014 comm->balanceRegion = ddBalanceRegionAllocate();
2019 /* Returns whether mtop contains constraints and/or vsites */
2020 static bool systemHasConstraintsOrVsites(const gmx_mtop_t &mtop)
2022 auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
2024 while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
2026 if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
2035 static void setupUpdateGroups(const gmx::MDLogger &mdlog,
2036 const gmx_mtop_t &mtop,
2037 const t_inputrec &inputrec,
2038 const real cutoffMargin,
2039 DDSystemInfo *systemInfo)
2041 /* When we have constraints and/or vsites, it is beneficial to use
2042 * update groups (when possible) to allow independent update of groups.
2044 if (!systemHasConstraintsOrVsites(mtop))
2046 /* No constraints or vsites, atoms can be updated independently */
2050 systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
2051 systemInfo->useUpdateGroups =
2052 (!systemInfo->updateGroupingPerMoleculetype.empty() &&
2053 getenv("GMX_NO_UPDATEGROUPS") == nullptr);
2055 if (systemInfo->useUpdateGroups)
2057 int numUpdateGroups = 0;
2058 for (const auto &molblock : mtop.molblock)
2060 numUpdateGroups += molblock.nmol*systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
2063 systemInfo->maxUpdateGroupRadius =
2064 computeMaxUpdateGroupRadius(mtop,
2065 systemInfo->updateGroupingPerMoleculetype,
2066 maxReferenceTemperature(inputrec));
2068 /* To use update groups, the large domain-to-domain cutoff distance
2069 * should be compatible with the box size.
2071 systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
2073 if (systemInfo->useUpdateGroups)
2075 GMX_LOG(mdlog.info).appendTextFormatted(
2076 "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f nm\n",
2078 mtop.natoms/static_cast<double>(numUpdateGroups),
2079 systemInfo->maxUpdateGroupRadius);
2083 GMX_LOG(mdlog.info).appendTextFormatted("The combination of rlist and box size prohibits the use of update groups\n");
2084 systemInfo->updateGroupingPerMoleculetype.clear();
2089 UnitCellInfo::UnitCellInfo(const t_inputrec &ir) :
2090 npbcdim(ePBC2npbcdim(ir.ePBC)),
2091 numBoundedDimensions(inputrec2nboundeddim(&ir)),
2092 ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
2093 haveScrewPBC(ir.ePBC == epbcSCREW)
2097 /* Returns whether molecules are always whole, i.e. not broken by PBC */
2099 moleculesAreAlwaysWhole(const gmx_mtop_t &mtop,
2100 const bool useUpdateGroups,
2101 gmx::ArrayRef<const gmx::RangePartitioning> updateGroupingPerMoleculetype)
2103 if (useUpdateGroups)
2105 GMX_RELEASE_ASSERT(updateGroupingPerMoleculetype.size() == mtop.moltype.size(),
2106 "Need one grouping per moltype");
2107 for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
2109 if (updateGroupingPerMoleculetype[mol].numBlocks() > 1)
2117 for (const auto &moltype : mtop.moltype)
2119 if (moltype.atoms.nr > 1)
2129 /*! \brief Generate the simulation system information */
2131 getSystemInfo(const gmx::MDLogger &mdlog,
2132 const t_commrec *cr,
2133 const DomdecOptions &options,
2134 const gmx_mtop_t *mtop,
2135 const t_inputrec *ir,
2137 gmx::ArrayRef<const gmx::RVec> xGlobal)
2139 const real tenPercentMargin = 1.1;
2141 DDSystemInfo systemInfo;
2143 /* We need to decide on update groups early, as this affects communication distances */
2144 systemInfo.useUpdateGroups = false;
2145 if (ir->cutoff_scheme == ecutsVERLET)
2147 real cutoffMargin = std::sqrt(max_cutoff2(ir->ePBC, box)) - ir->rlist;
2148 setupUpdateGroups(mdlog, *mtop, *ir, cutoffMargin, &systemInfo);
2151 systemInfo.moleculesAreAlwaysWhole =
2152 moleculesAreAlwaysWhole(*mtop,
2153 systemInfo.useUpdateGroups,
2154 systemInfo.updateGroupingPerMoleculetype);
2155 systemInfo.haveInterDomainBondeds = (!systemInfo.moleculesAreAlwaysWhole ||
2156 mtop->bIntermolecularInteractions);
2157 systemInfo.haveInterDomainMultiBodyBondeds = (systemInfo.haveInterDomainBondeds &&
2158 multi_body_bondeds_count(mtop) > 0);
2160 if (systemInfo.useUpdateGroups)
2162 systemInfo.haveSplitConstraints = false;
2163 systemInfo.haveSplitSettles = false;
2167 systemInfo.haveSplitConstraints = gmx::inter_charge_group_constraints(*mtop);
2168 systemInfo.haveSplitSettles = gmx::inter_charge_group_settles(*mtop);
2173 /* Set the cut-off to some very large value,
2174 * so we don't need if statements everywhere in the code.
2175 * We use sqrt, since the cut-off is squared in some places.
2177 systemInfo.cutoff = GMX_CUTOFF_INF;
2181 systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir->rlist);
2183 systemInfo.minCutoffForMultiBody = 0;
2185 /* Determine the minimum cell size limit, affected by many factors */
2186 systemInfo.cellsizeLimit = 0;
2187 systemInfo.filterBondedCommunication = false;
2189 /* We do not allow home atoms to move beyond the neighboring domain
2190 * between domain decomposition steps, which limits the cell size.
2191 * Get an estimate of cell size limit due to atom displacement.
2192 * In most cases this is a large overestimate, because it assumes
2193 * non-interaction atoms.
2194 * We set the chance to 1 in a trillion steps.
2196 constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2197 const real limitForAtomDisplacement =
2198 minCellSizeForAtomDisplacement(*mtop, *ir,
2199 systemInfo.updateGroupingPerMoleculetype,
2200 c_chanceThatAtomMovesBeyondDomain);
2201 GMX_LOG(mdlog.info).appendTextFormatted(
2202 "Minimum cell size due to atom displacement: %.3f nm",
2203 limitForAtomDisplacement);
2205 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2206 limitForAtomDisplacement);
2208 /* TODO: PME decomposition currently requires atoms not to be more than
2209 * 2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2210 * In nearly all cases, limitForAtomDisplacement will be smaller
2211 * than 2/3*rlist, so the PME requirement is satisfied.
2212 * But it would be better for both correctness and performance
2213 * to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2214 * Note that we would need to improve the pairlist buffer case.
2217 if (systemInfo.haveInterDomainBondeds)
2219 if (options.minimumCommunicationRange > 0)
2221 systemInfo.minCutoffForMultiBody =
2222 atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2223 if (options.useBondedCommunication)
2225 systemInfo.filterBondedCommunication = (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2229 systemInfo.cutoff = std::max(systemInfo.cutoff,
2230 systemInfo.minCutoffForMultiBody);
2233 else if (ir->bPeriodicMols)
2235 /* Can not easily determine the required cut-off */
2236 GMX_LOG(mdlog.warning).appendText("NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.");
2237 systemInfo.minCutoffForMultiBody = systemInfo.cutoff/2;
2245 dd_bonded_cg_distance(mdlog, mtop, ir, as_rvec_array(xGlobal.data()), box,
2246 options.checkBondedInteractions,
2249 gmx_bcast(sizeof(r_2b), &r_2b, cr);
2250 gmx_bcast(sizeof(r_mb), &r_mb, cr);
2252 /* We use an initial margin of 10% for the minimum cell size,
2253 * except when we are just below the non-bonded cut-off.
2255 if (options.useBondedCommunication)
2257 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2259 const real r_bonded = std::max(r_2b, r_mb);
2260 systemInfo.minCutoffForMultiBody = tenPercentMargin*r_bonded;
2261 /* This is the (only) place where we turn on the filtering */
2262 systemInfo.filterBondedCommunication = true;
2266 const real r_bonded = r_mb;
2267 systemInfo.minCutoffForMultiBody = std::min(tenPercentMargin*r_bonded,
2270 /* We determine cutoff_mbody later */
2271 systemInfo.increaseMultiBodyCutoff = true;
2275 /* No special bonded communication,
2276 * simply increase the DD cut-off.
2278 systemInfo.minCutoffForMultiBody = tenPercentMargin*std::max(r_2b, r_mb);
2279 systemInfo.cutoff = std::max(systemInfo.cutoff,
2280 systemInfo.minCutoffForMultiBody);
2283 GMX_LOG(mdlog.info).appendTextFormatted(
2284 "Minimum cell size due to bonded interactions: %.3f nm",
2285 systemInfo.minCutoffForMultiBody);
2287 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2288 systemInfo.minCutoffForMultiBody);
2291 systemInfo.constraintCommunicationRange = 0;
2292 if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
2294 /* There is a cell size limit due to the constraints (P-LINCS) */
2295 systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, mtop, ir);
2296 GMX_LOG(mdlog.info).appendTextFormatted(
2297 "Estimated maximum distance required for P-LINCS: %.3f nm",
2298 systemInfo.constraintCommunicationRange);
2299 if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2301 GMX_LOG(mdlog.info).appendText("This distance will limit the DD cell size, you can override this with -rcon");
2304 else if (options.constraintCommunicationRange > 0)
2306 /* Here we do not check for dd->splitConstraints.
2307 * because one can also set a cell size limit for virtual sites only
2308 * and at this point we don't know yet if there are intercg v-sites.
2310 GMX_LOG(mdlog.info).appendTextFormatted(
2311 "User supplied maximum distance required for P-LINCS: %.3f nm",
2312 options.constraintCommunicationRange);
2313 systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2315 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2316 systemInfo.constraintCommunicationRange);
2321 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
2324 checkDDGridSetup(const DDGridSetup &ddGridSetup,
2325 const t_commrec *cr,
2326 const DomdecOptions &options,
2327 const DDSettings &ddSettings,
2328 const DDSystemInfo &systemInfo,
2329 const real cellsizeLimit,
2330 const gmx_ddbox_t &ddbox)
2332 if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
2335 gmx_bool bC = (systemInfo.haveSplitConstraints &&
2336 systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2337 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
2338 !bC ? "-rdd" : "-rcon",
2339 ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2340 bC ? " or your LINCS settings" : "");
2342 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2343 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
2345 "Look in the log file for details on the domain decomposition",
2346 cr->nnodes - ddGridSetup.numPmeOnlyRanks,
2351 const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
2352 if (acs < cellsizeLimit)
2354 if (options.numCells[XX] <= 0)
2356 GMX_RELEASE_ASSERT(false, "dd_choose_grid() should return a grid that satisfies the cell size limits");
2360 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2361 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
2362 acs, cellsizeLimit);
2366 const int numPPRanks = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2367 if (cr->nnodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2369 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2370 "The size of the domain decomposition grid (%d) does not match the number of PP ranks (%d). The total number of ranks is %d",
2371 numPPRanks, cr->nnodes - ddGridSetup.numPmeOnlyRanks, cr->nnodes);
2373 if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2375 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2376 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", ddGridSetup.numPmeOnlyRanks, numPPRanks);
2380 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2382 getDDRankSetup(const gmx::MDLogger &mdlog,
2384 const DDGridSetup &ddGridSetup,
2385 const t_inputrec &ir)
2387 GMX_LOG(mdlog.info).appendTextFormatted(
2388 "Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2389 ddGridSetup.numDomains[XX], ddGridSetup.numDomains[YY], ddGridSetup.numDomains[ZZ],
2390 ddGridSetup.numPmeOnlyRanks);
2392 DDRankSetup ddRankSetup;
2394 ddRankSetup.numPPRanks = cr->nnodes - ddGridSetup.numPmeOnlyRanks;
2395 copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2397 ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2398 if (ddRankSetup.usePmeOnlyRanks)
2400 ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2404 ddRankSetup.numRanksDoingPme = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2407 if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2409 /* The following choices should match those
2410 * in comm_cost_est in domdec_setup.c.
2411 * Note that here the checks have to take into account
2412 * that the decomposition might occur in a different order than xyz
2413 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2414 * in which case they will not match those in comm_cost_est,
2415 * but since that is mainly for testing purposes that's fine.
2417 if (ddGridSetup.numDDDimensions >= 2 &&
2418 ddGridSetup.ddDimensions[0] == XX &&
2419 ddGridSetup.ddDimensions[1] == YY &&
2420 ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX] &&
2421 ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0 &&
2422 getenv("GMX_PMEONEDD") == nullptr)
2424 ddRankSetup.npmedecompdim = 2;
2425 ddRankSetup.npmenodes_x = ddGridSetup.numDomains[XX];
2426 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme/ddRankSetup.npmenodes_x;
2430 /* In case nc is 1 in both x and y we could still choose to
2431 * decompose pme in y instead of x, but we use x for simplicity.
2433 ddRankSetup.npmedecompdim = 1;
2434 if (ddGridSetup.ddDimensions[0] == YY)
2436 ddRankSetup.npmenodes_x = 1;
2437 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2441 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2442 ddRankSetup.npmenodes_y = 1;
2445 GMX_LOG(mdlog.info).appendTextFormatted(
2446 "PME domain decomposition: %d x %d x %d",
2447 ddRankSetup.npmenodes_x, ddRankSetup.npmenodes_y, 1);
2451 ddRankSetup.npmedecompdim = 0;
2452 ddRankSetup.npmenodes_x = 0;
2453 ddRankSetup.npmenodes_y = 0;
2459 /*! \brief Set the cell size and interaction limits */
2460 static void set_dd_limits(const gmx::MDLogger &mdlog,
2461 t_commrec *cr, gmx_domdec_t *dd,
2462 const DomdecOptions &options,
2463 const DDSettings &ddSettings,
2464 const DDSystemInfo &systemInfo,
2465 const DDGridSetup &ddGridSetup,
2466 const int numPPRanks,
2467 const gmx_mtop_t *mtop,
2468 const t_inputrec *ir,
2469 const gmx_ddbox_t &ddbox)
2471 gmx_domdec_comm_t *comm = dd->comm;
2472 comm->ddSettings = ddSettings;
2474 /* Initialize to GPU share count to 0, might change later */
2475 comm->nrank_gpu_shared = 0;
2477 comm->dlbState = comm->ddSettings.initialDlbState;
2478 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2479 /* To consider turning DLB on after 2*nstlist steps we need to check
2480 * at partitioning count 3. Thus we need to increase the first count by 2.
2482 comm->ddPartioningCountFirstDlbOff += 2;
2484 comm->bPMELoadBalDLBLimits = FALSE;
2486 /* Allocate the charge group/atom sorting struct */
2487 comm->sort = std::make_unique<gmx_domdec_sort_t>();
2489 comm->systemInfo = systemInfo;
2491 if (systemInfo.useUpdateGroups)
2493 /* Note: We would like to use dd->nnodes for the atom count estimate,
2494 * but that is not yet available here. But this anyhow only
2495 * affect performance up to the second dd_partition_system call.
2497 const int homeAtomCountEstimate = mtop->natoms/numPPRanks;
2498 comm->updateGroupsCog =
2499 std::make_unique<gmx::UpdateGroupsCog>(*mtop,
2500 systemInfo.updateGroupingPerMoleculetype,
2501 maxReferenceTemperature(*ir),
2502 homeAtomCountEstimate);
2505 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
2507 /* Set the DD setup given by ddGridSetup */
2508 copy_ivec(ddGridSetup.numDomains, dd->nc);
2509 dd->ndim = ddGridSetup.numDDDimensions;
2510 copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2512 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
2514 snew(comm->slb_frac, DIM);
2515 if (isDlbDisabled(comm))
2517 comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
2518 comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
2519 comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
2522 /* Set the multi-body cut-off and cellsize limit for DLB */
2523 comm->cutoff_mbody = systemInfo.minCutoffForMultiBody;
2524 comm->cellsize_limit = systemInfo.cellsizeLimit;
2525 if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2527 if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2529 /* Set the bonded communication distance to halfway
2530 * the minimum and the maximum,
2531 * since the extra communication cost is nearly zero.
2533 real acs = average_cellsize_min(ddbox, dd->nc);
2534 comm->cutoff_mbody = 0.5*(systemInfo.minCutoffForMultiBody + acs);
2535 if (!isDlbDisabled(comm))
2537 /* Check if this does not limit the scaling */
2538 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
2539 options.dlbScaling*acs);
2541 if (!systemInfo.filterBondedCommunication)
2543 /* Without bBondComm do not go beyond the n.b. cut-off */
2544 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2545 if (comm->cellsize_limit >= systemInfo.cutoff)
2547 /* We don't loose a lot of efficieny
2548 * when increasing it to the n.b. cut-off.
2549 * It can even be slightly faster, because we need
2550 * less checks for the communication setup.
2552 comm->cutoff_mbody = systemInfo.cutoff;
2555 /* Check if we did not end up below our original limit */
2556 comm->cutoff_mbody = std::max(comm->cutoff_mbody,
2557 systemInfo.minCutoffForMultiBody);
2559 if (comm->cutoff_mbody > comm->cellsize_limit)
2561 comm->cellsize_limit = comm->cutoff_mbody;
2564 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2569 fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
2570 "cellsize limit %f\n",
2571 gmx::boolToString(systemInfo.filterBondedCommunication),
2572 comm->cellsize_limit);
2577 check_dd_restrictions(dd, ir, mdlog);
2581 void dd_init_bondeds(FILE *fplog,
2583 const gmx_mtop_t *mtop,
2584 const gmx_vsite_t *vsite,
2585 const t_inputrec *ir,
2587 cginfo_mb_t *cginfo_mb)
2589 gmx_domdec_comm_t *comm;
2591 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
2595 if (comm->systemInfo.filterBondedCommunication)
2597 /* Communicate atoms beyond the cut-off for bonded interactions */
2598 comm->bondedLinks = makeBondedLinks(mtop, cginfo_mb);
2602 /* Only communicate atoms based on cut-off */
2603 comm->bondedLinks = nullptr;
2607 static void writeSettings(gmx::TextWriter *log,
2609 const gmx_mtop_t *mtop,
2610 const t_inputrec *ir,
2611 gmx_bool bDynLoadBal,
2613 const gmx_ddbox_t *ddbox)
2615 gmx_domdec_comm_t *comm;
2624 log->writeString("The maximum number of communication pulses is:");
2625 for (d = 0; d < dd->ndim; d++)
2627 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2629 log->ensureLineBreak();
2630 log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm", comm->cellsize_limit);
2631 log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2632 log->writeString("The allowed shrink of domain decomposition cells is:");
2633 for (d = 0; d < DIM; d++)
2637 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
2644 comm->cellsize_min_dlb[d]/
2645 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
2647 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2650 log->ensureLineBreak();
2654 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2655 log->writeString("The initial number of communication pulses is:");
2656 for (d = 0; d < dd->ndim; d++)
2658 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2660 log->ensureLineBreak();
2661 log->writeString("The initial domain decomposition cell size is:");
2662 for (d = 0; d < DIM; d++)
2666 log->writeStringFormatted(" %c %.2f nm",
2667 dim2char(d), dd->comm->cellsize_min[d]);
2670 log->ensureLineBreak();
2674 const bool haveInterDomainVsites =
2675 (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
2677 if (comm->systemInfo.haveInterDomainBondeds ||
2678 haveInterDomainVsites ||
2679 comm->systemInfo.haveSplitConstraints ||
2680 comm->systemInfo.haveSplitSettles)
2682 std::string decompUnits;
2683 if (comm->systemInfo.useUpdateGroups)
2685 decompUnits = "atom groups";
2689 decompUnits = "atoms";
2692 log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:", decompUnits.c_str());
2693 log->writeLineFormatted("%40s %-7s %6.3f nm", "non-bonded interactions", "", comm->systemInfo.cutoff);
2697 limit = dd->comm->cellsize_limit;
2701 if (dd->unitCellInfo.ddBoxIsDynamic)
2703 log->writeLine("(the following are initial values, they could change due to box deformation)");
2705 limit = dd->comm->cellsize_min[XX];
2706 for (d = 1; d < DIM; d++)
2708 limit = std::min(limit, dd->comm->cellsize_min[d]);
2712 if (comm->systemInfo.haveInterDomainBondeds)
2714 log->writeLineFormatted("%40s %-7s %6.3f nm",
2715 "two-body bonded interactions", "(-rdd)",
2716 std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2717 log->writeLineFormatted("%40s %-7s %6.3f nm",
2718 "multi-body bonded interactions", "(-rdd)",
2719 (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->systemInfo.cutoff, limit));
2721 if (haveInterDomainVsites)
2723 log->writeLineFormatted("%40s %-7s %6.3f nm",
2724 "virtual site constructions", "(-rcon)", limit);
2726 if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2728 std::string separation = gmx::formatString("atoms separated by up to %d constraints",
2730 log->writeLineFormatted("%40s %-7s %6.3f nm\n",
2731 separation.c_str(), "(-rcon)", limit);
2733 log->ensureLineBreak();
2737 static void logSettings(const gmx::MDLogger &mdlog,
2739 const gmx_mtop_t *mtop,
2740 const t_inputrec *ir,
2742 const gmx_ddbox_t *ddbox)
2744 gmx::StringOutputStream stream;
2745 gmx::TextWriter log(&stream);
2746 writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2747 if (dd->comm->dlbState == DlbState::offCanTurnOn)
2750 log.ensureEmptyLine();
2751 log.writeLine("When dynamic load balancing gets turned on, these settings will change to:");
2753 writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2755 GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2758 static void set_cell_limits_dlb(const gmx::MDLogger &mdlog,
2761 const t_inputrec *ir,
2762 const gmx_ddbox_t *ddbox)
2764 gmx_domdec_comm_t *comm;
2765 int d, dim, npulse, npulse_d_max, npulse_d;
2770 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2772 /* Determine the maximum number of comm. pulses in one dimension */
2774 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2776 /* Determine the maximum required number of grid pulses */
2777 if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2779 /* Only a single pulse is required */
2782 else if (!bNoCutOff && comm->cellsize_limit > 0)
2784 /* We round down slightly here to avoid overhead due to the latency
2785 * of extra communication calls when the cut-off
2786 * would be only slightly longer than the cell size.
2787 * Later cellsize_limit is redetermined,
2788 * so we can not miss interactions due to this rounding.
2790 npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff/comm->cellsize_limit);
2794 /* There is no cell size limit */
2795 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
2798 if (!bNoCutOff && npulse > 1)
2800 /* See if we can do with less pulses, based on dlb_scale */
2802 for (d = 0; d < dd->ndim; d++)
2805 npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->systemInfo.cutoff
2806 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
2807 npulse_d_max = std::max(npulse_d_max, npulse_d);
2809 npulse = std::min(npulse, npulse_d_max);
2812 /* This env var can override npulse */
2813 d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2820 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
2821 for (d = 0; d < dd->ndim; d++)
2823 if (comm->ddSettings.request1DAnd1Pulse)
2825 comm->cd[d].np_dlb = 1;
2829 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
2830 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2832 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
2834 comm->bVacDLBNoLimit = FALSE;
2838 /* cellsize_limit is set for LINCS in init_domain_decomposition */
2839 if (!comm->bVacDLBNoLimit)
2841 comm->cellsize_limit = std::max(comm->cellsize_limit,
2842 comm->systemInfo.cutoff/comm->maxpulse);
2844 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2845 /* Set the minimum cell size for each DD dimension */
2846 for (d = 0; d < dd->ndim; d++)
2848 if (comm->bVacDLBNoLimit ||
2849 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->systemInfo.cutoff)
2851 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2855 comm->cellsize_min_dlb[dd->dim[d]] =
2856 comm->systemInfo.cutoff/comm->cd[d].np_dlb;
2859 if (comm->cutoff_mbody <= 0)
2861 comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2869 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t &dd)
2871 return dd.comm->systemInfo.moleculesAreAlwaysWhole;
2874 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
2876 /* If each molecule is a single charge group
2877 * or we use domain decomposition for each periodic dimension,
2878 * we do not need to take pbc into account for the bonded interactions.
2880 return (ePBC != epbcNONE && dd->comm->systemInfo.haveInterDomainBondeds &&
2883 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
2886 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2887 static void set_ddgrid_parameters(const gmx::MDLogger &mdlog,
2888 gmx_domdec_t *dd, real dlb_scale,
2889 const gmx_mtop_t *mtop, const t_inputrec *ir,
2890 const gmx_ddbox_t *ddbox)
2892 gmx_domdec_comm_t *comm = dd->comm;
2893 DDRankSetup &ddRankSetup = comm->ddRankSetup;
2895 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2897 init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2898 if (ddRankSetup.npmedecompdim >= 2)
2900 init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2905 ddRankSetup.numRanksDoingPme = 0;
2906 if (dd->pme_nodeid >= 0)
2908 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2909 "Can not have separate PME ranks without PME electrostatics");
2915 fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2917 if (!isDlbDisabled(comm))
2919 set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2922 logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
2925 if (ir->ePBC == epbcNONE)
2927 vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
2932 (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, *ddbox))/static_cast<double>(dd->nnodes);
2936 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2938 int natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
2940 dd->ga2la = new gmx_ga2la_t(natoms_tot,
2941 static_cast<int>(vol_frac*natoms_tot));
2944 /*! \brief Get some important DD parameters which can be modified by env.vars */
2946 getDDSettings(const gmx::MDLogger &mdlog,
2947 const DomdecOptions &options,
2948 const gmx::MdrunOptions &mdrunOptions,
2949 const t_inputrec &ir)
2951 DDSettings ddSettings;
2953 ddSettings.useSendRecv2 = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2954 ddSettings.dlb_scale_lim = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2955 ddSettings.request1DAnd1Pulse = bool(dd_getenv(mdlog, "GMX_DD_1D_1PULSE", 0));
2956 ddSettings.useDDOrderZYX = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
2957 ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
2958 ddSettings.eFlop = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2959 const int recload = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2960 ddSettings.nstDDDump = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2961 ddSettings.nstDDDumpGrid = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2962 ddSettings.DD_debug = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
2964 if (ddSettings.useSendRecv2)
2966 GMX_LOG(mdlog.info).appendText("Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication");
2969 if (ddSettings.eFlop)
2971 GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2972 ddSettings.recordLoad = true;
2976 ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
2979 ddSettings.initialDlbState =
2980 determineInitialDlbState(mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, &ir);
2981 GMX_LOG(mdlog.info).appendTextFormatted("Dynamic load balancing: %s",
2982 edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);
2987 gmx_domdec_t::gmx_domdec_t(const t_inputrec &ir) :
2992 /*! \brief Return whether the simulation described can run a 1D single-pulse DD.
2994 * The GPU halo exchange code requires a 1D single-pulse DD. Such a DD
2995 * generally requires a larger box than other possible decompositions
2996 * with the same rank count, so the calling code might need to decide
2997 * what is the most appropriate way to run the simulation based on
2998 * whether such a DD is possible.
3000 * This function works like init_domain_decomposition(), but will not
3001 * give a fatal error, and only uses \c cr for communicating between
3004 * It is safe to call before thread-MPI spawns ranks, so that
3005 * thread-MPI can decide whether and how to trigger the GPU halo
3006 * exchange code path. The number of PME ranks, if any, should be set
3007 * in \c options.numPmeRanks.
3010 canMake1DAnd1PulseDomainDecomposition(const DDSettings &ddSettingsOriginal,
3011 const t_commrec *cr,
3012 const int numRanksRequested,
3013 const DomdecOptions &options,
3014 const gmx_mtop_t &mtop,
3015 const t_inputrec &ir,
3017 gmx::ArrayRef<const gmx::RVec> xGlobal)
3019 // Ensure we don't write any output from this checking routine
3020 gmx::MDLogger dummyLogger;
3022 DDSystemInfo systemInfo = getSystemInfo(dummyLogger, cr, options, &mtop, &ir, box, xGlobal);
3024 int numPPRanksRequested = numRanksRequested - (EEL_PME(ir.coulombtype) ? options.numPmeRanks : 0);
3026 DDSettings ddSettings = ddSettingsOriginal;
3027 ddSettings.request1DAnd1Pulse = true;
3028 const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(dummyLogger, ddSettings.request1DAnd1Pulse,
3029 !isDlbDisabled(ddSettings.initialDlbState),
3030 options.dlbScaling, ir,
3031 systemInfo.cellsizeLimit);
3032 gmx_ddbox_t ddbox = {0};
3033 DDGridSetup ddGridSetup = getDDGridSetup(dummyLogger, cr, numPPRanksRequested, options,
3034 ddSettings, systemInfo, gridSetupCellsizeLimit,
3035 mtop, ir, box, xGlobal, &ddbox);
3037 const bool canMakeDDWith1DAnd1Pulse = (ddGridSetup.numDomains[XX] != 0);
3039 return canMakeDDWith1DAnd1Pulse;
3042 bool is1DAnd1PulseDD(const gmx_domdec_t &dd)
3044 const int maxDimensionSize = std::max(dd.nc[XX], std::max(dd.nc[YY], dd.nc[ZZ]));
3045 const int productOfDimensionSizes = dd.nc[XX]*dd.nc[YY]*dd.nc[ZZ];
3046 const bool decompositionHasOneDimension = (maxDimensionSize == productOfDimensionSizes);
3048 return (dd.comm->maxpulse == 1) && decompositionHasOneDimension;
3052 gmx_domdec_t *init_domain_decomposition(const gmx::MDLogger &mdlog,
3054 const DomdecOptions &options,
3055 const gmx::MdrunOptions &mdrunOptions,
3056 const bool prefer1DAnd1Pulse,
3057 const gmx_mtop_t *mtop,
3058 const t_inputrec *ir,
3060 gmx::ArrayRef<const gmx::RVec> xGlobal,
3061 gmx::LocalAtomSetManager *atomSets)
3063 GMX_LOG(mdlog.info).appendTextFormatted(
3064 "\nInitializing Domain Decomposition on %d ranks", cr->nnodes);
3066 DDSettings ddSettings = getDDSettings(mdlog, options, mdrunOptions, *ir);
3068 if (prefer1DAnd1Pulse &&
3069 canMake1DAnd1PulseDomainDecomposition(ddSettings, cr, cr->nnodes, options,
3070 *mtop, *ir, box, xGlobal))
3072 ddSettings.request1DAnd1Pulse = true;
3075 if (ddSettings.eFlop > 1)
3077 /* Ensure that we have different random flop counts on different ranks */
3078 srand(1 + cr->nodeid);
3081 DDSystemInfo systemInfo = getSystemInfo(mdlog, cr, options, mtop, ir, box, xGlobal);
3083 int numRanksRequested = cr->nnodes;
3084 checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir->coulombtype), options.numPmeRanks);
3086 // DD grid setup uses a more different cell size limit for
3087 // automated setup than the one in systemInfo. The latter is used
3088 // in set_dd_limits() to configure DLB, for example.
3089 const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(mdlog, ddSettings.request1DAnd1Pulse,
3090 !isDlbDisabled(ddSettings.initialDlbState),
3091 options.dlbScaling, *ir,
3092 systemInfo.cellsizeLimit);
3093 gmx_ddbox_t ddbox = {0};
3094 DDGridSetup ddGridSetup = getDDGridSetup(mdlog, cr, numRanksRequested, options,
3095 ddSettings, systemInfo, gridSetupCellsizeLimit,
3096 *mtop, *ir, box, xGlobal, &ddbox);
3097 checkDDGridSetup(ddGridSetup, cr, options, ddSettings, systemInfo, gridSetupCellsizeLimit, ddbox);
3099 cr->npmenodes = ddGridSetup.numPmeOnlyRanks;
3101 DDRankSetup ddRankSetup = getDDRankSetup(mdlog, cr, ddGridSetup, *ir);
3103 /* Generate the group communicator, also decides the duty of each rank */
3104 ivec ddCellIndex = { 0, 0, 0 };
3105 std::vector<int> pmeRanks;
3106 CartesianRankSetup cartSetup =
3107 makeGroupCommunicators(mdlog, ddSettings, options.rankOrder,
3109 ddCellIndex, &pmeRanks);
3111 gmx_domdec_t *dd = new gmx_domdec_t(*ir);
3113 copy_ivec(ddCellIndex, dd->ci);
3115 dd->comm = init_dd_comm();
3117 dd->comm->ddRankSetup = ddRankSetup;
3118 dd->comm->cartesianRankSetup = cartSetup;
3120 set_dd_limits(mdlog, cr, dd, options,
3121 ddSettings, systemInfo,
3123 ddRankSetup.numPPRanks,
3127 setupGroupCommunication(mdlog, ddSettings, pmeRanks, cr, dd);
3129 if (thisRankHasDuty(cr, DUTY_PP))
3131 set_ddgrid_parameters(mdlog, dd, options.dlbScaling, mtop, ir, &ddbox);
3133 setup_neighbor_relations(dd);
3136 /* Set overallocation to avoid frequent reallocation of arrays */
3137 set_over_alloc_dd(TRUE);
3139 dd->atomSets = atomSets;
3144 static gmx_bool test_dd_cutoff(t_commrec *cr,
3146 gmx::ArrayRef<const gmx::RVec> x,
3147 real cutoffRequested)
3157 set_ddbox(*dd, false, box, true, x, &ddbox);
3161 for (d = 0; d < dd->ndim; d++)
3165 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
3166 if (dd->unitCellInfo.ddBoxIsDynamic)
3168 inv_cell_size *= DD_PRES_SCALE_MARGIN;
3171 np = 1 + static_cast<int>(cutoffRequested*inv_cell_size*ddbox.skew_fac[dim]);
3173 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3175 if (np > dd->comm->cd[d].np_dlb)
3180 /* If a current local cell size is smaller than the requested
3181 * cut-off, we could still fix it, but this gets very complicated.
3182 * Without fixing here, we might actually need more checks.
3184 real cellSizeAlongDim = (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim];
3185 if (cellSizeAlongDim*dd->comm->cd[d].np_dlb < cutoffRequested)
3192 if (!isDlbDisabled(dd->comm))
3194 /* If DLB is not active yet, we don't need to check the grid jumps.
3195 * Actually we shouldn't, because then the grid jump data is not set.
3197 if (isDlbOn(dd->comm) &&
3198 check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3203 gmx_sumi(1, &LocallyLimited, cr);
3205 if (LocallyLimited > 0)
3214 gmx_bool change_dd_cutoff(t_commrec *cr,
3216 gmx::ArrayRef<const gmx::RVec> x,
3217 real cutoffRequested)
3219 gmx_bool bCutoffAllowed;
3221 bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3225 cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3228 return bCutoffAllowed;