2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
53 #include "gromacs/domdec/collect.h"
54 #include "gromacs/domdec/dlb.h"
55 #include "gromacs/domdec/dlbtiming.h"
56 #include "gromacs/domdec/domdec_network.h"
57 #include "gromacs/domdec/ga2la.h"
58 #include "gromacs/domdec/options.h"
59 #include "gromacs/domdec/partition.h"
60 #include "gromacs/gmxlib/network.h"
61 #include "gromacs/gmxlib/nrnb.h"
62 #include "gromacs/gpu_utils/gpu_utils.h"
63 #include "gromacs/hardware/hw_info.h"
64 #include "gromacs/listed_forces/manage_threading.h"
65 #include "gromacs/math/vec.h"
66 #include "gromacs/math/vectypes.h"
67 #include "gromacs/mdlib/calc_verletbuf.h"
68 #include "gromacs/mdlib/constr.h"
69 #include "gromacs/mdlib/constraintrange.h"
70 #include "gromacs/mdlib/updategroups.h"
71 #include "gromacs/mdlib/vsite.h"
72 #include "gromacs/mdtypes/commrec.h"
73 #include "gromacs/mdtypes/inputrec.h"
74 #include "gromacs/mdtypes/mdrunoptions.h"
75 #include "gromacs/mdtypes/state.h"
76 #include "gromacs/pbcutil/ishift.h"
77 #include "gromacs/pbcutil/pbc.h"
78 #include "gromacs/pulling/pull.h"
79 #include "gromacs/timing/wallcycle.h"
80 #include "gromacs/topology/block.h"
81 #include "gromacs/topology/idef.h"
82 #include "gromacs/topology/ifunc.h"
83 #include "gromacs/topology/mtop_lookup.h"
84 #include "gromacs/topology/mtop_util.h"
85 #include "gromacs/topology/topology.h"
86 #include "gromacs/utility/basedefinitions.h"
87 #include "gromacs/utility/basenetwork.h"
88 #include "gromacs/utility/cstringutil.h"
89 #include "gromacs/utility/exceptions.h"
90 #include "gromacs/utility/fatalerror.h"
91 #include "gromacs/utility/gmxmpi.h"
92 #include "gromacs/utility/logger.h"
93 #include "gromacs/utility/real.h"
94 #include "gromacs/utility/smalloc.h"
95 #include "gromacs/utility/strconvert.h"
96 #include "gromacs/utility/stringstream.h"
97 #include "gromacs/utility/stringutil.h"
98 #include "gromacs/utility/textwriter.h"
100 #include "atomdistribution.h"
102 #include "cellsizes.h"
103 #include "distribute.h"
104 #include "domdec_constraints.h"
105 #include "domdec_internal.h"
106 #include "domdec_vsite.h"
107 #include "redistribute.h"
110 // TODO remove this when moving domdec into gmx namespace
111 using gmx::DdRankOrder;
112 using gmx::DlbOption;
113 using gmx::DomdecOptions;
115 static const char *edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
117 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
120 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
121 #define DD_FLAG_NRCG 65535
122 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
123 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
125 /* The DD zone order */
126 static const ivec dd_zo[DD_MAXZONE] =
127 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
129 /* The non-bonded zone-pair setup for domain decomposition
130 * The first number is the i-zone, the second number the first j-zone seen by
131 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
132 * As is, this is for 3D decomposition, where there are 4 i-zones.
133 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
134 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
137 ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
144 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
146 static void index2xyz(ivec nc,int ind,ivec xyz)
148 xyz[XX] = ind % nc[XX];
149 xyz[YY] = (ind / nc[XX]) % nc[YY];
150 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
154 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
156 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
157 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
158 xyz[ZZ] = ind % nc[ZZ];
161 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
166 ddindex = dd_index(dd->nc, c);
167 if (dd->comm->bCartesianPP_PME)
169 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
171 else if (dd->comm->bCartesianPP)
174 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
185 int ddglatnr(const gmx_domdec_t *dd, int i)
195 if (i >= dd->comm->atomRanges.numAtomsTotal())
197 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
199 atnr = dd->globalAtomIndices[i] + 1;
205 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
207 return &dd->comm->cgs_gl;
210 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t &dd)
212 GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
213 return dd.comm->updateGroupingPerMoleculetype;
216 void dd_store_state(gmx_domdec_t *dd, t_state *state)
220 if (state->ddp_count != dd->ddp_count)
222 gmx_incons("The MD state does not match the domain decomposition state");
225 state->cg_gl.resize(dd->ncg_home);
226 for (i = 0; i < dd->ncg_home; i++)
228 state->cg_gl[i] = dd->globalAtomGroupIndices[i];
231 state->ddp_count_cg_gl = dd->ddp_count;
234 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
236 return &dd->comm->zones;
239 void dd_get_ns_ranges(const gmx_domdec_t *dd, int icg,
240 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
242 gmx_domdec_zones_t *zones;
245 zones = &dd->comm->zones;
248 while (icg >= zones->izone[izone].cg1)
257 else if (izone < zones->nizone)
259 *jcg0 = zones->izone[izone].jcg0;
263 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
264 icg, izone, zones->nizone);
267 *jcg1 = zones->izone[izone].jcg1;
269 for (d = 0; d < dd->ndim; d++)
272 shift0[dim] = zones->izone[izone].shift0[dim];
273 shift1[dim] = zones->izone[izone].shift1[dim];
274 if (dd->comm->tric_dir[dim] || (isDlbOn(dd->comm) && d > 0))
276 /* A conservative approach, this can be optimized */
283 int dd_numHomeAtoms(const gmx_domdec_t &dd)
285 return dd.comm->atomRanges.numHomeAtoms();
288 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
290 /* We currently set mdatoms entries for all atoms:
291 * local + non-local + communicated for vsite + constraints
294 return dd->comm->atomRanges.numAtomsTotal();
297 int dd_natoms_vsite(const gmx_domdec_t *dd)
299 return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
302 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
304 *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
305 *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
308 void dd_move_x(gmx_domdec_t *dd,
310 gmx::ArrayRef<gmx::RVec> x,
311 gmx_wallcycle *wcycle)
313 wallcycle_start(wcycle, ewcMOVEX);
316 gmx_domdec_comm_t *comm;
317 gmx_domdec_comm_dim_t *cd;
318 rvec shift = {0, 0, 0};
319 gmx_bool bPBC, bScrew;
324 nat_tot = comm->atomRanges.numHomeAtoms();
325 for (int d = 0; d < dd->ndim; d++)
327 bPBC = (dd->ci[dd->dim[d]] == 0);
328 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
331 copy_rvec(box[dd->dim[d]], shift);
334 for (const gmx_domdec_ind_t &ind : cd->ind)
336 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
337 gmx::ArrayRef<gmx::RVec> &sendBuffer = sendBufferAccess.buffer;
341 for (int j : ind.index)
343 sendBuffer[n] = x[j];
349 for (int j : ind.index)
351 /* We need to shift the coordinates */
352 for (int d = 0; d < DIM; d++)
354 sendBuffer[n][d] = x[j][d] + shift[d];
361 for (int j : ind.index)
364 sendBuffer[n][XX] = x[j][XX] + shift[XX];
366 * This operation requires a special shift force
367 * treatment, which is performed in calc_vir.
369 sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
370 sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
375 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
377 gmx::ArrayRef<gmx::RVec> receiveBuffer;
378 if (cd->receiveInPlace)
380 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
384 receiveBuffer = receiveBufferAccess.buffer;
386 /* Send and receive the coordinates */
387 ddSendrecv(dd, d, dddirBackward,
388 sendBuffer, receiveBuffer);
390 if (!cd->receiveInPlace)
393 for (int zone = 0; zone < nzone; zone++)
395 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
397 x[i] = receiveBuffer[j++];
401 nat_tot += ind.nrecv[nzone+1];
406 wallcycle_stop(wcycle, ewcMOVEX);
409 void dd_move_f(gmx_domdec_t *dd,
410 gmx::ArrayRef<gmx::RVec> f,
412 gmx_wallcycle *wcycle)
414 wallcycle_start(wcycle, ewcMOVEF);
417 gmx_domdec_comm_t *comm;
418 gmx_domdec_comm_dim_t *cd;
421 gmx_bool bShiftForcesNeedPbc, bScrew;
425 nzone = comm->zones.n/2;
426 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
427 for (int d = dd->ndim-1; d >= 0; d--)
429 /* Only forces in domains near the PBC boundaries need to
430 consider PBC in the treatment of fshift */
431 bShiftForcesNeedPbc = (dd->ci[dd->dim[d]] == 0);
432 bScrew = (bShiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX);
433 if (fshift == nullptr && !bScrew)
435 bShiftForcesNeedPbc = FALSE;
437 /* Determine which shift vector we need */
443 for (int p = cd->numPulses() - 1; p >= 0; p--)
445 const gmx_domdec_ind_t &ind = cd->ind[p];
446 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
447 gmx::ArrayRef<gmx::RVec> &receiveBuffer = receiveBufferAccess.buffer;
449 nat_tot -= ind.nrecv[nzone+1];
451 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
453 gmx::ArrayRef<gmx::RVec> sendBuffer;
454 if (cd->receiveInPlace)
456 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
460 sendBuffer = sendBufferAccess.buffer;
462 for (int zone = 0; zone < nzone; zone++)
464 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
466 sendBuffer[j++] = f[i];
470 /* Communicate the forces */
471 ddSendrecv(dd, d, dddirForward,
472 sendBuffer, receiveBuffer);
473 /* Add the received forces */
475 if (!bShiftForcesNeedPbc)
477 for (int j : ind.index)
479 for (int d = 0; d < DIM; d++)
481 f[j][d] += receiveBuffer[n][d];
488 /* fshift should always be defined if this function is
489 * called when bShiftForcesNeedPbc is true */
490 assert(nullptr != fshift);
491 for (int j : ind.index)
493 for (int d = 0; d < DIM; d++)
495 f[j][d] += receiveBuffer[n][d];
497 /* Add this force to the shift force */
498 for (int d = 0; d < DIM; d++)
500 fshift[is][d] += receiveBuffer[n][d];
507 for (int j : ind.index)
509 /* Rotate the force */
510 f[j][XX] += receiveBuffer[n][XX];
511 f[j][YY] -= receiveBuffer[n][YY];
512 f[j][ZZ] -= receiveBuffer[n][ZZ];
515 /* Add this force to the shift force */
516 for (int d = 0; d < DIM; d++)
518 fshift[is][d] += receiveBuffer[n][d];
527 wallcycle_stop(wcycle, ewcMOVEF);
530 /* Convenience function for extracting a real buffer from an rvec buffer
532 * To reduce the number of temporary communication buffers and avoid
533 * cache polution, we reuse gmx::RVec buffers for storing reals.
534 * This functions return a real buffer reference with the same number
535 * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
537 static gmx::ArrayRef<real>
538 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
540 return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
544 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
547 gmx_domdec_comm_t *comm;
548 gmx_domdec_comm_dim_t *cd;
553 nat_tot = comm->atomRanges.numHomeAtoms();
554 for (int d = 0; d < dd->ndim; d++)
557 for (const gmx_domdec_ind_t &ind : cd->ind)
559 /* Note: We provision for RVec instead of real, so a factor of 3
560 * more than needed. The buffer actually already has this size
561 * and we pass a plain pointer below, so this does not matter.
563 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
564 gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
566 for (int j : ind.index)
568 sendBuffer[n++] = v[j];
571 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
573 gmx::ArrayRef<real> receiveBuffer;
574 if (cd->receiveInPlace)
576 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
580 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
582 /* Send and receive the data */
583 ddSendrecv(dd, d, dddirBackward,
584 sendBuffer, receiveBuffer);
585 if (!cd->receiveInPlace)
588 for (int zone = 0; zone < nzone; zone++)
590 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
592 v[i] = receiveBuffer[j++];
596 nat_tot += ind.nrecv[nzone+1];
602 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
605 gmx_domdec_comm_t *comm;
606 gmx_domdec_comm_dim_t *cd;
610 nzone = comm->zones.n/2;
611 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
612 for (int d = dd->ndim-1; d >= 0; d--)
615 for (int p = cd->numPulses() - 1; p >= 0; p--)
617 const gmx_domdec_ind_t &ind = cd->ind[p];
619 /* Note: We provision for RVec instead of real, so a factor of 3
620 * more than needed. The buffer actually already has this size
621 * and we typecast, so this works as intended.
623 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
624 gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
625 nat_tot -= ind.nrecv[nzone + 1];
627 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
629 gmx::ArrayRef<real> sendBuffer;
630 if (cd->receiveInPlace)
632 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
636 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
638 for (int zone = 0; zone < nzone; zone++)
640 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
642 sendBuffer[j++] = v[i];
646 /* Communicate the forces */
647 ddSendrecv(dd, d, dddirForward,
648 sendBuffer, receiveBuffer);
649 /* Add the received forces */
651 for (int j : ind.index)
653 v[j] += receiveBuffer[n];
661 real dd_cutoff_multibody(const gmx_domdec_t *dd)
663 gmx_domdec_comm_t *comm;
670 if (comm->haveInterDomainMultiBodyBondeds)
672 if (comm->cutoff_mbody > 0)
674 r = comm->cutoff_mbody;
678 /* cutoff_mbody=0 means we do not have DLB */
679 r = comm->cellsize_min[dd->dim[0]];
680 for (di = 1; di < dd->ndim; di++)
682 r = std::min(r, comm->cellsize_min[dd->dim[di]]);
686 r = std::max(r, comm->cutoff_mbody);
690 r = std::min(r, comm->cutoff);
698 real dd_cutoff_twobody(const gmx_domdec_t *dd)
702 r_mb = dd_cutoff_multibody(dd);
704 return std::max(dd->comm->cutoff, r_mb);
708 static void dd_cart_coord2pmecoord(const gmx_domdec_t *dd, const ivec coord,
713 nc = dd->nc[dd->comm->cartpmedim];
714 ntot = dd->comm->ntot[dd->comm->cartpmedim];
715 copy_ivec(coord, coord_pme);
716 coord_pme[dd->comm->cartpmedim] =
717 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
720 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
725 npme = dd->comm->npmenodes;
727 /* Here we assign a PME node to communicate with this DD node
728 * by assuming that the major index of both is x.
729 * We add cr->npmenodes/2 to obtain an even distribution.
731 return (ddindex*npme + npme/2)/npp;
734 static int *dd_interleaved_pme_ranks(const gmx_domdec_t *dd)
739 snew(pme_rank, dd->comm->npmenodes);
741 for (i = 0; i < dd->nnodes; i++)
743 p0 = ddindex2pmeindex(dd, i);
744 p1 = ddindex2pmeindex(dd, i+1);
745 if (i+1 == dd->nnodes || p1 > p0)
749 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
751 pme_rank[n] = i + 1 + n;
759 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
767 if (dd->comm->bCartesian) {
768 gmx_ddindex2xyz(dd->nc,ddindex,coords);
769 dd_coords2pmecoords(dd,coords,coords_pme);
770 copy_ivec(dd->ntot,nc);
771 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
772 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
774 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
776 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
782 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
787 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
789 gmx_domdec_comm_t *comm;
791 int ddindex, nodeid = -1;
798 if (comm->bCartesianPP_PME)
801 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
806 ddindex = dd_index(cr->dd->nc, coords);
807 if (comm->bCartesianPP)
809 nodeid = comm->ddindex2simnodeid[ddindex];
815 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
827 static int dd_simnode2pmenode(const gmx_domdec_t *dd,
828 const t_commrec gmx_unused *cr,
833 const gmx_domdec_comm_t *comm = dd->comm;
835 /* This assumes a uniform x domain decomposition grid cell size */
836 if (comm->bCartesianPP_PME)
839 ivec coord, coord_pme;
840 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
841 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
843 /* This is a PP node */
844 dd_cart_coord2pmecoord(dd, coord, coord_pme);
845 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
849 else if (comm->bCartesianPP)
851 if (sim_nodeid < dd->nnodes)
853 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
858 /* This assumes DD cells with identical x coordinates
859 * are numbered sequentially.
861 if (dd->comm->pmenodes == nullptr)
863 if (sim_nodeid < dd->nnodes)
865 /* The DD index equals the nodeid */
866 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
872 while (sim_nodeid > dd->comm->pmenodes[i])
876 if (sim_nodeid < dd->comm->pmenodes[i])
878 pmenode = dd->comm->pmenodes[i];
886 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
891 dd->comm->npmenodes_x, dd->comm->npmenodes_y
902 std::vector<int> get_pme_ddranks(const t_commrec *cr, int pmenodeid)
906 ivec coord, coord_pme;
910 std::vector<int> ddranks;
911 ddranks.reserve((dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
913 for (x = 0; x < dd->nc[XX]; x++)
915 for (y = 0; y < dd->nc[YY]; y++)
917 for (z = 0; z < dd->nc[ZZ]; z++)
919 if (dd->comm->bCartesianPP_PME)
924 dd_cart_coord2pmecoord(dd, coord, coord_pme);
925 if (dd->ci[XX] == coord_pme[XX] &&
926 dd->ci[YY] == coord_pme[YY] &&
927 dd->ci[ZZ] == coord_pme[ZZ])
929 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
934 /* The slab corresponds to the nodeid in the PME group */
935 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
937 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
946 static gmx_bool receive_vir_ener(const gmx_domdec_t *dd, const t_commrec *cr)
948 gmx_bool bReceive = TRUE;
950 if (cr->npmenodes < dd->nnodes)
952 gmx_domdec_comm_t *comm = dd->comm;
953 if (comm->bCartesianPP_PME)
956 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
958 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
959 coords[comm->cartpmedim]++;
960 if (coords[comm->cartpmedim] < dd->nc[comm->cartpmedim])
963 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
964 if (dd_simnode2pmenode(dd, cr, rank) == pmenode)
966 /* This is not the last PP node for pmenode */
971 GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
976 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
977 if (cr->sim_nodeid+1 < cr->nnodes &&
978 dd_simnode2pmenode(dd, cr, cr->sim_nodeid+1) == pmenode)
980 /* This is not the last PP node for pmenode */
989 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
991 gmx_domdec_comm_t *comm;
996 snew(*dim_f, dd->nc[dim]+1);
998 for (i = 1; i < dd->nc[dim]; i++)
1000 if (comm->slb_frac[dim])
1002 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
1006 (*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
1009 (*dim_f)[dd->nc[dim]] = 1;
1012 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
1014 int pmeindex, slab, nso, i;
1017 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
1023 ddpme->dim = dimind;
1025 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
1027 ddpme->nslab = (ddpme->dim == 0 ?
1028 dd->comm->npmenodes_x :
1029 dd->comm->npmenodes_y);
1031 if (ddpme->nslab <= 1)
1036 nso = dd->comm->npmenodes/ddpme->nslab;
1037 /* Determine for each PME slab the PP location range for dimension dim */
1038 snew(ddpme->pp_min, ddpme->nslab);
1039 snew(ddpme->pp_max, ddpme->nslab);
1040 for (slab = 0; slab < ddpme->nslab; slab++)
1042 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
1043 ddpme->pp_max[slab] = 0;
1045 for (i = 0; i < dd->nnodes; i++)
1047 ddindex2xyz(dd->nc, i, xyz);
1048 /* For y only use our y/z slab.
1049 * This assumes that the PME x grid size matches the DD grid size.
1051 if (dimind == 0 || xyz[XX] == dd->ci[XX])
1053 pmeindex = ddindex2pmeindex(dd, i);
1056 slab = pmeindex/nso;
1060 slab = pmeindex % ddpme->nslab;
1062 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
1063 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1067 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1070 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1072 if (dd->comm->ddpme[0].dim == XX)
1074 return dd->comm->ddpme[0].maxshift;
1082 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1084 if (dd->comm->ddpme[0].dim == YY)
1086 return dd->comm->ddpme[0].maxshift;
1088 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
1090 return dd->comm->ddpme[1].maxshift;
1098 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
1100 /* Note that the cycles value can be incorrect, either 0 or some
1101 * extremely large value, when our thread migrated to another core
1102 * with an unsynchronized cycle counter. If this happens less often
1103 * that once per nstlist steps, this will not cause issues, since
1104 * we later subtract the maximum value from the sum over nstlist steps.
1105 * A zero count will slightly lower the total, but that's a small effect.
1106 * Note that the main purpose of the subtraction of the maximum value
1107 * is to avoid throwing off the load balancing when stalls occur due
1108 * e.g. system activity or network congestion.
1110 dd->comm->cycl[ddCycl] += cycles;
1111 dd->comm->cycl_n[ddCycl]++;
1112 if (cycles > dd->comm->cycl_max[ddCycl])
1114 dd->comm->cycl_max[ddCycl] = cycles;
1119 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
1124 gmx_bool bPartOfGroup = FALSE;
1126 dim = dd->dim[dim_ind];
1127 copy_ivec(loc, loc_c);
1128 for (i = 0; i < dd->nc[dim]; i++)
1131 rank = dd_index(dd->nc, loc_c);
1132 if (rank == dd->rank)
1134 /* This process is part of the group */
1135 bPartOfGroup = TRUE;
1138 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
1142 dd->comm->mpi_comm_load[dim_ind] = c_row;
1143 if (!isDlbDisabled(dd->comm))
1145 DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1147 if (dd->ci[dim] == dd->master_ci[dim])
1149 /* This is the root process of this row */
1150 cellsizes.rowMaster = std::make_unique<RowMaster>();
1152 RowMaster &rowMaster = *cellsizes.rowMaster;
1153 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1154 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
1155 rowMaster.isCellMin.resize(dd->nc[dim]);
1158 rowMaster.bounds.resize(dd->nc[dim]);
1160 rowMaster.buf_ncd.resize(dd->nc[dim]);
1164 /* This is not a root process, we only need to receive cell_f */
1165 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1168 if (dd->ci[dim] == dd->master_ci[dim])
1170 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
1176 void dd_setup_dlb_resource_sharing(t_commrec *cr,
1180 int physicalnode_id_hash;
1182 MPI_Comm mpi_comm_pp_physicalnode;
1184 if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1186 /* Only ranks with short-ranged tasks (currently) use GPUs.
1187 * If we don't have GPUs assigned, there are no resources to share.
1192 physicalnode_id_hash = gmx_physicalnode_id_hash();
1198 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1199 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
1200 dd->rank, physicalnode_id_hash, gpu_id);
1202 /* Split the PP communicator over the physical nodes */
1203 /* TODO: See if we should store this (before), as it's also used for
1204 * for the nodecomm summation.
1206 // TODO PhysicalNodeCommunicator could be extended/used to handle
1207 // the need for per-node per-group communicators.
1208 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
1209 &mpi_comm_pp_physicalnode);
1210 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
1211 &dd->comm->mpi_comm_gpu_shared);
1212 MPI_Comm_free(&mpi_comm_pp_physicalnode);
1213 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1217 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1220 /* Note that some ranks could share a GPU, while others don't */
1222 if (dd->comm->nrank_gpu_shared == 1)
1224 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1227 GMX_UNUSED_VALUE(cr);
1228 GMX_UNUSED_VALUE(gpu_id);
1232 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
1235 int dim0, dim1, i, j;
1240 fprintf(debug, "Making load communicators\n");
1243 snew(dd->comm->load, std::max(dd->ndim, 1));
1244 snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1252 make_load_communicator(dd, 0, loc);
1256 for (i = 0; i < dd->nc[dim0]; i++)
1259 make_load_communicator(dd, 1, loc);
1265 for (i = 0; i < dd->nc[dim0]; i++)
1269 for (j = 0; j < dd->nc[dim1]; j++)
1272 make_load_communicator(dd, 2, loc);
1279 fprintf(debug, "Finished making load communicators\n");
1284 /*! \brief Sets up the relation between neighboring domains and zones */
1285 static void setup_neighbor_relations(gmx_domdec_t *dd)
1287 int d, dim, i, j, m;
1289 gmx_domdec_zones_t *zones;
1290 gmx_domdec_ns_ranges_t *izone;
1292 for (d = 0; d < dd->ndim; d++)
1295 copy_ivec(dd->ci, tmp);
1296 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
1297 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1298 copy_ivec(dd->ci, tmp);
1299 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
1300 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1303 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
1306 dd->neighbor[d][1]);
1310 int nzone = (1 << dd->ndim);
1311 int nizone = (1 << std::max(dd->ndim - 1, 0));
1312 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1314 zones = &dd->comm->zones;
1316 for (i = 0; i < nzone; i++)
1319 clear_ivec(zones->shift[i]);
1320 for (d = 0; d < dd->ndim; d++)
1322 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1327 for (i = 0; i < nzone; i++)
1329 for (d = 0; d < DIM; d++)
1331 s[d] = dd->ci[d] - zones->shift[i][d];
1336 else if (s[d] >= dd->nc[d])
1342 zones->nizone = nizone;
1343 for (i = 0; i < zones->nizone; i++)
1345 assert(ddNonbondedZonePairRanges[i][0] == i);
1347 izone = &zones->izone[i];
1348 /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
1349 * j-zones up to nzone.
1351 izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
1352 izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
1353 for (dim = 0; dim < DIM; dim++)
1355 if (dd->nc[dim] == 1)
1357 /* All shifts should be allowed */
1358 izone->shift0[dim] = -1;
1359 izone->shift1[dim] = 1;
1363 /* Determine the min/max j-zone shift wrt the i-zone */
1364 izone->shift0[dim] = 1;
1365 izone->shift1[dim] = -1;
1366 for (j = izone->j0; j < izone->j1; j++)
1368 int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
1369 if (shift_diff < izone->shift0[dim])
1371 izone->shift0[dim] = shift_diff;
1373 if (shift_diff > izone->shift1[dim])
1375 izone->shift1[dim] = shift_diff;
1382 if (!isDlbDisabled(dd->comm))
1384 dd->comm->cellsizesWithDlb.resize(dd->ndim);
1387 if (dd->comm->bRecordLoad)
1389 make_load_communicators(dd);
1393 static void make_pp_communicator(const gmx::MDLogger &mdlog,
1395 t_commrec gmx_unused *cr,
1396 bool gmx_unused reorder)
1399 gmx_domdec_comm_t *comm;
1406 if (comm->bCartesianPP)
1408 /* Set up cartesian communication for the particle-particle part */
1409 GMX_LOG(mdlog.info).appendTextFormatted(
1410 "Will use a Cartesian communicator: %d x %d x %d",
1411 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
1413 for (int i = 0; i < DIM; i++)
1417 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
1419 /* We overwrite the old communicator with the new cartesian one */
1420 cr->mpi_comm_mygroup = comm_cart;
1423 dd->mpi_comm_all = cr->mpi_comm_mygroup;
1424 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1426 if (comm->bCartesianPP_PME)
1428 /* Since we want to use the original cartesian setup for sim,
1429 * and not the one after split, we need to make an index.
1431 snew(comm->ddindex2ddnodeid, dd->nnodes);
1432 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
1433 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
1434 /* Get the rank of the DD master,
1435 * above we made sure that the master node is a PP node.
1445 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1447 else if (comm->bCartesianPP)
1449 if (cr->npmenodes == 0)
1451 /* The PP communicator is also
1452 * the communicator for this simulation
1454 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1456 cr->nodeid = dd->rank;
1458 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1460 /* We need to make an index to go from the coordinates
1461 * to the nodeid of this simulation.
1463 snew(comm->ddindex2simnodeid, dd->nnodes);
1464 snew(buf, dd->nnodes);
1465 if (thisRankHasDuty(cr, DUTY_PP))
1467 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1469 /* Communicate the ddindex to simulation nodeid index */
1470 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
1471 cr->mpi_comm_mysim);
1474 /* Determine the master coordinates and rank.
1475 * The DD master should be the same node as the master of this sim.
1477 for (int i = 0; i < dd->nnodes; i++)
1479 if (comm->ddindex2simnodeid[i] == 0)
1481 ddindex2xyz(dd->nc, i, dd->master_ci);
1482 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1487 fprintf(debug, "The master rank is %d\n", dd->masterrank);
1492 /* No Cartesian communicators */
1493 /* We use the rank in dd->comm->all as DD index */
1494 ddindex2xyz(dd->nc, dd->rank, dd->ci);
1495 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1497 clear_ivec(dd->master_ci);
1501 GMX_LOG(mdlog.info).appendTextFormatted(
1502 "Domain decomposition rank %d, coordinates %d %d %d\n",
1503 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1507 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
1508 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1512 static void receive_ddindex2simnodeid(gmx_domdec_t *dd,
1516 gmx_domdec_comm_t *comm = dd->comm;
1518 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
1521 snew(comm->ddindex2simnodeid, dd->nnodes);
1522 snew(buf, dd->nnodes);
1523 if (thisRankHasDuty(cr, DUTY_PP))
1525 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1527 /* Communicate the ddindex to simulation nodeid index */
1528 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
1529 cr->mpi_comm_mysim);
1533 GMX_UNUSED_VALUE(dd);
1534 GMX_UNUSED_VALUE(cr);
1538 static void split_communicator(const gmx::MDLogger &mdlog,
1539 t_commrec *cr, gmx_domdec_t *dd,
1540 DdRankOrder gmx_unused rankOrder,
1541 bool gmx_unused reorder)
1543 gmx_domdec_comm_t *comm;
1552 if (comm->bCartesianPP)
1554 for (i = 1; i < DIM; i++)
1556 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
1558 if (bDiv[YY] || bDiv[ZZ])
1560 comm->bCartesianPP_PME = TRUE;
1561 /* If we have 2D PME decomposition, which is always in x+y,
1562 * we stack the PME only nodes in z.
1563 * Otherwise we choose the direction that provides the thinnest slab
1564 * of PME only nodes as this will have the least effect
1565 * on the PP communication.
1566 * But for the PME communication the opposite might be better.
1568 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
1570 dd->nc[YY] > dd->nc[ZZ]))
1572 comm->cartpmedim = ZZ;
1576 comm->cartpmedim = YY;
1578 comm->ntot[comm->cartpmedim]
1579 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
1583 GMX_LOG(mdlog.info).appendTextFormatted(
1584 "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)",
1585 cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
1586 GMX_LOG(mdlog.info).appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1590 if (comm->bCartesianPP_PME)
1596 GMX_LOG(mdlog.info).appendTextFormatted(
1597 "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1598 comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
1600 for (i = 0; i < DIM; i++)
1604 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, static_cast<int>(reorder),
1606 MPI_Comm_rank(comm_cart, &rank);
1607 if (MASTER(cr) && rank != 0)
1609 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1612 /* With this assigment we loose the link to the original communicator
1613 * which will usually be MPI_COMM_WORLD, unless have multisim.
1615 cr->mpi_comm_mysim = comm_cart;
1616 cr->sim_nodeid = rank;
1618 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
1620 GMX_LOG(mdlog.info).appendTextFormatted(
1621 "Cartesian rank %d, coordinates %d %d %d\n",
1622 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1624 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1628 if (cr->npmenodes == 0 ||
1629 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
1631 cr->duty = DUTY_PME;
1634 /* Split the sim communicator into PP and PME only nodes */
1635 MPI_Comm_split(cr->mpi_comm_mysim,
1636 getThisRankDuties(cr),
1637 dd_index(comm->ntot, dd->ci),
1638 &cr->mpi_comm_mygroup);
1645 case DdRankOrder::pp_pme:
1646 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1648 case DdRankOrder::interleave:
1649 /* Interleave the PP-only and PME-only ranks */
1650 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1651 comm->pmenodes = dd_interleaved_pme_ranks(dd);
1653 case DdRankOrder::cartesian:
1656 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(rankOrder));
1659 if (dd_simnode2pmenode(dd, cr, cr->sim_nodeid) == -1)
1661 cr->duty = DUTY_PME;
1668 /* Split the sim communicator into PP and PME only nodes */
1669 MPI_Comm_split(cr->mpi_comm_mysim,
1670 getThisRankDuties(cr),
1672 &cr->mpi_comm_mygroup);
1673 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1677 GMX_LOG(mdlog.info).appendTextFormatted(
1678 "This rank does only %s work.\n",
1679 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1682 /*! \brief Generates the MPI communicators for domain decomposition */
1683 static void make_dd_communicators(const gmx::MDLogger &mdlog,
1685 gmx_domdec_t *dd, DdRankOrder ddRankOrder)
1687 gmx_domdec_comm_t *comm;
1692 copy_ivec(dd->nc, comm->ntot);
1694 comm->bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
1695 comm->bCartesianPP_PME = FALSE;
1697 /* Reorder the nodes by default. This might change the MPI ranks.
1698 * Real reordering is only supported on very few architectures,
1699 * Blue Gene is one of them.
1701 CartReorder = getenv("GMX_NO_CART_REORDER") == nullptr;
1703 if (cr->npmenodes > 0)
1705 /* Split the communicator into a PP and PME part */
1706 split_communicator(mdlog, cr, dd, ddRankOrder, CartReorder);
1707 if (comm->bCartesianPP_PME)
1709 /* We (possibly) reordered the nodes in split_communicator,
1710 * so it is no longer required in make_pp_communicator.
1712 CartReorder = FALSE;
1717 /* All nodes do PP and PME */
1719 /* We do not require separate communicators */
1720 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
1724 if (thisRankHasDuty(cr, DUTY_PP))
1726 /* Copy or make a new PP communicator */
1727 make_pp_communicator(mdlog, dd, cr, CartReorder);
1731 receive_ddindex2simnodeid(dd, cr);
1734 if (!thisRankHasDuty(cr, DUTY_PME))
1736 /* Set up the commnuication to our PME node */
1737 dd->pme_nodeid = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
1738 dd->pme_receive_vir_ener = receive_vir_ener(dd, cr);
1741 fprintf(debug, "My pme_nodeid %d receive ener %s\n",
1742 dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
1747 dd->pme_nodeid = -1;
1750 /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1753 dd->ma = std::make_unique<AtomDistribution>(dd->nc,
1755 comm->cgs_gl.index[comm->cgs_gl.nr]);
1759 static real *get_slb_frac(const gmx::MDLogger &mdlog,
1760 const char *dir, int nc, const char *size_string)
1762 real *slb_frac, tot;
1767 if (nc > 1 && size_string != nullptr)
1769 GMX_LOG(mdlog.info).appendTextFormatted(
1770 "Using static load balancing for the %s direction", dir);
1773 for (i = 0; i < nc; i++)
1776 sscanf(size_string, "%20lf%n", &dbl, &n);
1779 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
1786 std::string relativeCellSizes = "Relative cell sizes:";
1787 for (i = 0; i < nc; i++)
1790 relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1792 GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1798 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
1801 gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1803 while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1805 for (auto &ilist : extractILists(*ilists, IF_BOND))
1807 if (NRAL(ilist.functionType) > 2)
1809 n += nmol*(ilist.iatoms.size()/ilistStride(ilist));
1817 static int dd_getenv(const gmx::MDLogger &mdlog,
1818 const char *env_var, int def)
1824 val = getenv(env_var);
1827 if (sscanf(val, "%20d", &nst) <= 0)
1831 GMX_LOG(mdlog.info).appendTextFormatted(
1832 "Found env.var. %s = %s, using value %d",
1839 static void check_dd_restrictions(const gmx_domdec_t *dd,
1840 const t_inputrec *ir,
1841 const gmx::MDLogger &mdlog)
1843 if (ir->ePBC == epbcSCREW &&
1844 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
1846 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
1849 if (ir->ns_type == ensSIMPLE)
1851 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
1854 if (ir->nstlist == 0)
1856 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1859 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
1861 GMX_LOG(mdlog.warning).appendText("comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
1865 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
1870 r = ddbox->box_size[XX];
1871 for (di = 0; di < dd->ndim; di++)
1874 /* Check using the initial average cell size */
1875 r = std::min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
1881 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
1883 static DlbState forceDlbOffOrBail(DlbState cmdlineDlbState,
1884 const std::string &reasonStr,
1885 const gmx::MDLogger &mdlog)
1887 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
1888 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
1890 if (cmdlineDlbState == DlbState::onUser)
1892 gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1894 else if (cmdlineDlbState == DlbState::offCanTurnOn)
1896 GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1898 return DlbState::offForever;
1901 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1903 * This function parses the parameters of "-dlb" command line option setting
1904 * corresponding state values. Then it checks the consistency of the determined
1905 * state with other run parameters and settings. As a result, the initial state
1906 * may be altered or an error may be thrown if incompatibility of options is detected.
1908 * \param [in] mdlog Logger.
1909 * \param [in] dlbOption Enum value for the DLB option.
1910 * \param [in] bRecordLoad True if the load balancer is recording load information.
1911 * \param [in] mdrunOptions Options for mdrun.
1912 * \param [in] ir Pointer mdrun to input parameters.
1913 * \returns DLB initial/startup state.
1915 static DlbState determineInitialDlbState(const gmx::MDLogger &mdlog,
1916 DlbOption dlbOption, gmx_bool bRecordLoad,
1917 const gmx::MdrunOptions &mdrunOptions,
1918 const t_inputrec *ir)
1920 DlbState dlbState = DlbState::offCanTurnOn;
1924 case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1925 case DlbOption::no: dlbState = DlbState::offUser; break;
1926 case DlbOption::yes: dlbState = DlbState::onUser; break;
1927 default: gmx_incons("Invalid dlbOption enum value");
1930 /* Reruns don't support DLB: bail or override auto mode */
1931 if (mdrunOptions.rerun)
1933 std::string reasonStr = "it is not supported in reruns.";
1934 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1937 /* Unsupported integrators */
1938 if (!EI_DYNAMICS(ir->eI))
1940 auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1941 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1944 /* Without cycle counters we can't time work to balance on */
1947 std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
1948 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1951 if (mdrunOptions.reproducible)
1953 std::string reasonStr = "you started a reproducible run.";
1956 case DlbState::offUser:
1958 case DlbState::offForever:
1959 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1961 case DlbState::offCanTurnOn:
1962 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1963 case DlbState::onCanTurnOff:
1964 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1966 case DlbState::onUser:
1967 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", mdlog);
1969 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", static_cast<int>(dlbState));
1976 static void set_dd_dim(const gmx::MDLogger &mdlog, gmx_domdec_t *dd)
1979 if (getenv("GMX_DD_ORDER_ZYX") != nullptr)
1981 /* Decomposition order z,y,x */
1982 GMX_LOG(mdlog.info).appendText("Using domain decomposition order z, y, x");
1983 for (int dim = DIM-1; dim >= 0; dim--)
1985 if (dd->nc[dim] > 1)
1987 dd->dim[dd->ndim++] = dim;
1993 /* Decomposition order x,y,z */
1994 for (int dim = 0; dim < DIM; dim++)
1996 if (dd->nc[dim] > 1)
1998 dd->dim[dd->ndim++] = dim;
2005 /* Set dim[0] to avoid extra checks on ndim in several places */
2010 static gmx_domdec_comm_t *init_dd_comm()
2012 gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
2014 comm->n_load_have = 0;
2015 comm->n_load_collect = 0;
2017 comm->haveTurnedOffDlb = false;
2019 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
2021 comm->sum_nat[i] = 0;
2025 comm->load_step = 0;
2028 clear_ivec(comm->load_lim);
2032 /* This should be replaced by a unique pointer */
2033 comm->balanceRegion = ddBalanceRegionAllocate();
2038 /* Returns whether mtop contains constraints and/or vsites */
2039 static bool systemHasConstraintsOrVsites(const gmx_mtop_t &mtop)
2041 auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
2043 while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
2045 if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
2054 static void setupUpdateGroups(const gmx::MDLogger &mdlog,
2055 const gmx_mtop_t &mtop,
2056 const t_inputrec &inputrec,
2058 int numMpiRanksTotal,
2059 gmx_domdec_comm_t *comm)
2061 /* When we have constraints and/or vsites, it is beneficial to use
2062 * update groups (when possible) to allow independent update of groups.
2064 if (!systemHasConstraintsOrVsites(mtop))
2066 /* No constraints or vsites, atoms can be updated independently */
2070 comm->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
2071 comm->useUpdateGroups =
2072 (!comm->updateGroupingPerMoleculetype.empty() &&
2073 getenv("GMX_NO_UPDATEGROUPS") == nullptr);
2075 if (comm->useUpdateGroups)
2077 int numUpdateGroups = 0;
2078 for (const auto &molblock : mtop.molblock)
2080 numUpdateGroups += molblock.nmol*comm->updateGroupingPerMoleculetype[molblock.type].numBlocks();
2083 /* Note: We would like to use dd->nnodes for the atom count estimate,
2084 * but that is not yet available here. But this anyhow only
2085 * affect performance up to the second dd_partition_system call.
2087 int homeAtomCountEstimate = mtop.natoms/numMpiRanksTotal;
2088 comm->updateGroupsCog =
2089 std::make_unique<gmx::UpdateGroupsCog>(mtop,
2090 comm->updateGroupingPerMoleculetype,
2091 maxReferenceTemperature(inputrec),
2092 homeAtomCountEstimate);
2094 /* To use update groups, the large domain-to-domain cutoff distance
2095 * should be compatible with the box size.
2097 comm->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*comm, 0) < cutoffMargin);
2099 if (comm->useUpdateGroups)
2101 GMX_LOG(mdlog.info).appendTextFormatted(
2102 "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f nm\n",
2104 mtop.natoms/static_cast<double>(numUpdateGroups),
2105 comm->updateGroupsCog->maxUpdateGroupRadius());
2109 GMX_LOG(mdlog.info).appendTextFormatted("The combination of rlist and box size prohibits the use of update groups\n");
2110 comm->updateGroupingPerMoleculetype.clear();
2111 comm->updateGroupsCog.reset(nullptr);
2116 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2117 static void set_dd_limits_and_grid(const gmx::MDLogger &mdlog,
2118 t_commrec *cr, gmx_domdec_t *dd,
2119 const DomdecOptions &options,
2120 const gmx::MdrunOptions &mdrunOptions,
2121 const gmx_mtop_t *mtop,
2122 const t_inputrec *ir,
2124 gmx::ArrayRef<const gmx::RVec> xGlobal,
2128 real r_bonded_limit = -1;
2129 const real tenPercentMargin = 1.1;
2130 gmx_domdec_comm_t *comm = dd->comm;
2132 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
2133 dd->numBoundedDimensions = inputrec2nboundeddim(ir);
2134 dd->haveDynamicBox = inputrecDynamicBox(ir);
2135 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
2137 dd->pme_recv_f_alloc = 0;
2138 dd->pme_recv_f_buf = nullptr;
2140 /* Initialize to GPU share count to 0, might change later */
2141 comm->nrank_gpu_shared = 0;
2143 comm->dlbState = determineInitialDlbState(mdlog, options.dlbOption, comm->bRecordLoad, mdrunOptions, ir);
2144 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2145 /* To consider turning DLB on after 2*nstlist steps we need to check
2146 * at partitioning count 3. Thus we need to increase the first count by 2.
2148 comm->ddPartioningCountFirstDlbOff += 2;
2150 GMX_LOG(mdlog.info).appendTextFormatted(
2151 "Dynamic load balancing: %s", edlbs_names[int(comm->dlbState)]);
2153 comm->bPMELoadBalDLBLimits = FALSE;
2155 /* Allocate the charge group/atom sorting struct */
2156 comm->sort = std::make_unique<gmx_domdec_sort_t>();
2158 /* We need to decide on update groups early, as this affects communication distances */
2159 comm->useUpdateGroups = false;
2160 if (ir->cutoff_scheme == ecutsVERLET)
2162 real cutoffMargin = std::sqrt(max_cutoff2(ir->ePBC, box)) - ir->rlist;
2163 setupUpdateGroups(mdlog, *mtop, *ir, cutoffMargin, cr->nnodes, comm);
2166 // TODO: Check whether all bondeds are within update groups
2167 comm->haveInterDomainBondeds = (mtop->natoms > gmx_mtop_num_molecules(*mtop) ||
2168 mtop->bIntermolecularInteractions);
2169 comm->haveInterDomainMultiBodyBondeds = (multi_body_bondeds_count(mtop) > 0);
2171 if (comm->useUpdateGroups)
2173 dd->splitConstraints = false;
2174 dd->splitSettles = false;
2178 dd->splitConstraints = gmx::inter_charge_group_constraints(*mtop);
2179 dd->splitSettles = gmx::inter_charge_group_settles(*mtop);
2184 /* Set the cut-off to some very large value,
2185 * so we don't need if statements everywhere in the code.
2186 * We use sqrt, since the cut-off is squared in some places.
2188 comm->cutoff = GMX_CUTOFF_INF;
2192 comm->cutoff = atomToAtomIntoDomainToDomainCutoff(*comm, ir->rlist);
2194 comm->cutoff_mbody = 0;
2196 /* Determine the minimum cell size limit, affected by many factors */
2197 comm->cellsize_limit = 0;
2198 comm->bBondComm = FALSE;
2200 /* We do not allow home atoms to move beyond the neighboring domain
2201 * between domain decomposition steps, which limits the cell size.
2202 * Get an estimate of cell size limit due to atom displacement.
2203 * In most cases this is a large overestimate, because it assumes
2204 * non-interaction atoms.
2205 * We set the chance to 1 in a trillion steps.
2207 constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2208 const real limitForAtomDisplacement =
2209 minCellSizeForAtomDisplacement(*mtop, *ir,
2210 comm->updateGroupingPerMoleculetype,
2211 c_chanceThatAtomMovesBeyondDomain);
2212 GMX_LOG(mdlog.info).appendTextFormatted(
2213 "Minimum cell size due to atom displacement: %.3f nm",
2214 limitForAtomDisplacement);
2216 comm->cellsize_limit = std::max(comm->cellsize_limit,
2217 limitForAtomDisplacement);
2219 /* TODO: PME decomposition currently requires atoms not to be more than
2220 * 2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2221 * In nearly all cases, limitForAtomDisplacement will be smaller
2222 * than 2/3*rlist, so the PME requirement is satisfied.
2223 * But it would be better for both correctness and performance
2224 * to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2225 * Note that we would need to improve the pairlist buffer case.
2228 if (comm->haveInterDomainBondeds)
2230 if (options.minimumCommunicationRange > 0)
2232 comm->cutoff_mbody = atomToAtomIntoDomainToDomainCutoff(*comm, options.minimumCommunicationRange);
2233 if (options.useBondedCommunication)
2235 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
2239 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
2241 r_bonded_limit = comm->cutoff_mbody;
2243 else if (ir->bPeriodicMols)
2245 /* Can not easily determine the required cut-off */
2246 GMX_LOG(mdlog.warning).appendText("NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.");
2247 comm->cutoff_mbody = comm->cutoff/2;
2248 r_bonded_limit = comm->cutoff_mbody;
2256 dd_bonded_cg_distance(mdlog, mtop, ir, as_rvec_array(xGlobal.data()), box,
2257 options.checkBondedInteractions,
2260 gmx_bcast(sizeof(r_2b), &r_2b, cr);
2261 gmx_bcast(sizeof(r_mb), &r_mb, cr);
2263 /* We use an initial margin of 10% for the minimum cell size,
2264 * except when we are just below the non-bonded cut-off.
2266 if (options.useBondedCommunication)
2268 if (std::max(r_2b, r_mb) > comm->cutoff)
2270 r_bonded = std::max(r_2b, r_mb);
2271 r_bonded_limit = tenPercentMargin*r_bonded;
2272 comm->bBondComm = TRUE;
2277 r_bonded_limit = std::min(tenPercentMargin*r_bonded, comm->cutoff);
2279 /* We determine cutoff_mbody later */
2283 /* No special bonded communication,
2284 * simply increase the DD cut-off.
2286 r_bonded_limit = tenPercentMargin*std::max(r_2b, r_mb);
2287 comm->cutoff_mbody = r_bonded_limit;
2288 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
2291 GMX_LOG(mdlog.info).appendTextFormatted(
2292 "Minimum cell size due to bonded interactions: %.3f nm",
2295 comm->cellsize_limit = std::max(comm->cellsize_limit, r_bonded_limit);
2299 if (dd->splitConstraints && options.constraintCommunicationRange <= 0)
2301 /* There is a cell size limit due to the constraints (P-LINCS) */
2302 rconstr = gmx::constr_r_max(mdlog, mtop, ir);
2303 GMX_LOG(mdlog.info).appendTextFormatted(
2304 "Estimated maximum distance required for P-LINCS: %.3f nm",
2306 if (rconstr > comm->cellsize_limit)
2308 GMX_LOG(mdlog.info).appendText("This distance will limit the DD cell size, you can override this with -rcon");
2311 else if (options.constraintCommunicationRange > 0)
2313 /* Here we do not check for dd->splitConstraints.
2314 * because one can also set a cell size limit for virtual sites only
2315 * and at this point we don't know yet if there are intercg v-sites.
2317 GMX_LOG(mdlog.info).appendTextFormatted(
2318 "User supplied maximum distance required for P-LINCS: %.3f nm",
2319 options.constraintCommunicationRange);
2320 rconstr = options.constraintCommunicationRange;
2322 comm->cellsize_limit = std::max(comm->cellsize_limit, rconstr);
2324 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
2326 if (options.numCells[XX] > 0)
2328 copy_ivec(options.numCells, dd->nc);
2329 set_dd_dim(mdlog, dd);
2330 set_ddbox_cr(*cr, &dd->nc, *ir, box, xGlobal, ddbox);
2332 if (options.numPmeRanks >= 0)
2334 cr->npmenodes = options.numPmeRanks;
2338 /* When the DD grid is set explicitly and -npme is set to auto,
2339 * don't use PME ranks. We check later if the DD grid is
2340 * compatible with the total number of ranks.
2345 real acs = average_cellsize_min(dd, ddbox);
2346 if (acs < comm->cellsize_limit)
2348 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2349 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
2350 acs, comm->cellsize_limit);
2355 set_ddbox_cr(*cr, nullptr, *ir, box, xGlobal, ddbox);
2357 /* We need to choose the optimal DD grid and possibly PME nodes */
2359 dd_choose_grid(mdlog, cr, dd, ir, mtop, box, ddbox,
2360 options.numPmeRanks,
2361 !isDlbDisabled(comm),
2363 comm->cellsize_limit, comm->cutoff,
2364 comm->haveInterDomainBondeds);
2366 if (dd->nc[XX] == 0)
2369 gmx_bool bC = (dd->splitConstraints && rconstr > r_bonded_limit);
2370 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
2371 !bC ? "-rdd" : "-rcon",
2372 comm->dlbState != DlbState::offUser ? " or -dds" : "",
2373 bC ? " or your LINCS settings" : "");
2375 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2376 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
2378 "Look in the log file for details on the domain decomposition",
2379 cr->nnodes-cr->npmenodes, limit, buf);
2381 set_dd_dim(mdlog, dd);
2384 GMX_LOG(mdlog.info).appendTextFormatted(
2385 "Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2386 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
2388 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
2389 if (cr->nnodes - dd->nnodes != cr->npmenodes)
2391 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2392 "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
2393 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
2395 if (cr->npmenodes > dd->nnodes)
2397 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2398 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
2400 if (cr->npmenodes > 0)
2402 comm->npmenodes = cr->npmenodes;
2406 comm->npmenodes = dd->nnodes;
2409 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2411 /* The following choices should match those
2412 * in comm_cost_est in domdec_setup.c.
2413 * Note that here the checks have to take into account
2414 * that the decomposition might occur in a different order than xyz
2415 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2416 * in which case they will not match those in comm_cost_est,
2417 * but since that is mainly for testing purposes that's fine.
2419 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
2420 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
2421 getenv("GMX_PMEONEDD") == nullptr)
2423 comm->npmedecompdim = 2;
2424 comm->npmenodes_x = dd->nc[XX];
2425 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
2429 /* In case nc is 1 in both x and y we could still choose to
2430 * decompose pme in y instead of x, but we use x for simplicity.
2432 comm->npmedecompdim = 1;
2433 if (dd->dim[0] == YY)
2435 comm->npmenodes_x = 1;
2436 comm->npmenodes_y = comm->npmenodes;
2440 comm->npmenodes_x = comm->npmenodes;
2441 comm->npmenodes_y = 1;
2444 GMX_LOG(mdlog.info).appendTextFormatted(
2445 "PME domain decomposition: %d x %d x %d",
2446 comm->npmenodes_x, comm->npmenodes_y, 1);
2450 comm->npmedecompdim = 0;
2451 comm->npmenodes_x = 0;
2452 comm->npmenodes_y = 0;
2455 snew(comm->slb_frac, DIM);
2456 if (isDlbDisabled(comm))
2458 comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
2459 comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
2460 comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
2463 if (comm->haveInterDomainBondeds && comm->cutoff_mbody == 0)
2465 if (comm->bBondComm || !isDlbDisabled(comm))
2467 /* Set the bonded communication distance to halfway
2468 * the minimum and the maximum,
2469 * since the extra communication cost is nearly zero.
2471 real acs = average_cellsize_min(dd, ddbox);
2472 comm->cutoff_mbody = 0.5*(r_bonded + acs);
2473 if (!isDlbDisabled(comm))
2475 /* Check if this does not limit the scaling */
2476 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
2477 options.dlbScaling*acs);
2479 if (!comm->bBondComm)
2481 /* Without bBondComm do not go beyond the n.b. cut-off */
2482 comm->cutoff_mbody = std::min(comm->cutoff_mbody, comm->cutoff);
2483 if (comm->cellsize_limit >= comm->cutoff)
2485 /* We don't loose a lot of efficieny
2486 * when increasing it to the n.b. cut-off.
2487 * It can even be slightly faster, because we need
2488 * less checks for the communication setup.
2490 comm->cutoff_mbody = comm->cutoff;
2493 /* Check if we did not end up below our original limit */
2494 comm->cutoff_mbody = std::max(comm->cutoff_mbody, r_bonded_limit);
2496 if (comm->cutoff_mbody > comm->cellsize_limit)
2498 comm->cellsize_limit = comm->cutoff_mbody;
2501 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2506 fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
2507 "cellsize limit %f\n",
2508 gmx::boolToString(comm->bBondComm), comm->cellsize_limit);
2513 check_dd_restrictions(dd, ir, mdlog);
2517 static char *init_bLocalCG(const gmx_mtop_t *mtop)
2522 ncg = ncg_mtop(mtop);
2523 snew(bLocalCG, ncg);
2524 for (cg = 0; cg < ncg; cg++)
2526 bLocalCG[cg] = FALSE;
2532 void dd_init_bondeds(FILE *fplog,
2534 const gmx_mtop_t *mtop,
2535 const gmx_vsite_t *vsite,
2536 const t_inputrec *ir,
2537 gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
2539 gmx_domdec_comm_t *comm;
2541 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
2545 if (comm->bBondComm)
2547 /* Communicate atoms beyond the cut-off for bonded interactions */
2550 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
2552 comm->bLocalCG = init_bLocalCG(mtop);
2556 /* Only communicate atoms based on cut-off */
2557 comm->cglink = nullptr;
2558 comm->bLocalCG = nullptr;
2562 static void writeSettings(gmx::TextWriter *log,
2564 const gmx_mtop_t *mtop,
2565 const t_inputrec *ir,
2566 gmx_bool bDynLoadBal,
2568 const gmx_ddbox_t *ddbox)
2570 gmx_domdec_comm_t *comm;
2579 log->writeString("The maximum number of communication pulses is:");
2580 for (d = 0; d < dd->ndim; d++)
2582 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2584 log->ensureLineBreak();
2585 log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm", comm->cellsize_limit);
2586 log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2587 log->writeString("The allowed shrink of domain decomposition cells is:");
2588 for (d = 0; d < DIM; d++)
2592 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
2599 comm->cellsize_min_dlb[d]/
2600 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
2602 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2605 log->ensureLineBreak();
2609 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2610 log->writeString("The initial number of communication pulses is:");
2611 for (d = 0; d < dd->ndim; d++)
2613 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2615 log->ensureLineBreak();
2616 log->writeString("The initial domain decomposition cell size is:");
2617 for (d = 0; d < DIM; d++)
2621 log->writeStringFormatted(" %c %.2f nm",
2622 dim2char(d), dd->comm->cellsize_min[d]);
2625 log->ensureLineBreak();
2629 const bool haveInterDomainVsites =
2630 (countInterUpdategroupVsites(*mtop, comm->updateGroupingPerMoleculetype) != 0);
2632 if (comm->haveInterDomainBondeds ||
2633 haveInterDomainVsites ||
2634 dd->splitConstraints || dd->splitSettles)
2636 std::string decompUnits;
2637 if (comm->useUpdateGroups)
2639 decompUnits = "atom groups";
2643 decompUnits = "atoms";
2646 log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:", decompUnits.c_str());
2647 log->writeLineFormatted("%40s %-7s %6.3f nm", "non-bonded interactions", "", comm->cutoff);
2651 limit = dd->comm->cellsize_limit;
2655 if (dynamic_dd_box(*dd))
2657 log->writeLine("(the following are initial values, they could change due to box deformation)");
2659 limit = dd->comm->cellsize_min[XX];
2660 for (d = 1; d < DIM; d++)
2662 limit = std::min(limit, dd->comm->cellsize_min[d]);
2666 if (comm->haveInterDomainBondeds)
2668 log->writeLineFormatted("%40s %-7s %6.3f nm",
2669 "two-body bonded interactions", "(-rdd)",
2670 std::max(comm->cutoff, comm->cutoff_mbody));
2671 log->writeLineFormatted("%40s %-7s %6.3f nm",
2672 "multi-body bonded interactions", "(-rdd)",
2673 (comm->bBondComm || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
2675 if (haveInterDomainVsites)
2677 log->writeLineFormatted("%40s %-7s %6.3f nm",
2678 "virtual site constructions", "(-rcon)", limit);
2680 if (dd->splitConstraints || dd->splitSettles)
2682 std::string separation = gmx::formatString("atoms separated by up to %d constraints",
2684 log->writeLineFormatted("%40s %-7s %6.3f nm\n",
2685 separation.c_str(), "(-rcon)", limit);
2687 log->ensureLineBreak();
2691 static void logSettings(const gmx::MDLogger &mdlog,
2693 const gmx_mtop_t *mtop,
2694 const t_inputrec *ir,
2696 const gmx_ddbox_t *ddbox)
2698 gmx::StringOutputStream stream;
2699 gmx::TextWriter log(&stream);
2700 writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2701 if (dd->comm->dlbState == DlbState::offCanTurnOn)
2704 log.ensureEmptyLine();
2705 log.writeLine("When dynamic load balancing gets turned on, these settings will change to:");
2707 writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2709 GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2712 static void set_cell_limits_dlb(const gmx::MDLogger &mdlog,
2715 const t_inputrec *ir,
2716 const gmx_ddbox_t *ddbox)
2718 gmx_domdec_comm_t *comm;
2719 int d, dim, npulse, npulse_d_max, npulse_d;
2724 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2726 /* Determine the maximum number of comm. pulses in one dimension */
2728 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2730 /* Determine the maximum required number of grid pulses */
2731 if (comm->cellsize_limit >= comm->cutoff)
2733 /* Only a single pulse is required */
2736 else if (!bNoCutOff && comm->cellsize_limit > 0)
2738 /* We round down slightly here to avoid overhead due to the latency
2739 * of extra communication calls when the cut-off
2740 * would be only slightly longer than the cell size.
2741 * Later cellsize_limit is redetermined,
2742 * so we can not miss interactions due to this rounding.
2744 npulse = static_cast<int>(0.96 + comm->cutoff/comm->cellsize_limit);
2748 /* There is no cell size limit */
2749 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
2752 if (!bNoCutOff && npulse > 1)
2754 /* See if we can do with less pulses, based on dlb_scale */
2756 for (d = 0; d < dd->ndim; d++)
2759 npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->cutoff
2760 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
2761 npulse_d_max = std::max(npulse_d_max, npulse_d);
2763 npulse = std::min(npulse, npulse_d_max);
2766 /* This env var can override npulse */
2767 d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2774 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
2775 for (d = 0; d < dd->ndim; d++)
2777 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
2778 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2779 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
2781 comm->bVacDLBNoLimit = FALSE;
2785 /* cellsize_limit is set for LINCS in init_domain_decomposition */
2786 if (!comm->bVacDLBNoLimit)
2788 comm->cellsize_limit = std::max(comm->cellsize_limit,
2789 comm->cutoff/comm->maxpulse);
2791 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2792 /* Set the minimum cell size for each DD dimension */
2793 for (d = 0; d < dd->ndim; d++)
2795 if (comm->bVacDLBNoLimit ||
2796 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
2798 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2802 comm->cellsize_min_dlb[dd->dim[d]] =
2803 comm->cutoff/comm->cd[d].np_dlb;
2806 if (comm->cutoff_mbody <= 0)
2808 comm->cutoff_mbody = std::min(comm->cutoff, comm->cellsize_limit);
2816 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
2818 /* If each molecule is a single charge group
2819 * or we use domain decomposition for each periodic dimension,
2820 * we do not need to take pbc into account for the bonded interactions.
2822 return (ePBC != epbcNONE && dd->comm->haveInterDomainBondeds &&
2825 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
2828 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2829 static void set_ddgrid_parameters(const gmx::MDLogger &mdlog,
2830 gmx_domdec_t *dd, real dlb_scale,
2831 const gmx_mtop_t *mtop, const t_inputrec *ir,
2832 const gmx_ddbox_t *ddbox)
2834 gmx_domdec_comm_t *comm;
2840 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2842 init_ddpme(dd, &comm->ddpme[0], 0);
2843 if (comm->npmedecompdim >= 2)
2845 init_ddpme(dd, &comm->ddpme[1], 1);
2850 comm->npmenodes = 0;
2851 if (dd->pme_nodeid >= 0)
2853 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2854 "Can not have separate PME ranks without PME electrostatics");
2860 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
2862 if (!isDlbDisabled(comm))
2864 set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2867 logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
2869 if (ir->ePBC == epbcNONE)
2871 vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
2876 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/static_cast<double>(dd->nnodes);
2880 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2882 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
2884 dd->ga2la = new gmx_ga2la_t(natoms_tot,
2885 static_cast<int>(vol_frac*natoms_tot));
2888 /*! \brief Set some important DD parameters that can be modified by env.vars */
2889 static void set_dd_envvar_options(const gmx::MDLogger &mdlog,
2890 gmx_domdec_t *dd, int rank_mysim)
2892 gmx_domdec_comm_t *comm = dd->comm;
2894 dd->bSendRecv2 = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2895 comm->dlb_scale_lim = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2896 comm->eFlop = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2897 int recload = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2898 comm->nstDDDump = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2899 comm->nstDDDumpGrid = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2900 comm->DD_debug = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
2904 GMX_LOG(mdlog.info).appendText("Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication");
2909 GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2910 if (comm->eFlop > 1)
2912 srand(1 + rank_mysim);
2914 comm->bRecordLoad = TRUE;
2918 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
2922 gmx_domdec_t *init_domain_decomposition(const gmx::MDLogger &mdlog,
2924 const DomdecOptions &options,
2925 const gmx::MdrunOptions &mdrunOptions,
2926 const gmx_mtop_t *mtop,
2927 const t_inputrec *ir,
2929 gmx::ArrayRef<const gmx::RVec> xGlobal,
2930 gmx::LocalAtomSetManager *atomSets)
2934 GMX_LOG(mdlog.info).appendTextFormatted(
2935 "\nInitializing Domain Decomposition on %d ranks", cr->nnodes);
2937 dd = new gmx_domdec_t;
2939 dd->comm = init_dd_comm();
2941 /* Initialize DD paritioning counters */
2942 dd->comm->partition_step = INT_MIN;
2945 set_dd_envvar_options(mdlog, dd, cr->nodeid);
2947 gmx_ddbox_t ddbox = {0};
2948 set_dd_limits_and_grid(mdlog, cr, dd, options, mdrunOptions,
2953 make_dd_communicators(mdlog, cr, dd, options.rankOrder);
2955 if (thisRankHasDuty(cr, DUTY_PP))
2957 set_ddgrid_parameters(mdlog, dd, options.dlbScaling, mtop, ir, &ddbox);
2959 setup_neighbor_relations(dd);
2962 /* Set overallocation to avoid frequent reallocation of arrays */
2963 set_over_alloc_dd(TRUE);
2965 clear_dd_cycle_counts(dd);
2967 dd->atomSets = atomSets;
2972 static gmx_bool test_dd_cutoff(t_commrec *cr,
2973 const t_state &state,
2974 real cutoffRequested)
2984 set_ddbox(*dd, false, state.box, true, state.x, &ddbox);
2988 for (d = 0; d < dd->ndim; d++)
2992 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
2993 if (dynamic_dd_box(*dd))
2995 inv_cell_size *= DD_PRES_SCALE_MARGIN;
2998 np = 1 + static_cast<int>(cutoffRequested*inv_cell_size*ddbox.skew_fac[dim]);
3000 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3002 if (np > dd->comm->cd[d].np_dlb)
3007 /* If a current local cell size is smaller than the requested
3008 * cut-off, we could still fix it, but this gets very complicated.
3009 * Without fixing here, we might actually need more checks.
3011 real cellSizeAlongDim = (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim];
3012 if (cellSizeAlongDim*dd->comm->cd[d].np_dlb < cutoffRequested)
3019 if (!isDlbDisabled(dd->comm))
3021 /* If DLB is not active yet, we don't need to check the grid jumps.
3022 * Actually we shouldn't, because then the grid jump data is not set.
3024 if (isDlbOn(dd->comm) &&
3025 check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3030 gmx_sumi(1, &LocallyLimited, cr);
3032 if (LocallyLimited > 0)
3041 gmx_bool change_dd_cutoff(t_commrec *cr,
3042 const t_state &state,
3043 real cutoffRequested)
3045 gmx_bool bCutoffAllowed;
3047 bCutoffAllowed = test_dd_cutoff(cr, state, cutoffRequested);
3051 cr->dd->comm->cutoff = cutoffRequested;
3054 return bCutoffAllowed;