2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
53 #include "gromacs/domdec/collect.h"
54 #include "gromacs/domdec/dlb.h"
55 #include "gromacs/domdec/dlbtiming.h"
56 #include "gromacs/domdec/domdec_network.h"
57 #include "gromacs/domdec/ga2la.h"
58 #include "gromacs/domdec/options.h"
59 #include "gromacs/domdec/partition.h"
60 #include "gromacs/gmxlib/network.h"
61 #include "gromacs/gmxlib/nrnb.h"
62 #include "gromacs/gpu_utils/gpu_utils.h"
63 #include "gromacs/hardware/hw_info.h"
64 #include "gromacs/listed_forces/manage_threading.h"
65 #include "gromacs/math/vec.h"
66 #include "gromacs/math/vectypes.h"
67 #include "gromacs/mdlib/calc_verletbuf.h"
68 #include "gromacs/mdlib/constr.h"
69 #include "gromacs/mdlib/constraintrange.h"
70 #include "gromacs/mdlib/updategroups.h"
71 #include "gromacs/mdlib/vsite.h"
72 #include "gromacs/mdtypes/commrec.h"
73 #include "gromacs/mdtypes/inputrec.h"
74 #include "gromacs/mdtypes/mdrunoptions.h"
75 #include "gromacs/mdtypes/state.h"
76 #include "gromacs/pbcutil/ishift.h"
77 #include "gromacs/pbcutil/pbc.h"
78 #include "gromacs/pulling/pull.h"
79 #include "gromacs/timing/wallcycle.h"
80 #include "gromacs/topology/block.h"
81 #include "gromacs/topology/idef.h"
82 #include "gromacs/topology/ifunc.h"
83 #include "gromacs/topology/mtop_lookup.h"
84 #include "gromacs/topology/mtop_util.h"
85 #include "gromacs/topology/topology.h"
86 #include "gromacs/utility/basedefinitions.h"
87 #include "gromacs/utility/basenetwork.h"
88 #include "gromacs/utility/cstringutil.h"
89 #include "gromacs/utility/exceptions.h"
90 #include "gromacs/utility/fatalerror.h"
91 #include "gromacs/utility/gmxmpi.h"
92 #include "gromacs/utility/logger.h"
93 #include "gromacs/utility/real.h"
94 #include "gromacs/utility/smalloc.h"
95 #include "gromacs/utility/strconvert.h"
96 #include "gromacs/utility/stringstream.h"
97 #include "gromacs/utility/stringutil.h"
98 #include "gromacs/utility/textwriter.h"
100 #include "atomdistribution.h"
102 #include "cellsizes.h"
103 #include "distribute.h"
104 #include "domdec_constraints.h"
105 #include "domdec_internal.h"
106 #include "domdec_vsite.h"
107 #include "redistribute.h"
110 // TODO remove this when moving domdec into gmx namespace
111 using gmx::DdRankOrder;
112 using gmx::DlbOption;
113 using gmx::DomdecOptions;
115 static const char *edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
117 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
120 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
121 #define DD_FLAG_NRCG 65535
122 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
123 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
125 /* The DD zone order */
126 static const ivec dd_zo[DD_MAXZONE] =
127 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
129 /* The non-bonded zone-pair setup for domain decomposition
130 * The first number is the i-zone, the second number the first j-zone seen by
131 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
132 * As is, this is for 3D decomposition, where there are 4 i-zones.
133 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
134 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
137 ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
144 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
146 static void index2xyz(ivec nc,int ind,ivec xyz)
148 xyz[XX] = ind % nc[XX];
149 xyz[YY] = (ind / nc[XX]) % nc[YY];
150 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
154 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
156 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
157 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
158 xyz[ZZ] = ind % nc[ZZ];
161 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
166 ddindex = dd_index(dd->nc, c);
167 if (dd->comm->bCartesianPP_PME)
169 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
171 else if (dd->comm->bCartesianPP)
174 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
185 int ddglatnr(const gmx_domdec_t *dd, int i)
195 if (i >= dd->comm->atomRanges.numAtomsTotal())
197 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
199 atnr = dd->globalAtomIndices[i] + 1;
205 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
207 return &dd->comm->cgs_gl;
210 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t &dd)
212 GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
213 return dd.comm->updateGroupingPerMoleculetype;
216 void dd_store_state(gmx_domdec_t *dd, t_state *state)
220 if (state->ddp_count != dd->ddp_count)
222 gmx_incons("The MD state does not match the domain decomposition state");
225 state->cg_gl.resize(dd->ncg_home);
226 for (i = 0; i < dd->ncg_home; i++)
228 state->cg_gl[i] = dd->globalAtomGroupIndices[i];
231 state->ddp_count_cg_gl = dd->ddp_count;
234 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
236 return &dd->comm->zones;
239 void dd_get_ns_ranges(const gmx_domdec_t *dd, int icg,
240 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
242 gmx_domdec_zones_t *zones;
245 zones = &dd->comm->zones;
248 while (icg >= zones->izone[izone].cg1)
257 else if (izone < zones->nizone)
259 *jcg0 = zones->izone[izone].jcg0;
263 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
264 icg, izone, zones->nizone);
267 *jcg1 = zones->izone[izone].jcg1;
269 for (d = 0; d < dd->ndim; d++)
272 shift0[dim] = zones->izone[izone].shift0[dim];
273 shift1[dim] = zones->izone[izone].shift1[dim];
274 if (dd->comm->tric_dir[dim] || (isDlbOn(dd->comm) && d > 0))
276 /* A conservative approach, this can be optimized */
283 int dd_numHomeAtoms(const gmx_domdec_t &dd)
285 return dd.comm->atomRanges.numHomeAtoms();
288 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
290 /* We currently set mdatoms entries for all atoms:
291 * local + non-local + communicated for vsite + constraints
294 return dd->comm->atomRanges.numAtomsTotal();
297 int dd_natoms_vsite(const gmx_domdec_t *dd)
299 return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
302 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
304 *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
305 *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
308 void dd_move_x(gmx_domdec_t *dd,
310 gmx::ArrayRef<gmx::RVec> x,
311 gmx_wallcycle *wcycle)
313 wallcycle_start(wcycle, ewcMOVEX);
316 gmx_domdec_comm_t *comm;
317 gmx_domdec_comm_dim_t *cd;
318 rvec shift = {0, 0, 0};
319 gmx_bool bPBC, bScrew;
324 nat_tot = comm->atomRanges.numHomeAtoms();
325 for (int d = 0; d < dd->ndim; d++)
327 bPBC = (dd->ci[dd->dim[d]] == 0);
328 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
331 copy_rvec(box[dd->dim[d]], shift);
334 for (const gmx_domdec_ind_t &ind : cd->ind)
336 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
337 gmx::ArrayRef<gmx::RVec> &sendBuffer = sendBufferAccess.buffer;
341 for (int j : ind.index)
343 sendBuffer[n] = x[j];
349 for (int j : ind.index)
351 /* We need to shift the coordinates */
352 for (int d = 0; d < DIM; d++)
354 sendBuffer[n][d] = x[j][d] + shift[d];
361 for (int j : ind.index)
364 sendBuffer[n][XX] = x[j][XX] + shift[XX];
366 * This operation requires a special shift force
367 * treatment, which is performed in calc_vir.
369 sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
370 sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
375 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
377 gmx::ArrayRef<gmx::RVec> receiveBuffer;
378 if (cd->receiveInPlace)
380 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
384 receiveBuffer = receiveBufferAccess.buffer;
386 /* Send and receive the coordinates */
387 ddSendrecv(dd, d, dddirBackward,
388 sendBuffer, receiveBuffer);
390 if (!cd->receiveInPlace)
393 for (int zone = 0; zone < nzone; zone++)
395 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
397 x[i] = receiveBuffer[j++];
401 nat_tot += ind.nrecv[nzone+1];
406 wallcycle_stop(wcycle, ewcMOVEX);
409 void dd_move_f(gmx_domdec_t *dd,
410 gmx::ArrayRef<gmx::RVec> f,
412 gmx_wallcycle *wcycle)
414 wallcycle_start(wcycle, ewcMOVEF);
417 gmx_domdec_comm_t *comm;
418 gmx_domdec_comm_dim_t *cd;
421 gmx_bool bShiftForcesNeedPbc, bScrew;
425 nzone = comm->zones.n/2;
426 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
427 for (int d = dd->ndim-1; d >= 0; d--)
429 /* Only forces in domains near the PBC boundaries need to
430 consider PBC in the treatment of fshift */
431 bShiftForcesNeedPbc = (dd->ci[dd->dim[d]] == 0);
432 bScrew = (bShiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX);
433 if (fshift == nullptr && !bScrew)
435 bShiftForcesNeedPbc = FALSE;
437 /* Determine which shift vector we need */
443 for (int p = cd->numPulses() - 1; p >= 0; p--)
445 const gmx_domdec_ind_t &ind = cd->ind[p];
446 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
447 gmx::ArrayRef<gmx::RVec> &receiveBuffer = receiveBufferAccess.buffer;
449 nat_tot -= ind.nrecv[nzone+1];
451 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
453 gmx::ArrayRef<gmx::RVec> sendBuffer;
454 if (cd->receiveInPlace)
456 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
460 sendBuffer = sendBufferAccess.buffer;
462 for (int zone = 0; zone < nzone; zone++)
464 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
466 sendBuffer[j++] = f[i];
470 /* Communicate the forces */
471 ddSendrecv(dd, d, dddirForward,
472 sendBuffer, receiveBuffer);
473 /* Add the received forces */
475 if (!bShiftForcesNeedPbc)
477 for (int j : ind.index)
479 for (int d = 0; d < DIM; d++)
481 f[j][d] += receiveBuffer[n][d];
488 /* fshift should always be defined if this function is
489 * called when bShiftForcesNeedPbc is true */
490 assert(nullptr != fshift);
491 for (int j : ind.index)
493 for (int d = 0; d < DIM; d++)
495 f[j][d] += receiveBuffer[n][d];
497 /* Add this force to the shift force */
498 for (int d = 0; d < DIM; d++)
500 fshift[is][d] += receiveBuffer[n][d];
507 for (int j : ind.index)
509 /* Rotate the force */
510 f[j][XX] += receiveBuffer[n][XX];
511 f[j][YY] -= receiveBuffer[n][YY];
512 f[j][ZZ] -= receiveBuffer[n][ZZ];
515 /* Add this force to the shift force */
516 for (int d = 0; d < DIM; d++)
518 fshift[is][d] += receiveBuffer[n][d];
527 wallcycle_stop(wcycle, ewcMOVEF);
530 /* Convenience function for extracting a real buffer from an rvec buffer
532 * To reduce the number of temporary communication buffers and avoid
533 * cache polution, we reuse gmx::RVec buffers for storing reals.
534 * This functions return a real buffer reference with the same number
535 * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
537 static gmx::ArrayRef<real>
538 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
540 return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
544 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
547 gmx_domdec_comm_t *comm;
548 gmx_domdec_comm_dim_t *cd;
553 nat_tot = comm->atomRanges.numHomeAtoms();
554 for (int d = 0; d < dd->ndim; d++)
557 for (const gmx_domdec_ind_t &ind : cd->ind)
559 /* Note: We provision for RVec instead of real, so a factor of 3
560 * more than needed. The buffer actually already has this size
561 * and we pass a plain pointer below, so this does not matter.
563 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
564 gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
566 for (int j : ind.index)
568 sendBuffer[n++] = v[j];
571 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
573 gmx::ArrayRef<real> receiveBuffer;
574 if (cd->receiveInPlace)
576 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
580 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
582 /* Send and receive the data */
583 ddSendrecv(dd, d, dddirBackward,
584 sendBuffer, receiveBuffer);
585 if (!cd->receiveInPlace)
588 for (int zone = 0; zone < nzone; zone++)
590 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
592 v[i] = receiveBuffer[j++];
596 nat_tot += ind.nrecv[nzone+1];
602 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
605 gmx_domdec_comm_t *comm;
606 gmx_domdec_comm_dim_t *cd;
610 nzone = comm->zones.n/2;
611 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
612 for (int d = dd->ndim-1; d >= 0; d--)
615 for (int p = cd->numPulses() - 1; p >= 0; p--)
617 const gmx_domdec_ind_t &ind = cd->ind[p];
619 /* Note: We provision for RVec instead of real, so a factor of 3
620 * more than needed. The buffer actually already has this size
621 * and we typecast, so this works as intended.
623 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
624 gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
625 nat_tot -= ind.nrecv[nzone + 1];
627 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
629 gmx::ArrayRef<real> sendBuffer;
630 if (cd->receiveInPlace)
632 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
636 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
638 for (int zone = 0; zone < nzone; zone++)
640 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
642 sendBuffer[j++] = v[i];
646 /* Communicate the forces */
647 ddSendrecv(dd, d, dddirForward,
648 sendBuffer, receiveBuffer);
649 /* Add the received forces */
651 for (int j : ind.index)
653 v[j] += receiveBuffer[n];
661 real dd_cutoff_multibody(const gmx_domdec_t *dd)
663 gmx_domdec_comm_t *comm;
670 if (comm->haveInterDomainMultiBodyBondeds)
672 if (comm->cutoff_mbody > 0)
674 r = comm->cutoff_mbody;
678 /* cutoff_mbody=0 means we do not have DLB */
679 r = comm->cellsize_min[dd->dim[0]];
680 for (di = 1; di < dd->ndim; di++)
682 r = std::min(r, comm->cellsize_min[dd->dim[di]]);
686 r = std::max(r, comm->cutoff_mbody);
690 r = std::min(r, comm->cutoff);
698 real dd_cutoff_twobody(const gmx_domdec_t *dd)
702 r_mb = dd_cutoff_multibody(dd);
704 return std::max(dd->comm->cutoff, r_mb);
708 static void dd_cart_coord2pmecoord(const gmx_domdec_t *dd, const ivec coord,
713 nc = dd->nc[dd->comm->cartpmedim];
714 ntot = dd->comm->ntot[dd->comm->cartpmedim];
715 copy_ivec(coord, coord_pme);
716 coord_pme[dd->comm->cartpmedim] =
717 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
720 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
725 npme = dd->comm->npmenodes;
727 /* Here we assign a PME node to communicate with this DD node
728 * by assuming that the major index of both is x.
729 * We add cr->npmenodes/2 to obtain an even distribution.
731 return (ddindex*npme + npme/2)/npp;
734 static int *dd_interleaved_pme_ranks(const gmx_domdec_t *dd)
739 snew(pme_rank, dd->comm->npmenodes);
741 for (i = 0; i < dd->nnodes; i++)
743 p0 = ddindex2pmeindex(dd, i);
744 p1 = ddindex2pmeindex(dd, i+1);
745 if (i+1 == dd->nnodes || p1 > p0)
749 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
751 pme_rank[n] = i + 1 + n;
759 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
767 if (dd->comm->bCartesian) {
768 gmx_ddindex2xyz(dd->nc,ddindex,coords);
769 dd_coords2pmecoords(dd,coords,coords_pme);
770 copy_ivec(dd->ntot,nc);
771 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
772 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
774 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
776 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
782 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
787 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
789 gmx_domdec_comm_t *comm;
791 int ddindex, nodeid = -1;
798 if (comm->bCartesianPP_PME)
801 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
806 ddindex = dd_index(cr->dd->nc, coords);
807 if (comm->bCartesianPP)
809 nodeid = comm->ddindex2simnodeid[ddindex];
815 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
827 static int dd_simnode2pmenode(const gmx_domdec_t *dd,
828 const t_commrec gmx_unused *cr,
833 const gmx_domdec_comm_t *comm = dd->comm;
835 /* This assumes a uniform x domain decomposition grid cell size */
836 if (comm->bCartesianPP_PME)
839 ivec coord, coord_pme;
840 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
841 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
843 /* This is a PP node */
844 dd_cart_coord2pmecoord(dd, coord, coord_pme);
845 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
849 else if (comm->bCartesianPP)
851 if (sim_nodeid < dd->nnodes)
853 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
858 /* This assumes DD cells with identical x coordinates
859 * are numbered sequentially.
861 if (dd->comm->pmenodes == nullptr)
863 if (sim_nodeid < dd->nnodes)
865 /* The DD index equals the nodeid */
866 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
872 while (sim_nodeid > dd->comm->pmenodes[i])
876 if (sim_nodeid < dd->comm->pmenodes[i])
878 pmenode = dd->comm->pmenodes[i];
886 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
891 dd->comm->npmenodes_x, dd->comm->npmenodes_y
902 std::vector<int> get_pme_ddranks(const t_commrec *cr, int pmenodeid)
906 ivec coord, coord_pme;
910 std::vector<int> ddranks;
911 ddranks.reserve((dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
913 for (x = 0; x < dd->nc[XX]; x++)
915 for (y = 0; y < dd->nc[YY]; y++)
917 for (z = 0; z < dd->nc[ZZ]; z++)
919 if (dd->comm->bCartesianPP_PME)
924 dd_cart_coord2pmecoord(dd, coord, coord_pme);
925 if (dd->ci[XX] == coord_pme[XX] &&
926 dd->ci[YY] == coord_pme[YY] &&
927 dd->ci[ZZ] == coord_pme[ZZ])
929 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
934 /* The slab corresponds to the nodeid in the PME group */
935 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
937 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
946 static gmx_bool receive_vir_ener(const gmx_domdec_t *dd, const t_commrec *cr)
948 gmx_bool bReceive = TRUE;
950 if (cr->npmenodes < dd->nnodes)
952 gmx_domdec_comm_t *comm = dd->comm;
953 if (comm->bCartesianPP_PME)
956 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
958 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
959 coords[comm->cartpmedim]++;
960 if (coords[comm->cartpmedim] < dd->nc[comm->cartpmedim])
963 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
964 if (dd_simnode2pmenode(dd, cr, rank) == pmenode)
966 /* This is not the last PP node for pmenode */
971 GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
976 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
977 if (cr->sim_nodeid+1 < cr->nnodes &&
978 dd_simnode2pmenode(dd, cr, cr->sim_nodeid+1) == pmenode)
980 /* This is not the last PP node for pmenode */
989 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
991 gmx_domdec_comm_t *comm;
996 snew(*dim_f, dd->nc[dim]+1);
998 for (i = 1; i < dd->nc[dim]; i++)
1000 if (comm->slb_frac[dim])
1002 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
1006 (*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
1009 (*dim_f)[dd->nc[dim]] = 1;
1012 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
1014 int pmeindex, slab, nso, i;
1017 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
1023 ddpme->dim = dimind;
1025 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
1027 ddpme->nslab = (ddpme->dim == 0 ?
1028 dd->comm->npmenodes_x :
1029 dd->comm->npmenodes_y);
1031 if (ddpme->nslab <= 1)
1036 nso = dd->comm->npmenodes/ddpme->nslab;
1037 /* Determine for each PME slab the PP location range for dimension dim */
1038 snew(ddpme->pp_min, ddpme->nslab);
1039 snew(ddpme->pp_max, ddpme->nslab);
1040 for (slab = 0; slab < ddpme->nslab; slab++)
1042 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
1043 ddpme->pp_max[slab] = 0;
1045 for (i = 0; i < dd->nnodes; i++)
1047 ddindex2xyz(dd->nc, i, xyz);
1048 /* For y only use our y/z slab.
1049 * This assumes that the PME x grid size matches the DD grid size.
1051 if (dimind == 0 || xyz[XX] == dd->ci[XX])
1053 pmeindex = ddindex2pmeindex(dd, i);
1056 slab = pmeindex/nso;
1060 slab = pmeindex % ddpme->nslab;
1062 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
1063 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1067 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1070 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1072 if (dd->comm->ddpme[0].dim == XX)
1074 return dd->comm->ddpme[0].maxshift;
1082 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1084 if (dd->comm->ddpme[0].dim == YY)
1086 return dd->comm->ddpme[0].maxshift;
1088 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
1090 return dd->comm->ddpme[1].maxshift;
1098 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
1100 /* Note that the cycles value can be incorrect, either 0 or some
1101 * extremely large value, when our thread migrated to another core
1102 * with an unsynchronized cycle counter. If this happens less often
1103 * that once per nstlist steps, this will not cause issues, since
1104 * we later subtract the maximum value from the sum over nstlist steps.
1105 * A zero count will slightly lower the total, but that's a small effect.
1106 * Note that the main purpose of the subtraction of the maximum value
1107 * is to avoid throwing off the load balancing when stalls occur due
1108 * e.g. system activity or network congestion.
1110 dd->comm->cycl[ddCycl] += cycles;
1111 dd->comm->cycl_n[ddCycl]++;
1112 if (cycles > dd->comm->cycl_max[ddCycl])
1114 dd->comm->cycl_max[ddCycl] = cycles;
1119 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
1124 gmx_bool bPartOfGroup = FALSE;
1126 dim = dd->dim[dim_ind];
1127 copy_ivec(loc, loc_c);
1128 for (i = 0; i < dd->nc[dim]; i++)
1131 rank = dd_index(dd->nc, loc_c);
1132 if (rank == dd->rank)
1134 /* This process is part of the group */
1135 bPartOfGroup = TRUE;
1138 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
1142 dd->comm->mpi_comm_load[dim_ind] = c_row;
1143 if (!isDlbDisabled(dd->comm))
1145 DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1147 if (dd->ci[dim] == dd->master_ci[dim])
1149 /* This is the root process of this row */
1150 cellsizes.rowMaster = std::make_unique<RowMaster>();
1152 RowMaster &rowMaster = *cellsizes.rowMaster;
1153 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1154 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
1155 rowMaster.isCellMin.resize(dd->nc[dim]);
1158 rowMaster.bounds.resize(dd->nc[dim]);
1160 rowMaster.buf_ncd.resize(dd->nc[dim]);
1164 /* This is not a root process, we only need to receive cell_f */
1165 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1168 if (dd->ci[dim] == dd->master_ci[dim])
1170 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
1176 void dd_setup_dlb_resource_sharing(t_commrec *cr,
1180 int physicalnode_id_hash;
1182 MPI_Comm mpi_comm_pp_physicalnode;
1184 if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1186 /* Only ranks with short-ranged tasks (currently) use GPUs.
1187 * If we don't have GPUs assigned, there are no resources to share.
1192 physicalnode_id_hash = gmx_physicalnode_id_hash();
1198 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1199 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
1200 dd->rank, physicalnode_id_hash, gpu_id);
1202 /* Split the PP communicator over the physical nodes */
1203 /* TODO: See if we should store this (before), as it's also used for
1204 * for the nodecomm summation.
1206 // TODO PhysicalNodeCommunicator could be extended/used to handle
1207 // the need for per-node per-group communicators.
1208 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
1209 &mpi_comm_pp_physicalnode);
1210 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
1211 &dd->comm->mpi_comm_gpu_shared);
1212 MPI_Comm_free(&mpi_comm_pp_physicalnode);
1213 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1217 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1220 /* Note that some ranks could share a GPU, while others don't */
1222 if (dd->comm->nrank_gpu_shared == 1)
1224 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1227 GMX_UNUSED_VALUE(cr);
1228 GMX_UNUSED_VALUE(gpu_id);
1232 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
1235 int dim0, dim1, i, j;
1240 fprintf(debug, "Making load communicators\n");
1243 dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1244 snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1252 make_load_communicator(dd, 0, loc);
1256 for (i = 0; i < dd->nc[dim0]; i++)
1259 make_load_communicator(dd, 1, loc);
1265 for (i = 0; i < dd->nc[dim0]; i++)
1269 for (j = 0; j < dd->nc[dim1]; j++)
1272 make_load_communicator(dd, 2, loc);
1279 fprintf(debug, "Finished making load communicators\n");
1284 /*! \brief Sets up the relation between neighboring domains and zones */
1285 static void setup_neighbor_relations(gmx_domdec_t *dd)
1287 int d, dim, i, j, m;
1289 gmx_domdec_zones_t *zones;
1290 gmx_domdec_ns_ranges_t *izone;
1292 for (d = 0; d < dd->ndim; d++)
1295 copy_ivec(dd->ci, tmp);
1296 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
1297 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1298 copy_ivec(dd->ci, tmp);
1299 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
1300 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1303 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
1306 dd->neighbor[d][1]);
1310 int nzone = (1 << dd->ndim);
1311 GMX_ASSERT(dd->ndim < DIM, "Invalid number of dimensions");
1312 int nizone = (1 << std::max(dd->ndim - 1, 0));
1313 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1315 zones = &dd->comm->zones;
1317 for (i = 0; i < nzone; i++)
1320 clear_ivec(zones->shift[i]);
1321 for (d = 0; d < dd->ndim; d++)
1323 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1328 for (i = 0; i < nzone; i++)
1330 for (d = 0; d < DIM; d++)
1332 s[d] = dd->ci[d] - zones->shift[i][d];
1337 else if (s[d] >= dd->nc[d])
1343 zones->nizone = nizone;
1344 for (i = 0; i < zones->nizone; i++)
1346 assert(ddNonbondedZonePairRanges[i][0] == i);
1348 izone = &zones->izone[i];
1349 /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
1350 * j-zones up to nzone.
1352 izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
1353 izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
1354 for (dim = 0; dim < DIM; dim++)
1356 if (dd->nc[dim] == 1)
1358 /* All shifts should be allowed */
1359 izone->shift0[dim] = -1;
1360 izone->shift1[dim] = 1;
1364 /* Determine the min/max j-zone shift wrt the i-zone */
1365 izone->shift0[dim] = 1;
1366 izone->shift1[dim] = -1;
1367 for (j = izone->j0; j < izone->j1; j++)
1369 int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
1370 if (shift_diff < izone->shift0[dim])
1372 izone->shift0[dim] = shift_diff;
1374 if (shift_diff > izone->shift1[dim])
1376 izone->shift1[dim] = shift_diff;
1383 if (!isDlbDisabled(dd->comm))
1385 dd->comm->cellsizesWithDlb.resize(dd->ndim);
1388 if (dd->comm->bRecordLoad)
1390 make_load_communicators(dd);
1394 static void make_pp_communicator(const gmx::MDLogger &mdlog,
1396 t_commrec gmx_unused *cr,
1397 bool gmx_unused reorder)
1400 gmx_domdec_comm_t *comm;
1407 if (comm->bCartesianPP)
1409 /* Set up cartesian communication for the particle-particle part */
1410 GMX_LOG(mdlog.info).appendTextFormatted(
1411 "Will use a Cartesian communicator: %d x %d x %d",
1412 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
1414 for (int i = 0; i < DIM; i++)
1418 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
1420 /* We overwrite the old communicator with the new cartesian one */
1421 cr->mpi_comm_mygroup = comm_cart;
1424 dd->mpi_comm_all = cr->mpi_comm_mygroup;
1425 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1427 if (comm->bCartesianPP_PME)
1429 /* Since we want to use the original cartesian setup for sim,
1430 * and not the one after split, we need to make an index.
1432 snew(comm->ddindex2ddnodeid, dd->nnodes);
1433 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
1434 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
1435 /* Get the rank of the DD master,
1436 * above we made sure that the master node is a PP node.
1446 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1448 else if (comm->bCartesianPP)
1450 if (cr->npmenodes == 0)
1452 /* The PP communicator is also
1453 * the communicator for this simulation
1455 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1457 cr->nodeid = dd->rank;
1459 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1461 /* We need to make an index to go from the coordinates
1462 * to the nodeid of this simulation.
1464 snew(comm->ddindex2simnodeid, dd->nnodes);
1465 snew(buf, dd->nnodes);
1466 if (thisRankHasDuty(cr, DUTY_PP))
1468 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1470 /* Communicate the ddindex to simulation nodeid index */
1471 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
1472 cr->mpi_comm_mysim);
1475 /* Determine the master coordinates and rank.
1476 * The DD master should be the same node as the master of this sim.
1478 for (int i = 0; i < dd->nnodes; i++)
1480 if (comm->ddindex2simnodeid[i] == 0)
1482 ddindex2xyz(dd->nc, i, dd->master_ci);
1483 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1488 fprintf(debug, "The master rank is %d\n", dd->masterrank);
1493 /* No Cartesian communicators */
1494 /* We use the rank in dd->comm->all as DD index */
1495 ddindex2xyz(dd->nc, dd->rank, dd->ci);
1496 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1498 clear_ivec(dd->master_ci);
1502 GMX_LOG(mdlog.info).appendTextFormatted(
1503 "Domain decomposition rank %d, coordinates %d %d %d\n",
1504 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1508 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
1509 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1513 static void receive_ddindex2simnodeid(gmx_domdec_t *dd,
1517 gmx_domdec_comm_t *comm = dd->comm;
1519 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
1522 snew(comm->ddindex2simnodeid, dd->nnodes);
1523 snew(buf, dd->nnodes);
1524 if (thisRankHasDuty(cr, DUTY_PP))
1526 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1528 /* Communicate the ddindex to simulation nodeid index */
1529 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
1530 cr->mpi_comm_mysim);
1534 GMX_UNUSED_VALUE(dd);
1535 GMX_UNUSED_VALUE(cr);
1539 static void split_communicator(const gmx::MDLogger &mdlog,
1540 t_commrec *cr, gmx_domdec_t *dd,
1541 DdRankOrder gmx_unused rankOrder,
1542 bool gmx_unused reorder)
1544 gmx_domdec_comm_t *comm;
1553 if (comm->bCartesianPP)
1555 for (i = 1; i < DIM; i++)
1557 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
1559 if (bDiv[YY] || bDiv[ZZ])
1561 comm->bCartesianPP_PME = TRUE;
1562 /* If we have 2D PME decomposition, which is always in x+y,
1563 * we stack the PME only nodes in z.
1564 * Otherwise we choose the direction that provides the thinnest slab
1565 * of PME only nodes as this will have the least effect
1566 * on the PP communication.
1567 * But for the PME communication the opposite might be better.
1569 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
1571 dd->nc[YY] > dd->nc[ZZ]))
1573 comm->cartpmedim = ZZ;
1577 comm->cartpmedim = YY;
1579 comm->ntot[comm->cartpmedim]
1580 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
1584 GMX_LOG(mdlog.info).appendTextFormatted(
1585 "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)",
1586 cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
1587 GMX_LOG(mdlog.info).appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1591 if (comm->bCartesianPP_PME)
1597 GMX_LOG(mdlog.info).appendTextFormatted(
1598 "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1599 comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
1601 for (i = 0; i < DIM; i++)
1605 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, static_cast<int>(reorder),
1607 MPI_Comm_rank(comm_cart, &rank);
1608 if (MASTER(cr) && rank != 0)
1610 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1613 /* With this assigment we loose the link to the original communicator
1614 * which will usually be MPI_COMM_WORLD, unless have multisim.
1616 cr->mpi_comm_mysim = comm_cart;
1617 cr->sim_nodeid = rank;
1619 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
1621 GMX_LOG(mdlog.info).appendTextFormatted(
1622 "Cartesian rank %d, coordinates %d %d %d\n",
1623 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1625 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1629 if (cr->npmenodes == 0 ||
1630 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
1632 cr->duty = DUTY_PME;
1635 /* Split the sim communicator into PP and PME only nodes */
1636 MPI_Comm_split(cr->mpi_comm_mysim,
1637 getThisRankDuties(cr),
1638 dd_index(comm->ntot, dd->ci),
1639 &cr->mpi_comm_mygroup);
1646 case DdRankOrder::pp_pme:
1647 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1649 case DdRankOrder::interleave:
1650 /* Interleave the PP-only and PME-only ranks */
1651 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1652 comm->pmenodes = dd_interleaved_pme_ranks(dd);
1654 case DdRankOrder::cartesian:
1657 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(rankOrder));
1660 if (dd_simnode2pmenode(dd, cr, cr->sim_nodeid) == -1)
1662 cr->duty = DUTY_PME;
1669 /* Split the sim communicator into PP and PME only nodes */
1670 MPI_Comm_split(cr->mpi_comm_mysim,
1671 getThisRankDuties(cr),
1673 &cr->mpi_comm_mygroup);
1674 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1678 GMX_LOG(mdlog.info).appendTextFormatted(
1679 "This rank does only %s work.\n",
1680 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1683 /*! \brief Generates the MPI communicators for domain decomposition */
1684 static void make_dd_communicators(const gmx::MDLogger &mdlog,
1686 gmx_domdec_t *dd, DdRankOrder ddRankOrder)
1688 gmx_domdec_comm_t *comm;
1693 copy_ivec(dd->nc, comm->ntot);
1695 comm->bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
1696 comm->bCartesianPP_PME = FALSE;
1698 /* Reorder the nodes by default. This might change the MPI ranks.
1699 * Real reordering is only supported on very few architectures,
1700 * Blue Gene is one of them.
1702 CartReorder = getenv("GMX_NO_CART_REORDER") == nullptr;
1704 if (cr->npmenodes > 0)
1706 /* Split the communicator into a PP and PME part */
1707 split_communicator(mdlog, cr, dd, ddRankOrder, CartReorder);
1708 if (comm->bCartesianPP_PME)
1710 /* We (possibly) reordered the nodes in split_communicator,
1711 * so it is no longer required in make_pp_communicator.
1713 CartReorder = FALSE;
1718 /* All nodes do PP and PME */
1720 /* We do not require separate communicators */
1721 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
1725 if (thisRankHasDuty(cr, DUTY_PP))
1727 /* Copy or make a new PP communicator */
1728 make_pp_communicator(mdlog, dd, cr, CartReorder);
1732 receive_ddindex2simnodeid(dd, cr);
1735 if (!thisRankHasDuty(cr, DUTY_PME))
1737 /* Set up the commnuication to our PME node */
1738 dd->pme_nodeid = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
1739 dd->pme_receive_vir_ener = receive_vir_ener(dd, cr);
1742 fprintf(debug, "My pme_nodeid %d receive ener %s\n",
1743 dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
1748 dd->pme_nodeid = -1;
1751 /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1754 dd->ma = std::make_unique<AtomDistribution>(dd->nc,
1756 comm->cgs_gl.index[comm->cgs_gl.nr]);
1760 static real *get_slb_frac(const gmx::MDLogger &mdlog,
1761 const char *dir, int nc, const char *size_string)
1763 real *slb_frac, tot;
1768 if (nc > 1 && size_string != nullptr)
1770 GMX_LOG(mdlog.info).appendTextFormatted(
1771 "Using static load balancing for the %s direction", dir);
1774 for (i = 0; i < nc; i++)
1777 sscanf(size_string, "%20lf%n", &dbl, &n);
1780 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
1787 std::string relativeCellSizes = "Relative cell sizes:";
1788 for (i = 0; i < nc; i++)
1791 relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1793 GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1799 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
1802 gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1804 while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1806 for (auto &ilist : extractILists(*ilists, IF_BOND))
1808 if (NRAL(ilist.functionType) > 2)
1810 n += nmol*(ilist.iatoms.size()/ilistStride(ilist));
1818 static int dd_getenv(const gmx::MDLogger &mdlog,
1819 const char *env_var, int def)
1825 val = getenv(env_var);
1828 if (sscanf(val, "%20d", &nst) <= 0)
1832 GMX_LOG(mdlog.info).appendTextFormatted(
1833 "Found env.var. %s = %s, using value %d",
1840 static void check_dd_restrictions(const gmx_domdec_t *dd,
1841 const t_inputrec *ir,
1842 const gmx::MDLogger &mdlog)
1844 if (ir->ePBC == epbcSCREW &&
1845 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
1847 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
1850 if (ir->ns_type == ensSIMPLE)
1852 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
1855 if (ir->nstlist == 0)
1857 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1860 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
1862 GMX_LOG(mdlog.warning).appendText("comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
1866 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
1871 r = ddbox->box_size[XX];
1872 for (di = 0; di < dd->ndim; di++)
1875 /* Check using the initial average cell size */
1876 r = std::min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
1882 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
1884 static DlbState forceDlbOffOrBail(DlbState cmdlineDlbState,
1885 const std::string &reasonStr,
1886 const gmx::MDLogger &mdlog)
1888 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
1889 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
1891 if (cmdlineDlbState == DlbState::onUser)
1893 gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1895 else if (cmdlineDlbState == DlbState::offCanTurnOn)
1897 GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1899 return DlbState::offForever;
1902 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1904 * This function parses the parameters of "-dlb" command line option setting
1905 * corresponding state values. Then it checks the consistency of the determined
1906 * state with other run parameters and settings. As a result, the initial state
1907 * may be altered or an error may be thrown if incompatibility of options is detected.
1909 * \param [in] mdlog Logger.
1910 * \param [in] dlbOption Enum value for the DLB option.
1911 * \param [in] bRecordLoad True if the load balancer is recording load information.
1912 * \param [in] mdrunOptions Options for mdrun.
1913 * \param [in] ir Pointer mdrun to input parameters.
1914 * \returns DLB initial/startup state.
1916 static DlbState determineInitialDlbState(const gmx::MDLogger &mdlog,
1917 DlbOption dlbOption, gmx_bool bRecordLoad,
1918 const gmx::MdrunOptions &mdrunOptions,
1919 const t_inputrec *ir)
1921 DlbState dlbState = DlbState::offCanTurnOn;
1925 case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1926 case DlbOption::no: dlbState = DlbState::offUser; break;
1927 case DlbOption::yes: dlbState = DlbState::onUser; break;
1928 default: gmx_incons("Invalid dlbOption enum value");
1931 /* Reruns don't support DLB: bail or override auto mode */
1932 if (mdrunOptions.rerun)
1934 std::string reasonStr = "it is not supported in reruns.";
1935 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1938 /* Unsupported integrators */
1939 if (!EI_DYNAMICS(ir->eI))
1941 auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1942 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1945 /* Without cycle counters we can't time work to balance on */
1948 std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
1949 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1952 if (mdrunOptions.reproducible)
1954 std::string reasonStr = "you started a reproducible run.";
1957 case DlbState::offUser:
1959 case DlbState::offForever:
1960 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1962 case DlbState::offCanTurnOn:
1963 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1964 case DlbState::onCanTurnOff:
1965 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1967 case DlbState::onUser:
1968 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", mdlog);
1970 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", static_cast<int>(dlbState));
1977 static void set_dd_dim(const gmx::MDLogger &mdlog, gmx_domdec_t *dd)
1980 if (getenv("GMX_DD_ORDER_ZYX") != nullptr)
1982 /* Decomposition order z,y,x */
1983 GMX_LOG(mdlog.info).appendText("Using domain decomposition order z, y, x");
1984 for (int dim = DIM-1; dim >= 0; dim--)
1986 if (dd->nc[dim] > 1)
1988 dd->dim[dd->ndim++] = dim;
1994 /* Decomposition order x,y,z */
1995 for (int dim = 0; dim < DIM; dim++)
1997 if (dd->nc[dim] > 1)
1999 dd->dim[dd->ndim++] = dim;
2006 /* Set dim[0] to avoid extra checks on ndim in several places */
2011 static gmx_domdec_comm_t *init_dd_comm()
2013 gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
2015 comm->n_load_have = 0;
2016 comm->n_load_collect = 0;
2018 comm->haveTurnedOffDlb = false;
2020 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
2022 comm->sum_nat[i] = 0;
2026 comm->load_step = 0;
2029 clear_ivec(comm->load_lim);
2033 /* This should be replaced by a unique pointer */
2034 comm->balanceRegion = ddBalanceRegionAllocate();
2039 /* Returns whether mtop contains constraints and/or vsites */
2040 static bool systemHasConstraintsOrVsites(const gmx_mtop_t &mtop)
2042 auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
2044 while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
2046 if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
2055 static void setupUpdateGroups(const gmx::MDLogger &mdlog,
2056 const gmx_mtop_t &mtop,
2057 const t_inputrec &inputrec,
2059 int numMpiRanksTotal,
2060 gmx_domdec_comm_t *comm)
2062 /* When we have constraints and/or vsites, it is beneficial to use
2063 * update groups (when possible) to allow independent update of groups.
2065 if (!systemHasConstraintsOrVsites(mtop))
2067 /* No constraints or vsites, atoms can be updated independently */
2071 comm->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
2072 comm->useUpdateGroups =
2073 (!comm->updateGroupingPerMoleculetype.empty() &&
2074 getenv("GMX_NO_UPDATEGROUPS") == nullptr);
2076 if (comm->useUpdateGroups)
2078 int numUpdateGroups = 0;
2079 for (const auto &molblock : mtop.molblock)
2081 numUpdateGroups += molblock.nmol*comm->updateGroupingPerMoleculetype[molblock.type].numBlocks();
2084 /* Note: We would like to use dd->nnodes for the atom count estimate,
2085 * but that is not yet available here. But this anyhow only
2086 * affect performance up to the second dd_partition_system call.
2088 int homeAtomCountEstimate = mtop.natoms/numMpiRanksTotal;
2089 comm->updateGroupsCog =
2090 std::make_unique<gmx::UpdateGroupsCog>(mtop,
2091 comm->updateGroupingPerMoleculetype,
2092 maxReferenceTemperature(inputrec),
2093 homeAtomCountEstimate);
2095 /* To use update groups, the large domain-to-domain cutoff distance
2096 * should be compatible with the box size.
2098 comm->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*comm, 0) < cutoffMargin);
2100 if (comm->useUpdateGroups)
2102 GMX_LOG(mdlog.info).appendTextFormatted(
2103 "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f nm\n",
2105 mtop.natoms/static_cast<double>(numUpdateGroups),
2106 comm->updateGroupsCog->maxUpdateGroupRadius());
2110 GMX_LOG(mdlog.info).appendTextFormatted("The combination of rlist and box size prohibits the use of update groups\n");
2111 comm->updateGroupingPerMoleculetype.clear();
2112 comm->updateGroupsCog.reset(nullptr);
2117 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2118 static void set_dd_limits_and_grid(const gmx::MDLogger &mdlog,
2119 t_commrec *cr, gmx_domdec_t *dd,
2120 const DomdecOptions &options,
2121 const gmx::MdrunOptions &mdrunOptions,
2122 const gmx_mtop_t *mtop,
2123 const t_inputrec *ir,
2125 gmx::ArrayRef<const gmx::RVec> xGlobal,
2129 real r_bonded_limit = -1;
2130 const real tenPercentMargin = 1.1;
2131 gmx_domdec_comm_t *comm = dd->comm;
2133 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
2134 dd->numBoundedDimensions = inputrec2nboundeddim(ir);
2135 dd->haveDynamicBox = inputrecDynamicBox(ir);
2136 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
2138 dd->pme_recv_f_alloc = 0;
2139 dd->pme_recv_f_buf = nullptr;
2141 /* Initialize to GPU share count to 0, might change later */
2142 comm->nrank_gpu_shared = 0;
2144 comm->dlbState = determineInitialDlbState(mdlog, options.dlbOption, comm->bRecordLoad, mdrunOptions, ir);
2145 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2146 /* To consider turning DLB on after 2*nstlist steps we need to check
2147 * at partitioning count 3. Thus we need to increase the first count by 2.
2149 comm->ddPartioningCountFirstDlbOff += 2;
2151 GMX_LOG(mdlog.info).appendTextFormatted(
2152 "Dynamic load balancing: %s", edlbs_names[int(comm->dlbState)]);
2154 comm->bPMELoadBalDLBLimits = FALSE;
2156 /* Allocate the charge group/atom sorting struct */
2157 comm->sort = std::make_unique<gmx_domdec_sort_t>();
2159 /* We need to decide on update groups early, as this affects communication distances */
2160 comm->useUpdateGroups = false;
2161 if (ir->cutoff_scheme == ecutsVERLET)
2163 real cutoffMargin = std::sqrt(max_cutoff2(ir->ePBC, box)) - ir->rlist;
2164 setupUpdateGroups(mdlog, *mtop, *ir, cutoffMargin, cr->nnodes, comm);
2167 // TODO: Check whether all bondeds are within update groups
2168 comm->haveInterDomainBondeds = (mtop->natoms > gmx_mtop_num_molecules(*mtop) ||
2169 mtop->bIntermolecularInteractions);
2170 comm->haveInterDomainMultiBodyBondeds = (multi_body_bondeds_count(mtop) > 0);
2172 if (comm->useUpdateGroups)
2174 dd->splitConstraints = false;
2175 dd->splitSettles = false;
2179 dd->splitConstraints = gmx::inter_charge_group_constraints(*mtop);
2180 dd->splitSettles = gmx::inter_charge_group_settles(*mtop);
2185 /* Set the cut-off to some very large value,
2186 * so we don't need if statements everywhere in the code.
2187 * We use sqrt, since the cut-off is squared in some places.
2189 comm->cutoff = GMX_CUTOFF_INF;
2193 comm->cutoff = atomToAtomIntoDomainToDomainCutoff(*comm, ir->rlist);
2195 comm->cutoff_mbody = 0;
2197 /* Determine the minimum cell size limit, affected by many factors */
2198 comm->cellsize_limit = 0;
2199 comm->bBondComm = FALSE;
2201 /* We do not allow home atoms to move beyond the neighboring domain
2202 * between domain decomposition steps, which limits the cell size.
2203 * Get an estimate of cell size limit due to atom displacement.
2204 * In most cases this is a large overestimate, because it assumes
2205 * non-interaction atoms.
2206 * We set the chance to 1 in a trillion steps.
2208 constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2209 const real limitForAtomDisplacement =
2210 minCellSizeForAtomDisplacement(*mtop, *ir,
2211 comm->updateGroupingPerMoleculetype,
2212 c_chanceThatAtomMovesBeyondDomain);
2213 GMX_LOG(mdlog.info).appendTextFormatted(
2214 "Minimum cell size due to atom displacement: %.3f nm",
2215 limitForAtomDisplacement);
2217 comm->cellsize_limit = std::max(comm->cellsize_limit,
2218 limitForAtomDisplacement);
2220 /* TODO: PME decomposition currently requires atoms not to be more than
2221 * 2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2222 * In nearly all cases, limitForAtomDisplacement will be smaller
2223 * than 2/3*rlist, so the PME requirement is satisfied.
2224 * But it would be better for both correctness and performance
2225 * to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2226 * Note that we would need to improve the pairlist buffer case.
2229 if (comm->haveInterDomainBondeds)
2231 if (options.minimumCommunicationRange > 0)
2233 comm->cutoff_mbody = atomToAtomIntoDomainToDomainCutoff(*comm, options.minimumCommunicationRange);
2234 if (options.useBondedCommunication)
2236 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
2240 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
2242 r_bonded_limit = comm->cutoff_mbody;
2244 else if (ir->bPeriodicMols)
2246 /* Can not easily determine the required cut-off */
2247 GMX_LOG(mdlog.warning).appendText("NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.");
2248 comm->cutoff_mbody = comm->cutoff/2;
2249 r_bonded_limit = comm->cutoff_mbody;
2257 dd_bonded_cg_distance(mdlog, mtop, ir, as_rvec_array(xGlobal.data()), box,
2258 options.checkBondedInteractions,
2261 gmx_bcast(sizeof(r_2b), &r_2b, cr);
2262 gmx_bcast(sizeof(r_mb), &r_mb, cr);
2264 /* We use an initial margin of 10% for the minimum cell size,
2265 * except when we are just below the non-bonded cut-off.
2267 if (options.useBondedCommunication)
2269 if (std::max(r_2b, r_mb) > comm->cutoff)
2271 r_bonded = std::max(r_2b, r_mb);
2272 r_bonded_limit = tenPercentMargin*r_bonded;
2273 comm->bBondComm = TRUE;
2278 r_bonded_limit = std::min(tenPercentMargin*r_bonded, comm->cutoff);
2280 /* We determine cutoff_mbody later */
2284 /* No special bonded communication,
2285 * simply increase the DD cut-off.
2287 r_bonded_limit = tenPercentMargin*std::max(r_2b, r_mb);
2288 comm->cutoff_mbody = r_bonded_limit;
2289 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
2292 GMX_LOG(mdlog.info).appendTextFormatted(
2293 "Minimum cell size due to bonded interactions: %.3f nm",
2296 comm->cellsize_limit = std::max(comm->cellsize_limit, r_bonded_limit);
2300 if (dd->splitConstraints && options.constraintCommunicationRange <= 0)
2302 /* There is a cell size limit due to the constraints (P-LINCS) */
2303 rconstr = gmx::constr_r_max(mdlog, mtop, ir);
2304 GMX_LOG(mdlog.info).appendTextFormatted(
2305 "Estimated maximum distance required for P-LINCS: %.3f nm",
2307 if (rconstr > comm->cellsize_limit)
2309 GMX_LOG(mdlog.info).appendText("This distance will limit the DD cell size, you can override this with -rcon");
2312 else if (options.constraintCommunicationRange > 0)
2314 /* Here we do not check for dd->splitConstraints.
2315 * because one can also set a cell size limit for virtual sites only
2316 * and at this point we don't know yet if there are intercg v-sites.
2318 GMX_LOG(mdlog.info).appendTextFormatted(
2319 "User supplied maximum distance required for P-LINCS: %.3f nm",
2320 options.constraintCommunicationRange);
2321 rconstr = options.constraintCommunicationRange;
2323 comm->cellsize_limit = std::max(comm->cellsize_limit, rconstr);
2325 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
2327 if (options.numCells[XX] > 0)
2329 copy_ivec(options.numCells, dd->nc);
2330 set_dd_dim(mdlog, dd);
2331 set_ddbox_cr(*cr, &dd->nc, *ir, box, xGlobal, ddbox);
2333 if (options.numPmeRanks >= 0)
2335 cr->npmenodes = options.numPmeRanks;
2339 /* When the DD grid is set explicitly and -npme is set to auto,
2340 * don't use PME ranks. We check later if the DD grid is
2341 * compatible with the total number of ranks.
2346 real acs = average_cellsize_min(dd, ddbox);
2347 if (acs < comm->cellsize_limit)
2349 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2350 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
2351 acs, comm->cellsize_limit);
2356 set_ddbox_cr(*cr, nullptr, *ir, box, xGlobal, ddbox);
2358 /* We need to choose the optimal DD grid and possibly PME nodes */
2360 dd_choose_grid(mdlog, cr, dd, ir, mtop, box, ddbox,
2361 options.numPmeRanks,
2362 !isDlbDisabled(comm),
2364 comm->cellsize_limit, comm->cutoff,
2365 comm->haveInterDomainBondeds);
2367 if (dd->nc[XX] == 0)
2370 gmx_bool bC = (dd->splitConstraints && rconstr > r_bonded_limit);
2371 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
2372 !bC ? "-rdd" : "-rcon",
2373 comm->dlbState != DlbState::offUser ? " or -dds" : "",
2374 bC ? " or your LINCS settings" : "");
2376 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2377 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
2379 "Look in the log file for details on the domain decomposition",
2380 cr->nnodes-cr->npmenodes, limit, buf);
2382 set_dd_dim(mdlog, dd);
2385 GMX_LOG(mdlog.info).appendTextFormatted(
2386 "Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2387 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
2389 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
2390 if (cr->nnodes - dd->nnodes != cr->npmenodes)
2392 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2393 "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
2394 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
2396 if (cr->npmenodes > dd->nnodes)
2398 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2399 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
2401 if (cr->npmenodes > 0)
2403 comm->npmenodes = cr->npmenodes;
2407 comm->npmenodes = dd->nnodes;
2410 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2412 /* The following choices should match those
2413 * in comm_cost_est in domdec_setup.c.
2414 * Note that here the checks have to take into account
2415 * that the decomposition might occur in a different order than xyz
2416 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2417 * in which case they will not match those in comm_cost_est,
2418 * but since that is mainly for testing purposes that's fine.
2420 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
2421 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
2422 getenv("GMX_PMEONEDD") == nullptr)
2424 comm->npmedecompdim = 2;
2425 comm->npmenodes_x = dd->nc[XX];
2426 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
2430 /* In case nc is 1 in both x and y we could still choose to
2431 * decompose pme in y instead of x, but we use x for simplicity.
2433 comm->npmedecompdim = 1;
2434 if (dd->dim[0] == YY)
2436 comm->npmenodes_x = 1;
2437 comm->npmenodes_y = comm->npmenodes;
2441 comm->npmenodes_x = comm->npmenodes;
2442 comm->npmenodes_y = 1;
2445 GMX_LOG(mdlog.info).appendTextFormatted(
2446 "PME domain decomposition: %d x %d x %d",
2447 comm->npmenodes_x, comm->npmenodes_y, 1);
2451 comm->npmedecompdim = 0;
2452 comm->npmenodes_x = 0;
2453 comm->npmenodes_y = 0;
2456 snew(comm->slb_frac, DIM);
2457 if (isDlbDisabled(comm))
2459 comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
2460 comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
2461 comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
2464 if (comm->haveInterDomainBondeds && comm->cutoff_mbody == 0)
2466 if (comm->bBondComm || !isDlbDisabled(comm))
2468 /* Set the bonded communication distance to halfway
2469 * the minimum and the maximum,
2470 * since the extra communication cost is nearly zero.
2472 real acs = average_cellsize_min(dd, ddbox);
2473 comm->cutoff_mbody = 0.5*(r_bonded + acs);
2474 if (!isDlbDisabled(comm))
2476 /* Check if this does not limit the scaling */
2477 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
2478 options.dlbScaling*acs);
2480 if (!comm->bBondComm)
2482 /* Without bBondComm do not go beyond the n.b. cut-off */
2483 comm->cutoff_mbody = std::min(comm->cutoff_mbody, comm->cutoff);
2484 if (comm->cellsize_limit >= comm->cutoff)
2486 /* We don't loose a lot of efficieny
2487 * when increasing it to the n.b. cut-off.
2488 * It can even be slightly faster, because we need
2489 * less checks for the communication setup.
2491 comm->cutoff_mbody = comm->cutoff;
2494 /* Check if we did not end up below our original limit */
2495 comm->cutoff_mbody = std::max(comm->cutoff_mbody, r_bonded_limit);
2497 if (comm->cutoff_mbody > comm->cellsize_limit)
2499 comm->cellsize_limit = comm->cutoff_mbody;
2502 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2507 fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
2508 "cellsize limit %f\n",
2509 gmx::boolToString(comm->bBondComm), comm->cellsize_limit);
2514 check_dd_restrictions(dd, ir, mdlog);
2518 static char *init_bLocalCG(const gmx_mtop_t *mtop)
2523 ncg = ncg_mtop(mtop);
2524 snew(bLocalCG, ncg);
2525 for (cg = 0; cg < ncg; cg++)
2527 bLocalCG[cg] = FALSE;
2533 void dd_init_bondeds(FILE *fplog,
2535 const gmx_mtop_t *mtop,
2536 const gmx_vsite_t *vsite,
2537 const t_inputrec *ir,
2538 gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
2540 gmx_domdec_comm_t *comm;
2542 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
2546 if (comm->bBondComm)
2548 /* Communicate atoms beyond the cut-off for bonded interactions */
2551 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
2553 comm->bLocalCG = init_bLocalCG(mtop);
2557 /* Only communicate atoms based on cut-off */
2558 comm->cglink = nullptr;
2559 comm->bLocalCG = nullptr;
2563 static void writeSettings(gmx::TextWriter *log,
2565 const gmx_mtop_t *mtop,
2566 const t_inputrec *ir,
2567 gmx_bool bDynLoadBal,
2569 const gmx_ddbox_t *ddbox)
2571 gmx_domdec_comm_t *comm;
2580 log->writeString("The maximum number of communication pulses is:");
2581 for (d = 0; d < dd->ndim; d++)
2583 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2585 log->ensureLineBreak();
2586 log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm", comm->cellsize_limit);
2587 log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2588 log->writeString("The allowed shrink of domain decomposition cells is:");
2589 for (d = 0; d < DIM; d++)
2593 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
2600 comm->cellsize_min_dlb[d]/
2601 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
2603 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2606 log->ensureLineBreak();
2610 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2611 log->writeString("The initial number of communication pulses is:");
2612 for (d = 0; d < dd->ndim; d++)
2614 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2616 log->ensureLineBreak();
2617 log->writeString("The initial domain decomposition cell size is:");
2618 for (d = 0; d < DIM; d++)
2622 log->writeStringFormatted(" %c %.2f nm",
2623 dim2char(d), dd->comm->cellsize_min[d]);
2626 log->ensureLineBreak();
2630 const bool haveInterDomainVsites =
2631 (countInterUpdategroupVsites(*mtop, comm->updateGroupingPerMoleculetype) != 0);
2633 if (comm->haveInterDomainBondeds ||
2634 haveInterDomainVsites ||
2635 dd->splitConstraints || dd->splitSettles)
2637 std::string decompUnits;
2638 if (comm->useUpdateGroups)
2640 decompUnits = "atom groups";
2644 decompUnits = "atoms";
2647 log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:", decompUnits.c_str());
2648 log->writeLineFormatted("%40s %-7s %6.3f nm", "non-bonded interactions", "", comm->cutoff);
2652 limit = dd->comm->cellsize_limit;
2656 if (dynamic_dd_box(*dd))
2658 log->writeLine("(the following are initial values, they could change due to box deformation)");
2660 limit = dd->comm->cellsize_min[XX];
2661 for (d = 1; d < DIM; d++)
2663 limit = std::min(limit, dd->comm->cellsize_min[d]);
2667 if (comm->haveInterDomainBondeds)
2669 log->writeLineFormatted("%40s %-7s %6.3f nm",
2670 "two-body bonded interactions", "(-rdd)",
2671 std::max(comm->cutoff, comm->cutoff_mbody));
2672 log->writeLineFormatted("%40s %-7s %6.3f nm",
2673 "multi-body bonded interactions", "(-rdd)",
2674 (comm->bBondComm || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
2676 if (haveInterDomainVsites)
2678 log->writeLineFormatted("%40s %-7s %6.3f nm",
2679 "virtual site constructions", "(-rcon)", limit);
2681 if (dd->splitConstraints || dd->splitSettles)
2683 std::string separation = gmx::formatString("atoms separated by up to %d constraints",
2685 log->writeLineFormatted("%40s %-7s %6.3f nm\n",
2686 separation.c_str(), "(-rcon)", limit);
2688 log->ensureLineBreak();
2692 static void logSettings(const gmx::MDLogger &mdlog,
2694 const gmx_mtop_t *mtop,
2695 const t_inputrec *ir,
2697 const gmx_ddbox_t *ddbox)
2699 gmx::StringOutputStream stream;
2700 gmx::TextWriter log(&stream);
2701 writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2702 if (dd->comm->dlbState == DlbState::offCanTurnOn)
2705 log.ensureEmptyLine();
2706 log.writeLine("When dynamic load balancing gets turned on, these settings will change to:");
2708 writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2710 GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2713 static void set_cell_limits_dlb(const gmx::MDLogger &mdlog,
2716 const t_inputrec *ir,
2717 const gmx_ddbox_t *ddbox)
2719 gmx_domdec_comm_t *comm;
2720 int d, dim, npulse, npulse_d_max, npulse_d;
2725 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2727 /* Determine the maximum number of comm. pulses in one dimension */
2729 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2731 /* Determine the maximum required number of grid pulses */
2732 if (comm->cellsize_limit >= comm->cutoff)
2734 /* Only a single pulse is required */
2737 else if (!bNoCutOff && comm->cellsize_limit > 0)
2739 /* We round down slightly here to avoid overhead due to the latency
2740 * of extra communication calls when the cut-off
2741 * would be only slightly longer than the cell size.
2742 * Later cellsize_limit is redetermined,
2743 * so we can not miss interactions due to this rounding.
2745 npulse = static_cast<int>(0.96 + comm->cutoff/comm->cellsize_limit);
2749 /* There is no cell size limit */
2750 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
2753 if (!bNoCutOff && npulse > 1)
2755 /* See if we can do with less pulses, based on dlb_scale */
2757 for (d = 0; d < dd->ndim; d++)
2760 npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->cutoff
2761 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
2762 npulse_d_max = std::max(npulse_d_max, npulse_d);
2764 npulse = std::min(npulse, npulse_d_max);
2767 /* This env var can override npulse */
2768 d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2775 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
2776 for (d = 0; d < dd->ndim; d++)
2778 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
2779 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2780 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
2782 comm->bVacDLBNoLimit = FALSE;
2786 /* cellsize_limit is set for LINCS in init_domain_decomposition */
2787 if (!comm->bVacDLBNoLimit)
2789 comm->cellsize_limit = std::max(comm->cellsize_limit,
2790 comm->cutoff/comm->maxpulse);
2792 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2793 /* Set the minimum cell size for each DD dimension */
2794 for (d = 0; d < dd->ndim; d++)
2796 if (comm->bVacDLBNoLimit ||
2797 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
2799 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2803 comm->cellsize_min_dlb[dd->dim[d]] =
2804 comm->cutoff/comm->cd[d].np_dlb;
2807 if (comm->cutoff_mbody <= 0)
2809 comm->cutoff_mbody = std::min(comm->cutoff, comm->cellsize_limit);
2817 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
2819 /* If each molecule is a single charge group
2820 * or we use domain decomposition for each periodic dimension,
2821 * we do not need to take pbc into account for the bonded interactions.
2823 return (ePBC != epbcNONE && dd->comm->haveInterDomainBondeds &&
2826 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
2829 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2830 static void set_ddgrid_parameters(const gmx::MDLogger &mdlog,
2831 gmx_domdec_t *dd, real dlb_scale,
2832 const gmx_mtop_t *mtop, const t_inputrec *ir,
2833 const gmx_ddbox_t *ddbox)
2835 gmx_domdec_comm_t *comm;
2841 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2843 init_ddpme(dd, &comm->ddpme[0], 0);
2844 if (comm->npmedecompdim >= 2)
2846 init_ddpme(dd, &comm->ddpme[1], 1);
2851 comm->npmenodes = 0;
2852 if (dd->pme_nodeid >= 0)
2854 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2855 "Can not have separate PME ranks without PME electrostatics");
2861 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
2863 if (!isDlbDisabled(comm))
2865 set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2868 logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
2870 if (ir->ePBC == epbcNONE)
2872 vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
2877 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/static_cast<double>(dd->nnodes);
2881 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2883 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
2885 dd->ga2la = new gmx_ga2la_t(natoms_tot,
2886 static_cast<int>(vol_frac*natoms_tot));
2889 /*! \brief Set some important DD parameters that can be modified by env.vars */
2890 static void set_dd_envvar_options(const gmx::MDLogger &mdlog,
2891 gmx_domdec_t *dd, int rank_mysim)
2893 gmx_domdec_comm_t *comm = dd->comm;
2895 dd->bSendRecv2 = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2896 comm->dlb_scale_lim = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2897 comm->eFlop = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2898 int recload = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2899 comm->nstDDDump = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2900 comm->nstDDDumpGrid = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2901 comm->DD_debug = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
2905 GMX_LOG(mdlog.info).appendText("Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication");
2910 GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2911 if (comm->eFlop > 1)
2913 srand(1 + rank_mysim);
2915 comm->bRecordLoad = TRUE;
2919 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
2923 gmx_domdec_t *init_domain_decomposition(const gmx::MDLogger &mdlog,
2925 const DomdecOptions &options,
2926 const gmx::MdrunOptions &mdrunOptions,
2927 const gmx_mtop_t *mtop,
2928 const t_inputrec *ir,
2930 gmx::ArrayRef<const gmx::RVec> xGlobal,
2931 gmx::LocalAtomSetManager *atomSets)
2935 GMX_LOG(mdlog.info).appendTextFormatted(
2936 "\nInitializing Domain Decomposition on %d ranks", cr->nnodes);
2938 dd = new gmx_domdec_t;
2940 dd->comm = init_dd_comm();
2942 /* Initialize DD paritioning counters */
2943 dd->comm->partition_step = INT_MIN;
2946 set_dd_envvar_options(mdlog, dd, cr->nodeid);
2948 gmx_ddbox_t ddbox = {0};
2949 set_dd_limits_and_grid(mdlog, cr, dd, options, mdrunOptions,
2954 make_dd_communicators(mdlog, cr, dd, options.rankOrder);
2956 if (thisRankHasDuty(cr, DUTY_PP))
2958 set_ddgrid_parameters(mdlog, dd, options.dlbScaling, mtop, ir, &ddbox);
2960 setup_neighbor_relations(dd);
2963 /* Set overallocation to avoid frequent reallocation of arrays */
2964 set_over_alloc_dd(TRUE);
2966 clear_dd_cycle_counts(dd);
2968 dd->atomSets = atomSets;
2973 static gmx_bool test_dd_cutoff(t_commrec *cr,
2974 const t_state &state,
2975 real cutoffRequested)
2985 set_ddbox(*dd, false, state.box, true, state.x, &ddbox);
2989 for (d = 0; d < dd->ndim; d++)
2993 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
2994 if (dynamic_dd_box(*dd))
2996 inv_cell_size *= DD_PRES_SCALE_MARGIN;
2999 np = 1 + static_cast<int>(cutoffRequested*inv_cell_size*ddbox.skew_fac[dim]);
3001 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3003 if (np > dd->comm->cd[d].np_dlb)
3008 /* If a current local cell size is smaller than the requested
3009 * cut-off, we could still fix it, but this gets very complicated.
3010 * Without fixing here, we might actually need more checks.
3012 real cellSizeAlongDim = (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim];
3013 if (cellSizeAlongDim*dd->comm->cd[d].np_dlb < cutoffRequested)
3020 if (!isDlbDisabled(dd->comm))
3022 /* If DLB is not active yet, we don't need to check the grid jumps.
3023 * Actually we shouldn't, because then the grid jump data is not set.
3025 if (isDlbOn(dd->comm) &&
3026 check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3031 gmx_sumi(1, &LocallyLimited, cr);
3033 if (LocallyLimited > 0)
3042 gmx_bool change_dd_cutoff(t_commrec *cr,
3043 const t_state &state,
3044 real cutoffRequested)
3046 gmx_bool bCutoffAllowed;
3048 bCutoffAllowed = test_dd_cutoff(cr, state, cutoffRequested);
3052 cr->dd->comm->cutoff = cutoffRequested;
3055 return bCutoffAllowed;