2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
53 #include "gromacs/domdec/collect.h"
54 #include "gromacs/domdec/dlb.h"
55 #include "gromacs/domdec/dlbtiming.h"
56 #include "gromacs/domdec/domdec_network.h"
57 #include "gromacs/domdec/ga2la.h"
58 #include "gromacs/domdec/options.h"
59 #include "gromacs/domdec/partition.h"
60 #include "gromacs/gmxlib/network.h"
61 #include "gromacs/gmxlib/nrnb.h"
62 #include "gromacs/gpu_utils/gpu_utils.h"
63 #include "gromacs/hardware/hw_info.h"
64 #include "gromacs/listed_forces/manage_threading.h"
65 #include "gromacs/math/vec.h"
66 #include "gromacs/math/vectypes.h"
67 #include "gromacs/mdlib/calc_verletbuf.h"
68 #include "gromacs/mdlib/constr.h"
69 #include "gromacs/mdlib/constraintrange.h"
70 #include "gromacs/mdlib/updategroups.h"
71 #include "gromacs/mdlib/vsite.h"
72 #include "gromacs/mdtypes/commrec.h"
73 #include "gromacs/mdtypes/forceoutput.h"
74 #include "gromacs/mdtypes/inputrec.h"
75 #include "gromacs/mdtypes/mdrunoptions.h"
76 #include "gromacs/mdtypes/state.h"
77 #include "gromacs/pbcutil/ishift.h"
78 #include "gromacs/pbcutil/pbc.h"
79 #include "gromacs/pulling/pull.h"
80 #include "gromacs/timing/wallcycle.h"
81 #include "gromacs/topology/block.h"
82 #include "gromacs/topology/idef.h"
83 #include "gromacs/topology/ifunc.h"
84 #include "gromacs/topology/mtop_lookup.h"
85 #include "gromacs/topology/mtop_util.h"
86 #include "gromacs/topology/topology.h"
87 #include "gromacs/utility/basedefinitions.h"
88 #include "gromacs/utility/basenetwork.h"
89 #include "gromacs/utility/cstringutil.h"
90 #include "gromacs/utility/exceptions.h"
91 #include "gromacs/utility/fatalerror.h"
92 #include "gromacs/utility/gmxmpi.h"
93 #include "gromacs/utility/logger.h"
94 #include "gromacs/utility/real.h"
95 #include "gromacs/utility/smalloc.h"
96 #include "gromacs/utility/strconvert.h"
97 #include "gromacs/utility/stringstream.h"
98 #include "gromacs/utility/stringutil.h"
99 #include "gromacs/utility/textwriter.h"
101 #include "atomdistribution.h"
103 #include "cellsizes.h"
104 #include "distribute.h"
105 #include "domdec_constraints.h"
106 #include "domdec_internal.h"
107 #include "domdec_vsite.h"
108 #include "redistribute.h"
// TODO remove this when moving domdec into gmx namespace
using gmx::DdRankOrder;
using gmx::DlbOption;
using gmx::DomdecOptions;
/* Printable names matching the DlbState enum order; "on" appears twice,
 * presumably covering two distinct on-states - TODO confirm against DlbState. */
static const char *edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
/* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
/* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
/* Low 16 bits of a flag word hold the atom count of the group */
#define DD_FLAG_NRCG 65535
/* Forward/backward move bits for decomposition dimension d, packed above bit 16 */
#define DD_FLAG_FW(d) (1<<(16+(d)*2))
#define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
/* The DD zone order */
static const ivec dd_zo[DD_MAXZONE] =
{{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
/* The non-bonded zone-pair setup for domain decomposition
 * The first number is the i-zone, the second number the first j-zone seen by
 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
 * As is, this is for 3D decomposition, where there are 4 i-zones.
 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
 */
/* NOTE(review): only the first initializer row and part of the declaration are
 * visible in this extraction; remaining rows appear to be missing. */
ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
/* Maps DD grid coordinates i (ivec) to a lexicographic rank index with x major */
#define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
147 static void index2xyz(ivec nc,int ind,ivec xyz)
149 xyz[XX] = ind % nc[XX];
150 xyz[YY] = (ind / nc[XX]) % nc[YY];
151 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
155 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
157 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
158 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
159 xyz[ZZ] = ind % nc[ZZ];
/* Returns the DD rank id for domain-grid coordinates c.
 * With a Cartesian PP+PME setup the precomputed ddindex2ddnodeid table is used;
 * with a plain Cartesian PP communicator the rank comes from MPI_Cart_rank.
 * NOTE(review): declarations, braces and the final return are missing from
 * this extraction; code lines kept byte-identical.
 */
static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
/* Lexicographic index of the coordinates, x major */
ddindex = dd_index(dd->nc, c);
if (dd->comm->bCartesianPP_PME)
ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
else if (dd->comm->bCartesianPP)
MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
/* Returns the 1-based global atom number for local atom index i.
 * Fails fatally when i is outside the locally known atom range.
 * NOTE(review): braces and surrounding branches (likely a dd == nullptr path
 * and the return) are missing from this extraction; code kept byte-identical.
 */
int ddglatnr(const gmx_domdec_t *dd, int i)
if (i >= dd->comm->atomRanges.numAtomsTotal())
gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
/* +1 converts the 0-based global index to a 1-based atom number */
atnr = dd->globalAtomIndices[i] + 1;
206 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
208 return &dd->comm->cgs_gl;
211 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t &dd)
213 GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
214 return dd.comm->systemInfo.updateGroupingPerMoleculetype;
217 void dd_store_state(gmx_domdec_t *dd, t_state *state)
221 if (state->ddp_count != dd->ddp_count)
223 gmx_incons("The MD state does not match the domain decomposition state");
226 state->cg_gl.resize(dd->ncg_home);
227 for (i = 0; i < dd->ncg_home; i++)
229 state->cg_gl[i] = dd->globalAtomGroupIndices[i];
232 state->ddp_count_cg_gl = dd->ddp_count;
235 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
237 return &dd->comm->zones;
/* Returns, for charge group icg, the j-charge-group range (*jcg0, *jcg1) of
 * the i-zone containing icg, plus the allowed pair-search shift range per
 * dimension in shift0/shift1.
 * NOTE(review): the zone-search loop body, several branches and braces are
 * missing from this extraction; code lines kept byte-identical.
 */
void dd_get_ns_ranges(const gmx_domdec_t *dd, int icg,
int *jcg0, int *jcg1, ivec shift0, ivec shift1)
gmx_domdec_zones_t *zones;
zones = &dd->comm->zones;
/* Advance izone until the i-zone containing icg is found */
while (icg >= zones->izone[izone].cg1)
else if (izone < zones->nizone)
*jcg0 = zones->izone[izone].jcg0;
gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
icg, izone, zones->nizone);
*jcg1 = zones->izone[izone].jcg1;
for (d = 0; d < dd->ndim; d++)
shift0[dim] = zones->izone[izone].shift0[dim];
shift1[dim] = zones->izone[izone].shift1[dim];
/* Triclinic directions, and DLB beyond the first dimension, need a wider range */
if (dd->comm->tric_dir[dim] || (isDlbOn(dd->comm) && d > 0))
/* A conservative approach, this can be optimized */
284 int dd_numHomeAtoms(const gmx_domdec_t &dd)
286 return dd.comm->atomRanges.numHomeAtoms();
289 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
291 /* We currently set mdatoms entries for all atoms:
292 * local + non-local + communicated for vsite + constraints
295 return dd->comm->atomRanges.numAtomsTotal();
298 int dd_natoms_vsite(const gmx_domdec_t *dd)
300 return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
303 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
305 *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
306 *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
/* Communicates the coordinates x of home atoms to neighboring domains.
 * For each decomposition dimension and each pulse, the selected home
 * coordinates are packed into a send buffer (with a PBC shift, or the
 * screw-PBC shift+mirror, applied for domains at the lower cell boundary),
 * exchanged via ddSendrecv, and received either directly in place in x or
 * through a temporary buffer that is scattered per zone afterwards.
 * NOTE(review): braces, else keywords and some declarations are missing from
 * this extraction; code lines kept byte-identical.
 */
void dd_move_x(gmx_domdec_t *dd,
gmx::ArrayRef<gmx::RVec> x,
gmx_wallcycle *wcycle)
wallcycle_start(wcycle, ewcMOVEX);
gmx_domdec_comm_t *comm;
gmx_domdec_comm_dim_t *cd;
rvec shift = {0, 0, 0};
gmx_bool bPBC, bScrew;
/* Running total of atoms placed so far; starts after the home atoms */
nat_tot = comm->atomRanges.numHomeAtoms();
for (int d = 0; d < dd->ndim; d++)
/* Only domains at the lower cell boundary need a PBC shift here */
bPBC = (dd->ci[dd->dim[d]] == 0);
bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
copy_rvec(box[dd->dim[d]], shift);
for (const gmx_domdec_ind_t &ind : cd->ind)
DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
gmx::ArrayRef<gmx::RVec> &sendBuffer = sendBufferAccess.buffer;
/* Case 1: no PBC shift needed, plain copy */
for (int j : ind.index)
sendBuffer[n] = x[j];
/* Case 2: apply the PBC shift vector */
for (int j : ind.index)
/* We need to shift the coordinates */
for (int d = 0; d < DIM; d++)
sendBuffer[n][d] = x[j][d] + shift[d];
/* Case 3: screw PBC, shift x and mirror y/z */
for (int j : ind.index)
sendBuffer[n][XX] = x[j][XX] + shift[XX];
* This operation requires a special shift force
* treatment, which is performed in calc_vir.
sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
/* Only allocate a separate receive buffer when not receiving in place */
DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
gmx::ArrayRef<gmx::RVec> receiveBuffer;
if (cd->receiveInPlace)
receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
receiveBuffer = receiveBufferAccess.buffer;
/* Send and receive the coordinates */
ddSendrecv(dd, d, dddirBackward,
sendBuffer, receiveBuffer);
if (!cd->receiveInPlace)
/* Scatter the received coordinates into x per zone */
for (int zone = 0; zone < nzone; zone++)
for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
x[i] = receiveBuffer[j++];
nat_tot += ind.nrecv[nzone+1];
wallcycle_stop(wcycle, ewcMOVEX);
/* Communicates forces on non-home atoms back to their home domains and adds
 * them to the local force array, the reverse operation of dd_move_x.
 * Dimensions and pulses are walked in reverse order; received forces are
 * accumulated into f, and, when the virial is needed, also into the shift
 * force fshift for the appropriate shift index. Screw PBC mirrors y/z forces.
 * NOTE(review): braces, else keywords and some declarations are missing from
 * this extraction; code lines kept byte-identical.
 */
void dd_move_f(gmx_domdec_t *dd,
gmx::ForceWithShiftForces *forceWithShiftForces,
gmx_wallcycle *wcycle)
wallcycle_start(wcycle, ewcMOVEF);
gmx::ArrayRef<gmx::RVec> f = forceWithShiftForces->force();
gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();
gmx_domdec_comm_t &comm = *dd->comm;
int nzone = comm.zones.n/2;
int nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
/* Walk the dimensions in reverse order relative to dd_move_x */
for (int d = dd->ndim-1; d >= 0; d--)
/* Only forces in domains near the PBC boundaries need to
consider PBC in the treatment of fshift */
const bool shiftForcesNeedPbc = (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
const bool applyScrewPbc = (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
/* Determine which shift vector we need */
ivec vis = { 0, 0, 0 };
const int is = IVEC2IS(vis);
/* Loop over the pulses */
const gmx_domdec_comm_dim_t &cd = comm.cd[d];
for (int p = cd.numPulses() - 1; p >= 0; p--)
const gmx_domdec_ind_t &ind = cd.ind[p];
DDBufferAccess<gmx::RVec> receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
gmx::ArrayRef<gmx::RVec> &receiveBuffer = receiveBufferAccess.buffer;
nat_tot -= ind.nrecv[nzone+1];
/* Only allocate a separate send buffer when not sending in place */
DDBufferAccess<gmx::RVec> sendBufferAccess(comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
gmx::ArrayRef<gmx::RVec> sendBuffer;
if (cd.receiveInPlace)
sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
sendBuffer = sendBufferAccess.buffer;
/* Gather the forces to send, per zone */
for (int zone = 0; zone < nzone; zone++)
for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
sendBuffer[j++] = f[i];
/* Communicate the forces */
ddSendrecv(dd, d, dddirForward,
sendBuffer, receiveBuffer);
/* Add the received forces */
if (!shiftForcesNeedPbc)
for (int j : ind.index)
for (int d = 0; d < DIM; d++)
f[j][d] += receiveBuffer[n][d];
else if (!applyScrewPbc)
for (int j : ind.index)
for (int d = 0; d < DIM; d++)
f[j][d] += receiveBuffer[n][d];
/* Add this force to the shift force */
for (int d = 0; d < DIM; d++)
fshift[is][d] += receiveBuffer[n][d];
/* Screw PBC: x adds, y and z are mirrored */
for (int j : ind.index)
/* Rotate the force */
f[j][XX] += receiveBuffer[n][XX];
f[j][YY] -= receiveBuffer[n][YY];
f[j][ZZ] -= receiveBuffer[n][ZZ];
if (shiftForcesNeedPbc)
/* Add this force to the shift force */
for (int d = 0; d < DIM; d++)
fshift[is][d] += receiveBuffer[n][d];
wallcycle_stop(wcycle, ewcMOVEF);
/* Convenience function for extracting a real buffer from an rvec buffer
* To reduce the number of temporary communication buffers and avoid
* cache pollution, we reuse gmx::RVec buffers for storing reals.
* This function returns a real buffer reference with the same number
* of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
*/
/* NOTE(review): the size argument of arrayRefFromArray is truncated in this
 * extraction; the data pointer reinterprets the RVec storage as reals. */
static gmx::ArrayRef<real>
realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
/* Spreads the per-atom real values v from home atoms to neighboring domains,
 * analogous to dd_move_x but for a scalar per atom and without PBC shifts.
 * NOTE(review): braces, else keywords and some declarations are missing from
 * this extraction; code lines kept byte-identical.
 */
void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
gmx_domdec_comm_t *comm;
gmx_domdec_comm_dim_t *cd;
/* Running total of atoms placed so far; starts after the home atoms */
nat_tot = comm->atomRanges.numHomeAtoms();
for (int d = 0; d < dd->ndim; d++)
for (const gmx_domdec_ind_t &ind : cd->ind)
/* Note: We provision for RVec instead of real, so a factor of 3
* more than needed. The buffer actually already has this size
* and we pass a plain pointer below, so this does not matter.
*/
DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
/* Pack the values of the selected home atoms */
for (int j : ind.index)
sendBuffer[n++] = v[j];
DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
gmx::ArrayRef<real> receiveBuffer;
if (cd->receiveInPlace)
receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
/* Send and receive the data */
ddSendrecv(dd, d, dddirBackward,
sendBuffer, receiveBuffer);
if (!cd->receiveInPlace)
/* Scatter the received values into v per zone */
for (int zone = 0; zone < nzone; zone++)
for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
v[i] = receiveBuffer[j++];
nat_tot += ind.nrecv[nzone+1];
/* Sums the per-atom real values v of non-home atoms back onto their home
 * atoms, analogous to dd_move_f but for a scalar per atom.
 * NOTE(review): braces, else keywords and some declarations are missing from
 * this extraction; code lines kept byte-identical.
 */
void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
gmx_domdec_comm_t *comm;
gmx_domdec_comm_dim_t *cd;
nzone = comm->zones.n/2;
nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
/* Walk dimensions and pulses in reverse order relative to the spread */
for (int d = dd->ndim-1; d >= 0; d--)
for (int p = cd->numPulses() - 1; p >= 0; p--)
const gmx_domdec_ind_t &ind = cd->ind[p];
/* Note: We provision for RVec instead of real, so a factor of 3
* more than needed. The buffer actually already has this size
* and we typecast, so this works as intended.
*/
DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
nat_tot -= ind.nrecv[nzone + 1];
DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
gmx::ArrayRef<real> sendBuffer;
if (cd->receiveInPlace)
sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
/* Gather the values to send, per zone */
for (int zone = 0; zone < nzone; zone++)
for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
sendBuffer[j++] = v[i];
/* Communicate the forces */
ddSendrecv(dd, d, dddirForward,
sendBuffer, receiveBuffer);
/* Add the received forces */
for (int j : ind.index)
v[j] += receiveBuffer[n];
/* Returns the cut-off distance usable for multi-body bonded interactions.
 * With inter-domain multi-body bondeds present, this is cutoff_mbody when set,
 * otherwise (no DLB) the minimum cell size over all decomposition dimensions,
 * capped from above by the non-bonded cut-off.
 * NOTE(review): the declaration of r and the return statement are missing
 * from this extraction; code lines kept byte-identical.
 */
real dd_cutoff_multibody(const gmx_domdec_t *dd)
const gmx_domdec_comm_t &comm = *dd->comm;
const DDSystemInfo &systemInfo = comm.systemInfo;
if (systemInfo.haveInterDomainMultiBodyBondeds)
if (comm.cutoff_mbody > 0)
r = comm.cutoff_mbody;
/* cutoff_mbody=0 means we do not have DLB */
r = comm.cellsize_min[dd->dim[0]];
for (int di = 1; di < dd->ndim; di++)
r = std::min(r, comm.cellsize_min[dd->dim[di]]);
r = std::max(r, comm.cutoff_mbody);
/* Never report more than the two-body cut-off */
r = std::min(r, systemInfo.cutoff);
684 real dd_cutoff_twobody(const gmx_domdec_t *dd)
688 r_mb = dd_cutoff_multibody(dd);
690 return std::max(dd->comm->systemInfo.cutoff, r_mb);
694 static void dd_cart_coord2pmecoord(const gmx_domdec_t *dd, const ivec coord,
699 nc = dd->nc[dd->comm->cartpmedim];
700 ntot = dd->comm->ntot[dd->comm->cartpmedim];
701 copy_ivec(coord, coord_pme);
702 coord_pme[dd->comm->cartpmedim] =
703 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
706 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
711 npme = dd->comm->npmenodes;
713 /* Here we assign a PME node to communicate with this DD node
714 * by assuming that the major index of both is x.
715 * We add cr->npmenodes/2 to obtain an even distribution.
717 return (ddindex*npme + npme/2)/npp;
/* Builds and returns a snew-allocated array (length npmenodes, caller frees)
 * of the simulation rank ids of the PME-only ranks when PP and PME ranks are
 * interleaved: a PME rank is inserted after each run of PP ranks that map to
 * the same PME index.
 * NOTE(review): declarations, braces, the counter increment and the return
 * are missing from this extraction; code lines kept byte-identical.
 */
static int *dd_interleaved_pme_ranks(const gmx_domdec_t *dd)
snew(pme_rank, dd->comm->npmenodes);
for (i = 0; i < dd->nnodes; i++)
p0 = ddindex2pmeindex(dd, i);
p1 = ddindex2pmeindex(dd, i+1);
/* Insert a PME rank where the PME index advances (or at the end) */
if (i+1 == dd->nnodes || p1 > p0)
fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
pme_rank[n] = i + 1 + n;
/* Returns the PME slab/rank index for DD grid coordinates (x, y, z).
 * NOTE(review): this extraction mixes what looks like a legacy/disabled
 * Cartesian branch (bCartesian, gmx_ddindex2xyz, dd_coords2pmecoords) with
 * the active path that calls ddindex2pmeindex; many lines and the
 * surrounding preprocessor context are missing - code kept byte-identical.
 */
static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
if (dd->comm->bCartesian) {
gmx_ddindex2xyz(dd->nc,ddindex,coords);
dd_coords2pmecoords(dd,coords,coords_pme);
copy_ivec(dd->ntot,nc);
/* Reduce to the PME-only part of the Cartesian grid */
nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
/* Active path: map the DD index to a PME index */
slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
/* Returns the simulation rank id for DD grid coordinates (x, y, z):
 * via MPI_Cart_rank for Cartesian PP+PME, via the ddindex2simnodeid table
 * for Cartesian PP, otherwise DD index plus the interleaved PME offset.
 * NOTE(review): declarations, braces and the return are missing from this
 * extraction; code lines kept byte-identical.
 */
static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
gmx_domdec_comm_t *comm;
int ddindex, nodeid = -1;
if (comm->bCartesianPP_PME)
MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
ddindex = dd_index(cr->dd->nc, coords);
if (comm->bCartesianPP)
nodeid = comm->ddindex2simnodeid[ddindex];
/* Interleaved case: offset the DD index by the preceding PME ranks */
nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
/* Returns the simulation rank id of the PME-only rank that serves the given
 * PP simulation rank (or, presumably, -1 for a PME rank - the declaration of
 * pmenode is missing from this extraction; confirm upstream).
 * Handles three setups: Cartesian PP+PME (via MPI Cartesian coordinates),
 * Cartesian PP only, and interleaved ranks (via the pmenodes list or the
 * DD-index mapping).
 * NOTE(review): braces, else branches and returns are missing; code lines
 * kept byte-identical.
 */
static int dd_simnode2pmenode(const gmx_domdec_t *dd,
const t_commrec gmx_unused *cr,
const gmx_domdec_comm_t *comm = dd->comm;
/* This assumes a uniform x domain decomposition grid cell size */
if (comm->bCartesianPP_PME)
ivec coord, coord_pme;
MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
/* This is a PP node */
dd_cart_coord2pmecoord(dd, coord, coord_pme);
MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
else if (comm->bCartesianPP)
if (sim_nodeid < dd->nnodes)
pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
/* This assumes DD cells with identical x coordinates
* are numbered sequentially.
*/
if (dd->comm->pmenodes == nullptr)
if (sim_nodeid < dd->nnodes)
/* The DD index equals the nodeid */
pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid)
/* Search the explicit PME rank list for the first rank above sim_nodeid */
while (sim_nodeid > dd->comm->pmenodes[i])
if (sim_nodeid < dd->comm->pmenodes[i])
pmenode = dd->comm->pmenodes[i];
/* Returns the number of PME domains in x and y as a NumPmeDomains struct.
 * NOTE(review): the surrounding branch and return syntax are missing from
 * this extraction; code lines kept byte-identical.
 */
NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
dd->comm->npmenodes_x, dd->comm->npmenodes_y
/* Returns the list of simulation rank ids of the PP ranks that the PME rank
 * with index pmenodeid serves, by scanning the whole DD grid and testing
 * which PP cells map to this PME rank.
 * NOTE(review): braces, some declarations and the return are missing from
 * this extraction; code lines kept byte-identical.
 */
std::vector<int> get_pme_ddranks(const t_commrec *cr, int pmenodeid)
ivec coord, coord_pme;
std::vector<int> ddranks;
/* Upper bound: PP ranks divided evenly over PME ranks, rounded up */
ddranks.reserve((dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
for (x = 0; x < dd->nc[XX]; x++)
for (y = 0; y < dd->nc[YY]; y++)
for (z = 0; z < dd->nc[ZZ]; z++)
if (dd->comm->bCartesianPP_PME)
dd_cart_coord2pmecoord(dd, coord, coord_pme);
/* Cartesian case: match against this rank's own PME coordinates */
if (dd->ci[XX] == coord_pme[XX] &&
dd->ci[YY] == coord_pme[YY] &&
dd->ci[ZZ] == coord_pme[ZZ])
ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
/* The slab corresponds to the nodeid in the PME group */
if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
/* Returns whether this PP rank should send virial/energy contributions to
 * its PME rank: only the last PP rank sharing a given PME rank does, so with
 * fewer PME than DD ranks the result is FALSE for all but that last rank.
 * NOTE(review): braces, the bReceive = FALSE assignments and the return are
 * missing from this extraction; code lines kept byte-identical.
 */
static gmx_bool receive_vir_ener(const gmx_domdec_t *dd, const t_commrec *cr)
gmx_bool bReceive = TRUE;
if (cr->npmenodes < dd->nnodes)
gmx_domdec_comm_t *comm = dd->comm;
if (comm->bCartesianPP_PME)
int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
/* Check whether the next rank along the PME dimension shares our PME rank */
coords[comm->cartpmedim]++;
if (coords[comm->cartpmedim] < dd->nc[comm->cartpmedim])
MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
if (dd_simnode2pmenode(dd, cr, rank) == pmenode)
/* This is not the last PP node for pmenode */
GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
if (cr->sim_nodeid+1 < cr->nnodes &&
dd_simnode2pmenode(dd, cr, cr->sim_nodeid+1) == pmenode)
/* This is not the last PP node for pmenode */
/* Allocates and fills *dim_f with nc+1 cumulative cell-boundary fractions for
 * dimension dim: from the static load-balancing fractions slb_frac when set,
 * otherwise uniform i/nc spacing; the last entry is 1.
 * NOTE(review): declarations, braces and, presumably, the (*dim_f)[0] = 0
 * initialization are missing from this extraction; code kept byte-identical.
 */
static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
gmx_domdec_comm_t *comm;
snew(*dim_f, dd->nc[dim]+1);
for (i = 1; i < dd->nc[dim]; i++)
if (comm->slb_frac[dim])
/* Accumulate the user-provided per-cell fractions */
(*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
(*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
(*dim_f)[dd->nc[dim]] = 1;
/* Initializes the PME slab bookkeeping ddpme for PME dimension index dimind:
 * determines the matching DD dimension, the slab count, and for each PME slab
 * the min/max PP cell location range along that dimension, then fills the
 * static load-balancing boundary fractions.
 * NOTE(review): braces, early returns and several branches are missing from
 * this extraction; code lines kept byte-identical.
 */
static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
int pmeindex, slab, nso, i;
/* Special case: decomposition only in y while PME is not decomposed in x */
if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
ddpme->dim = dimind;
ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
ddpme->nslab = (ddpme->dim == 0 ?
dd->comm->npmenodes_x :
dd->comm->npmenodes_y);
if (ddpme->nslab <= 1)
nso = dd->comm->npmenodes/ddpme->nslab;
/* Determine for each PME slab the PP location range for dimension dim */
snew(ddpme->pp_min, ddpme->nslab);
snew(ddpme->pp_max, ddpme->nslab);
/* Start with an empty (inverted) range per slab */
for (slab = 0; slab < ddpme->nslab; slab++)
ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
ddpme->pp_max[slab] = 0;
for (i = 0; i < dd->nnodes; i++)
ddindex2xyz(dd->nc, i, xyz);
/* For y only use our y/z slab.
* This assumes that the PME x grid size matches the DD grid size.
*/
if (dimind == 0 || xyz[XX] == dd->ci[XX])
pmeindex = ddindex2pmeindex(dd, i);
slab = pmeindex/nso;
slab = pmeindex % ddpme->nslab;
/* Widen the slab's PP range to include this rank's cell */
ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
/* Returns the maximum PME grid-communication shift along x when the first
 * PME decomposition dimension is x.
 * NOTE(review): braces and the fallback return (presumably 0) are missing
 * from this extraction; code lines kept byte-identical.
 */
int dd_pme_maxshift_x(const gmx_domdec_t *dd)
if (dd->comm->ddpme[0].dim == XX)
return dd->comm->ddpme[0].maxshift;
/* Returns the maximum PME grid-communication shift along y, checking both
 * PME decomposition dimensions for a y match.
 * NOTE(review): braces and the fallback return (presumably 0) are missing
 * from this extraction; code lines kept byte-identical.
 */
int dd_pme_maxshift_y(const gmx_domdec_t *dd)
if (dd->comm->ddpme[0].dim == YY)
return dd->comm->ddpme[0].maxshift;
else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
return dd->comm->ddpme[1].maxshift;
1084 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
1086 /* Note that the cycles value can be incorrect, either 0 or some
1087 * extremely large value, when our thread migrated to another core
1088 * with an unsynchronized cycle counter. If this happens less often
1089 * that once per nstlist steps, this will not cause issues, since
1090 * we later subtract the maximum value from the sum over nstlist steps.
1091 * A zero count will slightly lower the total, but that's a small effect.
1092 * Note that the main purpose of the subtraction of the maximum value
1093 * is to avoid throwing off the load balancing when stalls occur due
1094 * e.g. system activity or network congestion.
1096 dd->comm->cycl[ddCycl] += cycles;
1097 dd->comm->cycl_n[ddCycl]++;
1098 if (cycles > dd->comm->cycl_max[ddCycl])
1100 dd->comm->cycl_max[ddCycl] = cycles;
/* Creates the MPI communicator for load-balancing rows along decomposition
 * dimension index dim_ind, starting from grid location loc: all ranks on the
 * same row join one communicator via MPI_Comm_split, and the row root
 * allocates the DLB row-master bookkeeping.
 * NOTE(review): braces, loop increments and MPI guards are missing from this
 * extraction; code lines kept byte-identical.
 */
static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
gmx_bool bPartOfGroup = FALSE;
dim = dd->dim[dim_ind];
copy_ivec(loc, loc_c);
/* Walk the row to find whether our rank lies on it */
for (i = 0; i < dd->nc[dim]; i++)
rank = dd_index(dd->nc, loc_c);
if (rank == dd->rank)
/* This process is part of the group */
bPartOfGroup = TRUE;
MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
dd->comm->mpi_comm_load[dim_ind] = c_row;
if (!isDlbDisabled(dd->comm))
DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
if (dd->ci[dim] == dd->master_ci[dim])
/* This is the root process of this row */
cellsizes.rowMaster = std::make_unique<RowMaster>();
RowMaster &rowMaster = *cellsizes.rowMaster;
rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
rowMaster.isCellMin.resize(dd->nc[dim]);
rowMaster.bounds.resize(dd->nc[dim]);
rowMaster.buf_ncd.resize(dd->nc[dim]);
/* This is not a root process, we only need to receive cell_f */
cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
if (dd->ci[dim] == dd->master_ci[dim])
snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
/* Sets up the communicator of PP ranks that share a GPU on the same physical
 * node, used for DLB resource sharing: the PP communicator is split first by
 * a physical-node hash and then by gpu_id; single-member communicators are
 * freed again since no sharing occurs.
 * NOTE(review): the MPI preprocessor guards, braces and early return are
 * missing from this extraction; code lines kept byte-identical.
 */
void dd_setup_dlb_resource_sharing(t_commrec *cr,
int physicalnode_id_hash;
MPI_Comm mpi_comm_pp_physicalnode;
if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
/* Only ranks with short-ranged tasks (currently) use GPUs.
* If we don't have GPUs assigned, there are no resources to share.
*/
physicalnode_id_hash = gmx_physicalnode_id_hash();
fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
dd->rank, physicalnode_id_hash, gpu_id);
/* Split the PP communicator over the physical nodes */
/* TODO: See if we should store this (before), as it's also used for
* for the nodecomm summation.
*/
// TODO PhysicalNodeCommunicator could be extended/used to handle
// the need for per-node per-group communicators.
MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
&mpi_comm_pp_physicalnode);
MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
&dd->comm->mpi_comm_gpu_shared);
/* The intermediate per-node communicator is no longer needed */
MPI_Comm_free(&mpi_comm_pp_physicalnode);
MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
/* Note that some ranks could share a GPU, while others don't */
if (dd->comm->nrank_gpu_shared == 1)
MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
GMX_UNUSED_VALUE(cr);
GMX_UNUSED_VALUE(gpu_id);
/* Creates the load-balancing communicators for all decomposition dimensions
 * by calling make_load_communicator for dimension 0, then over rows for
 * dimension 1, then over planes for dimension 2.
 * NOTE(review): braces, the ndim branch conditions and loc updates are
 * missing from this extraction; code lines kept byte-identical.
 */
static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
int dim0, dim1, i, j;
fprintf(debug, "Making load communicators\n");
dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
make_load_communicator(dd, 0, loc);
/* Rows along the second decomposition dimension */
for (i = 0; i < dd->nc[dim0]; i++)
make_load_communicator(dd, 1, loc);
/* Planes for the third decomposition dimension */
for (i = 0; i < dd->nc[dim0]; i++)
for (j = 0; j < dd->nc[dim1]; j++)
make_load_communicator(dd, 2, loc);
fprintf(debug, "Finished making load communicators\n");
/*! \brief Sets up the relation between neighboring domains and zones */
/* Determines the forward/backward neighbor rank per decomposition dimension,
 * the shift vectors of all communication zones, the i-zone/j-zone pair
 * ranges and allowed shift ranges, and finally allocates DLB cell-size data
 * and load communicators when enabled.
 * NOTE(review): braces, several loop bodies and the s[] wrap handling are
 * missing from this extraction; code lines kept byte-identical.
 */
static void setup_neighbor_relations(gmx_domdec_t *dd)
int d, dim, i, j, m;
gmx_domdec_zones_t *zones;
gmx_domdec_ns_ranges_t *izone;
GMX_ASSERT(dd->ndim >= 0, "Must have non-negative number of dimensions for DD");
for (d = 0; d < dd->ndim; d++)
/* Neighbor in the +1 direction along this dimension (periodic wrap) */
copy_ivec(dd->ci, tmp);
tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
/* Neighbor in the -1 direction along this dimension (periodic wrap) */
copy_ivec(dd->ci, tmp);
tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
dd->neighbor[d][1]);
/* 2^ndim zones, 2^(ndim-1) i-zones */
int nzone = (1 << dd->ndim);
GMX_ASSERT(dd->ndim < DIM, "Invalid number of dimensions");
int nizone = (1 << std::max(dd->ndim - 1, 0));
assert(nizone >= 1 && nizone <= DD_MAXIZONE);
zones = &dd->comm->zones;
for (i = 0; i < nzone; i++)
clear_ivec(zones->shift[i]);
/* Map the canonical zone order onto the used decomposition dimensions */
for (d = 0; d < dd->ndim; d++)
zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
for (i = 0; i < nzone; i++)
for (d = 0; d < DIM; d++)
s[d] = dd->ci[d] - zones->shift[i][d];
else if (s[d] >= dd->nc[d])
zones->nizone = nizone;
for (i = 0; i < zones->nizone; i++)
assert(ddNonbondedZonePairRanges[i][0] == i);
izone = &zones->izone[i];
/* dd_zp3 is for 3D decomposition, for fewer dimensions use only
* j-zones up to nzone.
*/
izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
for (dim = 0; dim < DIM; dim++)
if (dd->nc[dim] == 1)
/* All shifts should be allowed */
izone->shift0[dim] = -1;
izone->shift1[dim] = 1;
/* Determine the min/max j-zone shift wrt the i-zone */
izone->shift0[dim] = 1;
izone->shift1[dim] = -1;
for (j = izone->j0; j < izone->j1; j++)
int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
if (shift_diff < izone->shift0[dim])
izone->shift0[dim] = shift_diff;
if (shift_diff > izone->shift1[dim])
izone->shift1[dim] = shift_diff;
if (!isDlbDisabled(dd->comm))
dd->comm->cellsizesWithDlb.resize(dd->ndim);
if (dd->comm->bRecordLoad)
make_load_communicators(dd);
/* Creates the particle-particle MPI communicator and establishes this rank's
 * DD grid coordinates and the DD master rank, for the Cartesian and
 * non-Cartesian setups.
 * NOTE(review): MPI preprocessor guards, braces and several declarations are
 * missing from this extraction; code lines kept byte-identical.
 */
static void make_pp_communicator(const gmx::MDLogger &mdlog,
t_commrec gmx_unused *cr,
bool gmx_unused reorder)
gmx_domdec_comm_t *comm;
if (comm->bCartesianPP)
/* Set up cartesian communication for the particle-particle part */
GMX_LOG(mdlog.info).appendTextFormatted(
"Will use a Cartesian communicator: %d x %d x %d",
dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
for (int i = 0; i < DIM; i++)
MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
/* We overwrite the old communicator with the new cartesian one */
cr->mpi_comm_mygroup = comm_cart;
dd->mpi_comm_all = cr->mpi_comm_mygroup;
MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
if (comm->bCartesianPP_PME)
/* Since we want to use the original cartesian setup for sim,
* and not the one after split, we need to make an index.
*/
snew(comm->ddindex2ddnodeid, dd->nnodes);
comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
/* Get the rank of the DD master,
* above we made sure that the master node is a PP node.
*/
MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
else if (comm->bCartesianPP)
if (cr->npmenodes == 0)
/* The PP communicator is also
* the communicator for this simulation
*/
cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
cr->nodeid = dd->rank;
MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
/* We need to make an index to go from the coordinates
* to the nodeid of this simulation.
*/
snew(comm->ddindex2simnodeid, dd->nnodes);
snew(buf, dd->nnodes);
if (thisRankHasDuty(cr, DUTY_PP))
buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
/* Communicate the ddindex to simulation nodeid index */
MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
cr->mpi_comm_mysim);
/* Determine the master coordinates and rank.
* The DD master should be the same node as the master of this sim.
*/
for (int i = 0; i < dd->nnodes; i++)
if (comm->ddindex2simnodeid[i] == 0)
ddindex2xyz(dd->nc, i, dd->master_ci);
MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
fprintf(debug, "The master rank is %d\n", dd->masterrank);
/* No Cartesian communicators */
/* We use the rank in dd->comm->all as DD index */
ddindex2xyz(dd->nc, dd->rank, dd->ci);
/* The simulation master nodeid is 0, so the DD master rank is also 0 */
clear_ivec(dd->master_ci);
GMX_LOG(mdlog.info).appendTextFormatted(
"Domain decomposition rank %d, coordinates %d %d %d\n",
dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
"Domain decomposition rank %d, coordinates %d %d %d\n\n",
dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Collect, on every PP rank, the mapping from DD index to simulation nodeid.
 * Only needed for the plain Cartesian-PP case (no combined PP/PME Cartesian
 * communicator); otherwise the function is a no-op.
 * NOTE(review): the MPI_Allreduce sums contributions where each PP rank has
 * filled in only its own slot of buf, so the sum reconstructs the full table.
 */
1500 static void receive_ddindex2simnodeid(gmx_domdec_t *dd,
1504 gmx_domdec_comm_t *comm = dd->comm;
1506 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
1509 snew(comm->ddindex2simnodeid, dd->nnodes);
1510 snew(buf, dd->nnodes);
/* Each PP rank contributes only its own entry; PME-only ranks leave buf zero */
1511 if (thisRankHasDuty(cr, DUTY_PP))
1513 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1515 /* Communicate the ddindex to simulation nodeid index */
1516 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
1517 cr->mpi_comm_mysim);
/* Suppress unused-parameter warnings in the non-MPI build path */
1521 GMX_UNUSED_VALUE(dd);
1522 GMX_UNUSED_VALUE(cr);
/* Split the simulation communicator into a PP part and a PME-only part.
 * With a Cartesian rank order, the split only uses a Cartesian layout when
 * the number of PME ranks divides evenly into a slab of the DD grid
 * (checked per dimension below); otherwise it falls back to an
 * MPI_Comm_split keyed on each rank's duty.
 */
1526 static void split_communicator(const gmx::MDLogger &mdlog,
1527                                t_commrec *cr, gmx_domdec_t *dd,
1528                                DdRankOrder gmx_unused rankOrder,
1529                                bool gmx_unused reorder)
1531 gmx_domdec_comm_t *comm;
1540 if (comm->bCartesianPP)
/* Check in which dimensions the PME ranks form an integer number of slabs */
1542 for (i = 1; i < DIM; i++)
1544 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
1546 if (bDiv[YY] || bDiv[ZZ])
1548 comm->bCartesianPP_PME = TRUE;
1549 /* If we have 2D PME decomposition, which is always in x+y,
1550  * we stack the PME only nodes in z.
1551  * Otherwise we choose the direction that provides the thinnest slab
1552  * of PME only nodes as this will have the least effect
1553  * on the PP communication.
1554  * But for the PME communication the opposite might be better.
1556 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
1558                  dd->nc[YY] > dd->nc[ZZ]))
1560 comm->cartpmedim = ZZ;
1564 comm->cartpmedim = YY;
/* Extend the Cartesian grid in the chosen dimension to hold the PME ranks */
1566 comm->ntot[comm->cartpmedim]
1567     += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
1571 GMX_LOG(mdlog.info).appendTextFormatted(
1572         "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)",
1573         cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
1574 GMX_LOG(mdlog.info).appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1578 if (comm->bCartesianPP_PME)
1584 GMX_LOG(mdlog.info).appendTextFormatted(
1585         "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1586         comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
1588 for (i = 0; i < DIM; i++)
1592 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, static_cast<int>(reorder),
1594 MPI_Comm_rank(comm_cart, &rank);
/* Reordering the master rank away from 0 would break master-based I/O */
1595 if (MASTER(cr) && rank != 0)
1597 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1600 /* With this assignment we lose the link to the original communicator
1601  * which will usually be MPI_COMM_WORLD, unless we have multisim.
1603 cr->mpi_comm_mysim = comm_cart;
1604 cr->sim_nodeid = rank;
1606 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
1608 GMX_LOG(mdlog.info).appendTextFormatted(
1609         "Cartesian rank %d, coordinates %d %d %d\n",
1610         cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Ranks inside the DD grid do PP; ranks in the extension slab do PME */
1612 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1616 if (cr->npmenodes == 0 ||
1617     dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
1619 cr->duty = DUTY_PME;
1622 /* Split the sim communicator into PP and PME only nodes */
1623 MPI_Comm_split(cr->mpi_comm_mysim,
1624                getThisRankDuties(cr),
1625                dd_index(comm->ntot, dd->ci),
1626                &cr->mpi_comm_mygroup);
/* Non-Cartesian rank orders: pick PME ranks by placement policy */
1633 case DdRankOrder::pp_pme:
1634 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1636 case DdRankOrder::interleave:
1637 /* Interleave the PP-only and PME-only ranks */
1638 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1639 comm->pmenodes = dd_interleaved_pme_ranks(dd);
1641 case DdRankOrder::cartesian:
1644 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(rankOrder));
/* A rank with no assigned PME partner must itself be a PME rank */
1647 if (dd_simnode2pmenode(dd, cr, cr->sim_nodeid) == -1)
1649 cr->duty = DUTY_PME;
1656 /* Split the sim communicator into PP and PME only nodes */
1657 MPI_Comm_split(cr->mpi_comm_mysim,
1658                getThisRankDuties(cr),
1660                &cr->mpi_comm_mygroup);
1661 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1665 GMX_LOG(mdlog.info).appendTextFormatted(
1666         "This rank does only %s work.\n",
1667         thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1670 /*! \brief Generates the MPI communicators for domain decomposition
 *
 * Splits off PME-only ranks when requested, builds the PP communicator
 * (possibly Cartesian), and on PP ranks that are not doing PME sets up
 * the link to the corresponding PME rank.
 */
1671 static void make_dd_communicators(const gmx::MDLogger &mdlog,
1673                                   gmx_domdec_t *dd, DdRankOrder ddRankOrder)
1675 gmx_domdec_comm_t *comm;
1680 copy_ivec(dd->nc, comm->ntot);
1682 comm->bCartesianPP     = (ddRankOrder == DdRankOrder::cartesian);
1683 comm->bCartesianPP_PME = FALSE;
1685 /* Reorder the nodes by default. This might change the MPI ranks.
1686  * Real reordering is only supported on very few architectures,
1687  * Blue Gene is one of them.
1689 CartReorder = getenv("GMX_NO_CART_REORDER") == nullptr;
1691 if (cr->npmenodes > 0)
1693 /* Split the communicator into a PP and PME part */
1694 split_communicator(mdlog, cr, dd, ddRankOrder, CartReorder);
1695 if (comm->bCartesianPP_PME)
1697 /* We (possibly) reordered the nodes in split_communicator,
1698  * so it is no longer required in make_pp_communicator.
1700 CartReorder = FALSE;
1705 /* All nodes do PP and PME */
1707 /* We do not require separate communicators */
1708 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
1712 if (thisRankHasDuty(cr, DUTY_PP))
1714 /* Copy or make a new PP communicator */
1715 make_pp_communicator(mdlog, dd, cr, CartReorder);
1719 receive_ddindex2simnodeid(dd, cr);
1722 if (!thisRankHasDuty(cr, DUTY_PME))
1724 /* Set up the communication to our PME node */
1725 dd->pme_nodeid           = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
1726 dd->pme_receive_vir_ener = receive_vir_ener(dd, cr);
1729 fprintf(debug, "My pme_nodeid %d receive ener %s\n",
1730         dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
/* This rank does PME itself, so there is no separate PME partner */
1735 dd->pme_nodeid = -1;
1738 /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1741 dd->ma = std::make_unique<AtomDistribution>(dd->nc,
1743                                             comm->cgs_gl.index[comm->cgs_gl.nr]);
/* Parse the user-supplied static load balancing cell-size string for one
 * dimension (option -ddcsx/-ddcsy/-ddcsz) into an array of nc relative
 * fractions. Returns nullptr when nc <= 1 or no string was given
 * (presumably — the early-return path is outside this view; TODO confirm).
 */
1747 static real *get_slb_frac(const gmx::MDLogger &mdlog,
1748                           const char *dir, int nc, const char *size_string)
1750 real  *slb_frac, tot;
1755 if (nc > 1 && size_string != nullptr)
1757 GMX_LOG(mdlog.info).appendTextFormatted(
1758         "Using static load balancing for the %s direction", dir);
1761 for (i = 0; i < nc; i++)
/* %n records how many characters were consumed so we can advance the cursor */
1764 sscanf(size_string, "%20lf%n", &dbl, &n);
1767 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
1774 std::string relativeCellSizes = "Relative cell sizes:";
1775 for (i = 0; i < nc; i++)
1778 relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1780 GMX_LOG(mdlog.info).appendText(relativeCellSizes);
/* Count the total number of bonded interactions in the topology that
 * involve more than two atoms (NRAL > 2), summed over all molecules.
 * Used to decide whether multi-body bonded communication is needed.
 */
1786 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
1789 gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1791 while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1793 for (auto &ilist : extractILists(*ilists, IF_BOND))
1795 if (NRAL(ilist.functionType) > 2)
/* One interaction occupies ilistStride entries; scale by molecule count */
1797 n += nmol*(ilist.iatoms.size()/ilistStride(ilist));
/* Read an integer from environment variable env_var, falling back to def
 * when the variable is unset; logs the value found. When the variable is
 * set but unparsable, nst presumably keeps a fallback value assigned on a
 * line outside this view — TODO confirm.
 */
1805 static int dd_getenv(const gmx::MDLogger &mdlog,
1806                      const char *env_var, int def)
1812 val = getenv(env_var);
1815 if (sscanf(val, "%20d", &nst) <= 0)
1819 GMX_LOG(mdlog.info).appendTextFormatted(
1820         "Found env.var. %s = %s, using value %d",
/* Validate that the input record is compatible with domain decomposition:
 * screw PBC only decomposes along x, simple neighbor search and nstlist=0
 * are unsupported (fatal), and angular COM removal with PBC only warns.
 */
1827 static void check_dd_restrictions(const gmx_domdec_t *dd,
1828                                   const t_inputrec *ir,
1829                                   const gmx::MDLogger &mdlog)
1831 if (ir->ePBC == epbcSCREW &&
1832     (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
1834 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
1837 if (ir->ns_type == ensSIMPLE)
1839 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
1842 if (ir->nstlist == 0)
1844 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1847 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
1849 GMX_LOG(mdlog.warning).appendText("comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
/* Return the smallest initial average cell size over all decomposed
 * dimensions, correcting each box length for triclinic skew via skew_fac.
 * Dimensions with a single domain do not constrain the result.
 */
1853 static real average_cellsize_min(const gmx_ddbox_t &ddbox,
1854                                  const ivec numDomains)
1856 real r = ddbox.box_size[XX];
1857 for (int d = 0; d < DIM; d++)
1859 if (numDomains[d] > 1)
1861 /* Check using the initial average cell size */
1862 r = std::min(r, ddbox.box_size[d]*ddbox.skew_fac[d]/numDomains[d]);
1869 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
 *
 * If the user explicitly requested DLB (onUser) the unsupported condition
 * is fatal; if DLB was only auto-enabled (offCanTurnOn) we log a note and
 * permanently disable it.
 */
1871 static DlbState forceDlbOffOrBail(DlbState cmdlineDlbState,
1872                                   const std::string &reasonStr,
1873                                   const gmx::MDLogger &mdlog)
1875 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
1876 std::string dlbDisableNote     = "NOTE: disabling dynamic load balancing as ";
1878 if (cmdlineDlbState == DlbState::onUser)
1880 gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1882 else if (cmdlineDlbState == DlbState::offCanTurnOn)
1884 GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1886 return DlbState::offForever;
1889 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1891  * This function parses the parameters of "-dlb" command line option setting
1892  * corresponding state values. Then it checks the consistency of the determined
1893  * state with other run parameters and settings. As a result, the initial state
1894  * may be altered or an error may be thrown if incompatibility of options is detected.
1896  * \param [in] mdlog       Logger.
1897  * \param [in] dlbOption   Enum value for the DLB option.
1898  * \param [in] bRecordLoad True if the load balancer is recording load information.
1899  * \param [in] mdrunOptions  Options for mdrun.
1900  * \param [in] ir          Pointer mdrun to input parameters.
1901  * \returns                DLB initial/startup state.
1903 static DlbState determineInitialDlbState(const gmx::MDLogger &mdlog,
1904                                          DlbOption dlbOption, gmx_bool bRecordLoad,
1905                                          const gmx::MdrunOptions &mdrunOptions,
1906                                          const t_inputrec *ir)
1908 DlbState dlbState = DlbState::offCanTurnOn;
1912 case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1913 case DlbOption::no:               dlbState = DlbState::offUser;      break;
1914 case DlbOption::yes:              dlbState = DlbState::onUser;       break;
1915 default: gmx_incons("Invalid dlbOption enum value");
1918 /* Reruns don't support DLB: bail or override auto mode */
1919 if (mdrunOptions.rerun)
1921 std::string reasonStr = "it is not supported in reruns.";
1922 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1925 /* Unsupported integrators */
1926 if (!EI_DYNAMICS(ir->eI))
1928 auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1929 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1932 /* Without cycle counters we can't time work to balance on */
1935 std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
1936 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1939 if (mdrunOptions.reproducible)
1941 std::string reasonStr = "you started a reproducible run.";
1944 case DlbState::offUser:
1946 case DlbState::offForever:
1947 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1949 case DlbState::offCanTurnOn:
1950 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1951 case DlbState::onCanTurnOff:
/* NOTE(review): this assertion message names DlbState::offCanTurnOff but the
 * case being handled is onCanTurnOff — the message text looks wrong; confirm
 * and correct the string in a behavior-affecting change. */
1952 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1954 case DlbState::onUser:
1955 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", mdlog);
1957 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", static_cast<int>(dlbState));
/* Fill dd->dim[] with the dimensions that are actually decomposed
 * (nc > 1), in x,y,z order by default or z,y,x when GMX_DD_ORDER_ZYX
 * is set, and count them in dd->ndim.
 */
1964 static void set_dd_dim(const gmx::MDLogger &mdlog, gmx_domdec_t *dd)
1967 if (getenv("GMX_DD_ORDER_ZYX") != nullptr)
1969 /* Decomposition order z,y,x */
1970 GMX_LOG(mdlog.info).appendText("Using domain decomposition order z, y, x")
1971 for (int dim = DIM-1; dim >= 0; dim--)
1973 if (dd->nc[dim] > 1)
1975 dd->dim[dd->ndim++] = dim;
1981 /* Decomposition order x,y,z */
1982 for (int dim = 0; dim < DIM; dim++)
1984 if (dd->nc[dim] > 1)
1986 dd->dim[dd->ndim++] = dim;
1993 /* Set dim[0] to avoid extra checks on ndim in several places */
/* Allocate and zero-initialize a new gmx_domdec_comm_t, including the
 * load-balancing counters and the DLB balance-region helper.
 */
1998 static gmx_domdec_comm_t *init_dd_comm()
2000 gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
2002 comm->n_load_have      = 0;
2003 comm->n_load_collect   = 0;
2005 comm->haveTurnedOffDlb = false;
2007 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
2009 comm->sum_nat[i] = 0;
2013 comm->load_step = 0;
2016 clear_ivec(comm->load_lim);
2020 /* This should be replaced by a unique pointer */
2021 comm->balanceRegion = ddBalanceRegionAllocate();
2026 /* Returns whether mtop contains constraints and/or vsites */
2027 static bool systemHasConstraintsOrVsites(const gmx_mtop_t &mtop)
2029 auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
/* Scan every molecule type; any constraint or virtual-site ilist suffices */
2031 while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
2033 if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
/* Decide whether atom update groups can be used and, if so, set up the
 * per-moleculetype grouping and the maximum update-group radius in
 * systemInfo. Update groups are rejected when the maximum radius is not
 * compatible with cutoffMargin (the slack between box and cut-off) or when
 * GMX_NO_UPDATEGROUPS is set.
 */
2042 static void setupUpdateGroups(const gmx::MDLogger &mdlog,
2043                               const gmx_mtop_t &mtop,
2044                               const t_inputrec &inputrec,
2045                               const real cutoffMargin,
2046                               DDSystemInfo *systemInfo)
2048 /* When we have constraints and/or vsites, it is beneficial to use
2049  * update groups (when possible) to allow independent update of groups.
2051 if (!systemHasConstraintsOrVsites(mtop))
2053 /* No constraints or vsites, atoms can be updated independently */
2057 systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
2058 systemInfo->useUpdateGroups =
2059     (!systemInfo->updateGroupingPerMoleculetype.empty() &&
2060      getenv("GMX_NO_UPDATEGROUPS") == nullptr);
2062 if (systemInfo->useUpdateGroups)
2064 int numUpdateGroups = 0;
2065 for (const auto &molblock : mtop.molblock)
2067 numUpdateGroups += molblock.nmol*systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
2070 systemInfo->maxUpdateGroupRadius =
2071     computeMaxUpdateGroupRadius(mtop,
2072                                 systemInfo->updateGroupingPerMoleculetype,
2073                                 maxReferenceTemperature(inputrec));
2075 /* To use update groups, the large domain-to-domain cutoff distance
2076  * should be compatible with the box size.
2078 systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
2080 if (systemInfo->useUpdateGroups)
2082 GMX_LOG(mdlog.info).appendTextFormatted(
2083         "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f nm\n",
2085         mtop.natoms/static_cast<double>(numUpdateGroups),
2086         systemInfo->maxUpdateGroupRadius);
2090 GMX_LOG(mdlog.info).appendTextFormatted("The combination of rlist and box size prohibits the use of update groups\n");
/* Fall back to per-atom decomposition; clear the grouping to signal this */
2091 systemInfo->updateGroupingPerMoleculetype.clear();
/* Cache unit-cell properties derived from the input record: number of PBC
 * dimensions, number of bounded dimensions, whether the DD box can change
 * during the run, and whether screw PBC is in use.
 */
2096 UnitCellInfo::UnitCellInfo(const t_inputrec &ir) :
2097     npbcdim(ePBC2npbcdim(ir.ePBC)),
2098     numBoundedDimensions(inputrec2nboundeddim(&ir)),
2099     ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
2100     haveScrewPBC(ir.ePBC == epbcSCREW)
2104 /*! \brief Set the cell size and interaction limits, as well as the DD grid
 *
 * Determines the DLB state, the communication cut-offs (non-bonded,
 * two-body and multi-body bonded, constraints), the minimum cell size,
 * and then either takes the user-specified DD grid or chooses one,
 * finally deriving the PME decomposition and static load-balancing
 * fractions. Aborts with a collective fatal error when no compatible
 * decomposition exists.
 */
2105 static void set_dd_limits_and_grid(const gmx::MDLogger &mdlog,
2106                                    t_commrec *cr, gmx_domdec_t *dd,
2107                                    const DomdecOptions &options,
2108                                    const gmx::MdrunOptions &mdrunOptions,
2109                                    const gmx_mtop_t *mtop,
2110                                    const t_inputrec *ir,
2112                                    gmx::ArrayRef<const gmx::RVec> xGlobal,
2116 real               r_bonded_limit   = -1;
2117 const real         tenPercentMargin = 1.1;
2118 gmx_domdec_comm_t *comm             = dd->comm;
2120 /* Initialize to GPU share count to 0, might change later */
2121 comm->nrank_gpu_shared = 0;
2123 comm->dlbState = determineInitialDlbState(mdlog, options.dlbOption, comm->bRecordLoad, mdrunOptions, ir);
2124 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2125 /* To consider turning DLB on after 2*nstlist steps we need to check
2126  * at partitioning count 3. Thus we need to increase the first count by 2.
2128 comm->ddPartioningCountFirstDlbOff += 2;
2130 GMX_LOG(mdlog.info).appendTextFormatted(
2131         "Dynamic load balancing: %s", edlbs_names[int(comm->dlbState)]);
2133 comm->bPMELoadBalDLBLimits = FALSE;
2135 /* Allocate the charge group/atom sorting struct */
2136 comm->sort = std::make_unique<gmx_domdec_sort_t>();
2138 /* Generate the simulation system information */
2139 DDSystemInfo &systemInfo = comm->systemInfo;
2141 /* We need to decide on update groups early, as this affects communication distances */
2142 systemInfo.useUpdateGroups = false;
2143 if (ir->cutoff_scheme == ecutsVERLET)
/* cutoffMargin: slack between the largest possible cut-off and rlist */
2145 real cutoffMargin = std::sqrt(max_cutoff2(ir->ePBC, box)) - ir->rlist;
2146 setupUpdateGroups(mdlog, *mtop, *ir, cutoffMargin, &systemInfo);
2148 if (systemInfo.useUpdateGroups)
2150 /* Note: We would like to use dd->nnodes for the atom count estimate,
2151  * but that is not yet available here. But this anyhow only
2152  * affect performance up to the second dd_partition_system call.
2154 const int homeAtomCountEstimate =  mtop->natoms/cr->nnodes;
2155 comm->updateGroupsCog =
2156     std::make_unique<gmx::UpdateGroupsCog>(*mtop,
2157                                            systemInfo.updateGroupingPerMoleculetype,
2158                                            maxReferenceTemperature(*ir),
2159                                            homeAtomCountEstimate);
2163 // TODO: Check whether all bondeds are within update groups
2164 systemInfo.haveInterDomainBondeds          = (mtop->natoms > gmx_mtop_num_molecules(*mtop) ||
2165                                               mtop->bIntermolecularInteractions);
2166 systemInfo.haveInterDomainMultiBodyBondeds = (multi_body_bondeds_count(mtop) > 0);
2168 if (systemInfo.useUpdateGroups)
/* Update groups keep constraints/settles within one group, so they
 * never need to be split between domains */
2170 dd->splitConstraints = false;
2171 dd->splitSettles     = false;
2175 dd->splitConstraints = gmx::inter_charge_group_constraints(*mtop);
2176 dd->splitSettles     = gmx::inter_charge_group_settles(*mtop);
2181 /* Set the cut-off to some very large value,
2182  * so we don't need if statements everywhere in the code.
2183  * We use sqrt, since the cut-off is squared in some places.
2185 systemInfo.cutoff = GMX_CUTOFF_INF;
2189 systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir->rlist);
2191 systemInfo.minCutoffForMultiBody = 0;
2193 /* Determine the minimum cell size limit, affected by many factors */
2194 systemInfo.cellsizeLimit = 0;
2195 comm->bBondComm          = FALSE;
2197 /* We do not allow home atoms to move beyond the neighboring domain
2198  * between domain decomposition steps, which limits the cell size.
2199  * Get an estimate of cell size limit due to atom displacement.
2200  * In most cases this is a large overestimate, because it assumes
2201  * non-interaction atoms.
2202  * We set the chance to 1 in a trillion steps.
2204 constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2205 const real     limitForAtomDisplacement          =
2206     minCellSizeForAtomDisplacement(*mtop, *ir,
2207                                    systemInfo.updateGroupingPerMoleculetype,
2208                                    c_chanceThatAtomMovesBeyondDomain);
2209 GMX_LOG(mdlog.info).appendTextFormatted(
2210         "Minimum cell size due to atom displacement: %.3f nm",
2211         limitForAtomDisplacement);
2213 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2214                                     limitForAtomDisplacement);
2216 /* TODO: PME decomposition currently requires atoms not to be more than
2217  *       2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2218  *       In nearly all cases, limitForAtomDisplacement will be smaller
2219  *       than 2/3*rlist, so the PME requirement is satisfied.
2220  *       But it would be better for both correctness and performance
2221  *       to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2222  *       Note that we would need to improve the pairlist buffer case.
2225 if (systemInfo.haveInterDomainBondeds)
2227 if (options.minimumCommunicationRange > 0)
/* User-specified bonded communication range (-rdd) */
2229 systemInfo.minCutoffForMultiBody =
2230     atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2231 if (options.useBondedCommunication)
2233 comm->bBondComm = (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2237 systemInfo.cutoff      = std::max(systemInfo.cutoff,
2238                                   systemInfo.minCutoffForMultiBody);
2240 r_bonded_limit         = systemInfo.minCutoffForMultiBody;
2242 else if (ir->bPeriodicMols)
2244 /* Can not easily determine the required cut-off */
2245 GMX_LOG(mdlog.warning).appendText("NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.");
2246 systemInfo.minCutoffForMultiBody = systemInfo.cutoff/2;
2247 r_bonded_limit                   = systemInfo.minCutoffForMultiBody;
/* Otherwise measure the actual bonded distances from the coordinates
 * (r_2b: two-body, r_mb: multi-body) and broadcast from the master */
2255 dd_bonded_cg_distance(mdlog, mtop, ir, as_rvec_array(xGlobal.data()), box,
2256                       options.checkBondedInteractions,
2259 gmx_bcast(sizeof(r_2b), &r_2b, cr);
2260 gmx_bcast(sizeof(r_mb), &r_mb, cr);
2262 /* We use an initial margin of 10% for the minimum cell size,
2263  * except when we are just below the non-bonded cut-off.
2265 if (options.useBondedCommunication)
2267 if (std::max(r_2b, r_mb) > comm->systemInfo.cutoff)
2269 r_bonded        = std::max(r_2b, r_mb);
2270 r_bonded_limit  = tenPercentMargin*r_bonded;
2271 comm->bBondComm = TRUE;
2276 r_bonded_limit = std::min(tenPercentMargin*r_bonded, systemInfo.cutoff);
2278 /* We determine cutoff_mbody later */
2282 /* No special bonded communication,
2283  * simply increase the DD cut-off.
2285 r_bonded_limit                   = tenPercentMargin*std::max(r_2b, r_mb);
2286 systemInfo.minCutoffForMultiBody = r_bonded_limit;
2287 systemInfo.cutoff                = std::max(systemInfo.cutoff,
2288                                             systemInfo.minCutoffForMultiBody);
2291 GMX_LOG(mdlog.info).appendTextFormatted(
2292         "Minimum cell size due to bonded interactions: %.3f nm",
2295 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, r_bonded_limit);
2299 if (dd->splitConstraints && options.constraintCommunicationRange <= 0)
2301 /* There is a cell size limit due to the constraints (P-LINCS) */
2302 rconstr = gmx::constr_r_max(mdlog, mtop, ir);
2303 GMX_LOG(mdlog.info).appendTextFormatted(
2304         "Estimated maximum distance required for P-LINCS: %.3f nm",
2306 if (rconstr > systemInfo.cellsizeLimit)
2308 GMX_LOG(mdlog.info).appendText("This distance will limit the DD cell size, you can override this with -rcon");
2311 else if (options.constraintCommunicationRange > 0)
2313 /* Here we do not check for dd->splitConstraints.
2314  * because one can also set a cell size limit for virtual sites only
2315  * and at this point we don't know yet if there are intercg v-sites.
2317 GMX_LOG(mdlog.info).appendTextFormatted(
2318         "User supplied maximum distance required for P-LINCS: %.3f nm",
2319         options.constraintCommunicationRange);
2320 rconstr = options.constraintCommunicationRange;
2322 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, rconstr);
2324 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
/* Either take the user-supplied grid (-dd) or choose one automatically */
2327 if (options.numCells[XX] > 0)
2329 copy_ivec(options.numCells, ddSetup.numDomains);
2330 set_ddbox_cr(*cr, &ddSetup.numDomains, *ir, box, xGlobal, ddbox);
2332 if (options.numPmeRanks >= 0)
2334 ddSetup.numPmeRanks = options.numPmeRanks;
2338 /* When the DD grid is set explicitly and -npme is set to auto,
2339  * don't use PME ranks. We check later if the DD grid is
2340  * compatible with the total number of ranks.
2342 ddSetup.numPmeRanks = 0;
2347 set_ddbox_cr(*cr, nullptr, *ir, box, xGlobal, ddbox);
2349 /* We need to choose the optimal DD grid and possibly PME nodes */
2351 dd_choose_grid(mdlog, cr, ir, mtop, box, ddbox,
2352                options.numPmeRanks,
2353                !isDlbDisabled(comm),
2357 if (ddSetup.numDomains[XX] == 0)
2360 gmx_bool bC = (dd->splitConstraints && rconstr > r_bonded_limit);
2361 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
2362         !bC ? "-rdd" : "-rcon",
2363         comm->dlbState != DlbState::offUser ? " or -dds" : "",
2364         bC ? " or your LINCS settings" : "");
2366 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2367                      "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
2369                      "Look in the log file for details on the domain decomposition",
2370                      cr->nnodes - ddSetup.numPmeRanks,
2371                      ddSetup.cellsizeLimit,
2376 const real acs = average_cellsize_min(*ddbox, ddSetup.numDomains);
2377 if (acs < systemInfo.cellsizeLimit)
2379 if (options.numCells[XX] <= 0)
/* The automatic grid chooser should never violate its own limit */
2381 GMX_RELEASE_ASSERT(false, "dd_choose_grid() should return a grid that satisfies the cell size limits");
2385 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2386                      "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
2387                      acs, systemInfo.cellsizeLimit);
2391 /* Set the DD setup given by ddSetup */
2392 cr->npmenodes = ddSetup.numPmeRanks;
2393 copy_ivec(ddSetup.numDomains, dd->nc);
2394 set_dd_dim(mdlog, dd);
2396 GMX_LOG(mdlog.info).appendTextFormatted(
2397         "Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2398         dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
2400 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
2401 if (cr->nnodes - dd->nnodes != cr->npmenodes)
2403 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2404                      "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
2405                      dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
2407 if (cr->npmenodes > dd->nnodes)
2409 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2410                      "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
2412 if (cr->npmenodes > 0)
2414 comm->npmenodes = cr->npmenodes;
/* No separate PME ranks: every PP rank also does PME */
2418 comm->npmenodes = dd->nnodes;
2421 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2423 /* The following choices should match those
2424  * in comm_cost_est in domdec_setup.c.
2425  * Note that here the checks have to take into account
2426  * that the decomposition might occur in a different order than xyz
2427  * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2428  * in which case they will not match those in comm_cost_est,
2429  * but since that is mainly for testing purposes that's fine.
2431 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
2432     comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
2433     getenv("GMX_PMEONEDD") == nullptr)
2435 comm->npmedecompdim = 2;
2436 comm->npmenodes_x   = dd->nc[XX];
2437 comm->npmenodes_y   = comm->npmenodes/comm->npmenodes_x;
2441 /* In case nc is 1 in both x and y we could still choose to
2442  * decompose pme in y instead of x, but we use x for simplicity.
2444 comm->npmedecompdim = 1;
2445 if (dd->dim[0] == YY)
2447 comm->npmenodes_x = 1;
2448 comm->npmenodes_y = comm->npmenodes;
2452 comm->npmenodes_x = comm->npmenodes;
2453 comm->npmenodes_y = 1;
2456 GMX_LOG(mdlog.info).appendTextFormatted(
2457         "PME domain decomposition: %d x %d x %d",
2458         comm->npmenodes_x, comm->npmenodes_y, 1);
/* PME not in use: clear the PME decomposition fields */
2462 comm->npmedecompdim = 0;
2463 comm->npmenodes_x   = 0;
2464 comm->npmenodes_y   = 0;
2467 snew(comm->slb_frac, DIM);
2468 if (isDlbDisabled(comm))
2470 comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
2471 comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
2472 comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
2475 /* Set the multi-body cut-off and cellsize limit for DLB */
2476 comm->cutoff_mbody   = systemInfo.minCutoffForMultiBody;
2477 comm->cellsize_limit = systemInfo.cellsizeLimit;
2478 if (systemInfo.haveInterDomainBondeds && comm->cutoff_mbody == 0)
2480 if (comm->bBondComm || !isDlbDisabled(comm))
2482 /* Set the bonded communication distance to halfway
2483  * the minimum and the maximum,
2484  * since the extra communication cost is nearly zero.
2486 real acs           = average_cellsize_min(*ddbox, dd->nc);
2487 comm->cutoff_mbody = 0.5*(r_bonded + acs);
2488 if (!isDlbDisabled(comm))
2490 /* Check if this does not limit the scaling */
2491 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
2492                               options.dlbScaling*acs);
2494 if (!comm->bBondComm)
2496 /* Without bBondComm do not go beyond the n.b. cut-off */
2497 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2498 if (comm->cellsize_limit >= systemInfo.cutoff)
2500 /* We don't lose a lot of efficiency
2501  * when increasing it to the n.b. cut-off.
2502  * It can even be slightly faster, because we need
2503  * less checks for the communication setup.
2505 comm->cutoff_mbody = systemInfo.cutoff;
2508 /* Check if we did not end up below our original limit */
2509 comm->cutoff_mbody = std::max(comm->cutoff_mbody, r_bonded_limit);
2511 if (comm->cutoff_mbody > comm->cellsize_limit)
2513 comm->cellsize_limit = comm->cutoff_mbody;
2516 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2521 fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
2522         "cellsize limit %f\n",
2523         gmx::boolToString(comm->bBondComm), comm->cellsize_limit);
2528 check_dd_restrictions(dd, ir, mdlog);
/* Allocate a per-charge-group boolean array, initialized to FALSE,
 * marking which charge groups are local to this domain.
 */
2532 static char *init_bLocalCG(const gmx_mtop_t *mtop)
2537 ncg = ncg_mtop(mtop);
2538 snew(bLocalCG, ncg);
2539 for (cg = 0; cg < ncg; cg++)
2541 bLocalCG[cg] = FALSE;
/* Initialize bonded-interaction bookkeeping for domain decomposition:
 * build the reverse topology and, when bonded communication beyond the
 * cut-off is enabled, the charge-group link table and local-CG flags.
 */
2547 void dd_init_bondeds(FILE *fplog,
2549                      const gmx_mtop_t *mtop,
2550                      const gmx_vsite_t *vsite,
2551                      const t_inputrec *ir,
2552                      gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
2554 gmx_domdec_comm_t *comm;
2556 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
2560 if (comm->bBondComm)
2562 /* Communicate atoms beyond the cut-off for bonded interactions */
2565 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
2567 comm->bLocalCG = init_bLocalCG(mtop);
2571 /* Only communicate atoms based on cut-off */
2572 comm->cglink   = nullptr;
2573 comm->bLocalCG = nullptr;
/* Write a human-readable summary of the DD settings to the log: pulse
 * counts, cell size limits, allowed DLB shrink per dimension, and the
 * maximum interaction distances for bondeds, vsites and constraints.
 */
2577 static void writeSettings(gmx::TextWriter *log,
2579                           const gmx_mtop_t *mtop,
2580                           const t_inputrec *ir,
2581                           gmx_bool bDynLoadBal,
2583                           const gmx_ddbox_t *ddbox)
2585 gmx_domdec_comm_t *comm;
2594 log->writeString("The maximum number of communication pulses is:");
2595 for (d = 0; d < dd->ndim; d++)
2597 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2599 log->ensureLineBreak();
2600 log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm", comm->cellsize_limit);
2601 log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2602 log->writeString("The allowed shrink of domain decomposition cells is:");
2603 for (d = 0; d < DIM; d++)
/* With 2 domains in a non-PBC dimension the cells cannot shrink freely */
2607 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
2614 comm->cellsize_min_dlb[d]/
2615 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
2617 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2620 log->ensureLineBreak();
/* Static (non-DLB) case: report initial pulse counts and cell sizes */
2624 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2625 log->writeString("The initial number of communication pulses is:");
2626 for (d = 0; d < dd->ndim; d++)
2628 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2630 log->ensureLineBreak();
2631 log->writeString("The initial domain decomposition cell size is:");
2632 for (d = 0; d < DIM; d++)
2636 log->writeStringFormatted(" %c %.2f nm",
2637                           dim2char(d), dd->comm->cellsize_min[d]);
2640 log->ensureLineBreak();
2644 const bool haveInterDomainVsites =
2645     (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
2647 if (comm->systemInfo.haveInterDomainBondeds ||
2648     haveInterDomainVsites ||
2649     dd->splitConstraints || dd->splitSettles)
2651 std::string decompUnits;
2652 if (comm->systemInfo.useUpdateGroups)
2654 decompUnits = "atom groups";
2658 decompUnits = "atoms";
2661 log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:", decompUnits.c_str());
2662 log->writeLineFormatted("%40s  %-7s %6.3f nm", "non-bonded interactions", "", comm->systemInfo.cutoff);
2666 limit = dd->comm->cellsize_limit;
2670 if (dd->unitCellInfo.ddBoxIsDynamic)
2672 log->writeLine("(the following are initial values, they could change due to box deformation)");
/* Without DLB the limit is the smallest current cell size over all dims */
2674 limit = dd->comm->cellsize_min[XX];
2675 for (d = 1; d < DIM; d++)
2677 limit = std::min(limit, dd->comm->cellsize_min[d]);
2681 if (comm->systemInfo.haveInterDomainBondeds)
2683 log->writeLineFormatted("%40s  %-7s %6.3f nm",
2684                         "two-body bonded interactions", "(-rdd)",
2685                         std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2686 log->writeLineFormatted("%40s  %-7s %6.3f nm",
2687                         "multi-body bonded interactions", "(-rdd)",
2688                         (comm->bBondComm || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->systemInfo.cutoff, limit));
2690 if (haveInterDomainVsites)
2692 log->writeLineFormatted("%40s  %-7s %6.3f nm",
2693                         "virtual site constructions", "(-rcon)", limit);
2695 if (dd->splitConstraints || dd->splitSettles)
2697 std::string separation = gmx::formatString("atoms separated by up to %d constraints",
2699 log->writeLineFormatted("%40s  %-7s %6.3f nm\n",
2700                         separation.c_str(), "(-rcon)", limit);
2702 log->ensureLineBreak();
/*! \brief Assembles the DD settings report and emits it via \p mdlog as
 * a single paragraph. When DLB is off but may still turn on, also prints
 * the settings that would apply after DLB activation.
 * (Braces and some parameter lines elided by extraction.) */
2706 static void logSettings(const gmx::MDLogger &mdlog,
2708 const gmx_mtop_t *mtop,
2709 const t_inputrec *ir,
2711 const gmx_ddbox_t *ddbox)
/* Buffer the report in a string stream so it can be emitted atomically. */
2713 gmx::StringOutputStream stream;
2714 gmx::TextWriter log(&stream);
2715 writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2716 if (dd->comm->dlbState == DlbState::offCanTurnOn)
2719 log.ensureEmptyLine();
2720 log.writeLine("When dynamic load balancing gets turned on, these settings will change to:");
/* Second pass with bDynLoadBal=true shows the post-DLB settings. */
2722 writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2724 GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
/*! \brief Determines the maximum number of communication pulses and the
 * resulting minimum cell sizes used with dynamic load balancing.
 *
 * NOTE(review): braces and several statements (e.g. the comm = dd->comm
 * assignment, the npulse = 1 single-pulse case, the GMX_DD_NPULSE
 * override) appear elided by extraction; comments cover the visible
 * code only.
 */
2727 static void set_cell_limits_dlb(const gmx::MDLogger &mdlog,
2730 const t_inputrec *ir,
2731 const gmx_ddbox_t *ddbox)
2733 gmx_domdec_comm_t *comm;
2734 int d, dim, npulse, npulse_d_max, npulse_d;
/* With rvdw or rcoulomb zero there is effectively no cut-off. */
2739 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2741 /* Determine the maximum number of comm. pulses in one dimension */
2743 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2745 /* Determine the maximum required number of grid pulses */
2746 if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2748 /* Only a single pulse is required */
2751 else if (!bNoCutOff && comm->cellsize_limit > 0)
2753 /* We round down slightly here to avoid overhead due to the latency
2754 * of extra communication calls when the cut-off
2755 * would be only slightly longer than the cell size.
2756 * Later cellsize_limit is redetermined,
2757 * so we can not miss interactions due to this rounding.
2759 npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff/comm->cellsize_limit);
2763 /* There is no cell size limit */
/* Upper bound: at most ncells-1 pulses are useful in any dimension. */
2764 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
2767 if (!bNoCutOff && npulse > 1)
2769 /* See if we can do with less pulses, based on dlb_scale */
2771 for (d = 0; d < dd->ndim; d++)
/* Pulses needed if cells shrink to dlb_scale of their ideal size. */
2774 npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->systemInfo.cutoff
2775 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
2776 npulse_d_max = std::max(npulse_d_max, npulse_d);
2778 npulse = std::min(npulse, npulse_d_max);
2781 /* This env var can override npulse */
2782 d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
/* Without PBC, DLB in vacuum is unconstrained unless some dimension
 * cannot be fully covered by the chosen number of pulses. */
2789 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
2790 for (d = 0; d < dd->ndim; d++)
2792 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
2793 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2794 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
2796 comm->bVacDLBNoLimit = FALSE;
2800 /* cellsize_limit is set for LINCS in init_domain_decomposition */
2801 if (!comm->bVacDLBNoLimit)
/* Cells must stay large enough that maxpulse pulses reach the cut-off. */
2803 comm->cellsize_limit = std::max(comm->cellsize_limit,
2804 comm->systemInfo.cutoff/comm->maxpulse);
2806 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2807 /* Set the minimum cell size for each DD dimension */
2808 for (d = 0; d < dd->ndim; d++)
2810 if (comm->bVacDLBNoLimit ||
2811 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->systemInfo.cutoff)
2813 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2817 comm->cellsize_min_dlb[dd->dim[d]] =
2818 comm->systemInfo.cutoff/comm->cd[d].np_dlb;
2821 if (comm->cutoff_mbody <= 0)
/* No explicit multi-body cut-off requested: use the tightest bound
 * between the non-bonded cut-off and the cell-size limit. */
2823 comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
/*! \brief Returns whether PBC must be taken into account for bonded
 * interactions under domain decomposition.
 * NOTE(review): part of the return condition (lines between the two
 * visible condition fragments) is elided by extraction; the visible
 * logic requires PBC and inter-domain bondeds, plus a clause involving
 * the z-dimension cell count / epbcXY. */
2831 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
2833 /* If each molecule is a single charge group
2834 * or we use domain decomposition for each periodic dimension,
2835 * we do not need to take pbc into account for the bonded interactions.
2837 return (ePBC != epbcNONE && dd->comm->systemInfo.haveInterDomainBondeds &&
2840 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
2843 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
/* NOTE(review): braces and some statements (comm assignment, the else
 * keywords, vol_frac declaration) appear elided by extraction; comments
 * below cover only the visible code. */
2844 static void set_ddgrid_parameters(const gmx::MDLogger &mdlog,
2845 gmx_domdec_t *dd, real dlb_scale,
2846 const gmx_mtop_t *mtop, const t_inputrec *ir,
2847 const gmx_ddbox_t *ddbox)
2849 gmx_domdec_comm_t *comm;
/* Initialize the PP-PME redistribution setup when PME is in use,
 * for one or two PME decomposition dimensions. */
2855 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2857 init_ddpme(dd, &comm->ddpme[0], 0);
2858 if (comm->npmedecompdim >= 2)
2860 init_ddpme(dd, &comm->ddpme[1], 1);
/* No PME electrostatics: separate PME ranks are a fatal setup error. */
2865 comm->npmenodes = 0;
2866 if (dd->pme_nodeid >= 0)
2868 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2869 "Can not have separate PME ranks without PME electrostatics");
2875 fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2877 if (!isDlbDisabled(comm))
2879 set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2882 logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
/* Estimate the volume fraction of the system each rank sees (home +
 * communicated zones); used to size the global-to-local atom lookup. */
2884 if (ir->ePBC == epbcNONE)
2886 vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
2891 (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, ddbox))/static_cast<double>(dd->nnodes);
2895 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2897 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
2899 dd->ga2la = new gmx_ga2la_t(natoms_tot,
2900 static_cast<int>(vol_frac*natoms_tot));
2903 /*! \brief Set some important DD parameters that can be modified by env.vars */
/* NOTE(review): the if-conditions guarding the log messages and the
 * else branches are elided by extraction; comments describe only the
 * visible statements. */
2904 static void set_dd_envvar_options(const gmx::MDLogger &mdlog,
2905 gmx_domdec_t *dd, int rank_mysim)
2907 gmx_domdec_comm_t *comm = dd->comm;
/* Read the debug/tuning environment variables; the last argument of
 * dd_getenv is the default used when the variable is unset. */
2909 dd->bSendRecv2 = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2910 comm->dlb_scale_lim = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2911 comm->eFlop = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2912 int recload = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2913 comm->nstDDDump = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2914 comm->nstDDDumpGrid = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2915 comm->DD_debug = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
/* (Guard elided) announce the MPI_Sendrecv fallback when enabled. */
2919 GMX_LOG(mdlog.info).appendText("Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication")
2924 GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
/* eFlop > 1 apparently adds rank-dependent randomness; seed per rank. */
2925 if (comm->eFlop > 1)
2927 srand(1 + rank_mysim);
2929 comm->bRecordLoad = TRUE;
/* Otherwise record load only when cycle counters work and recording
 * was not disabled via GMX_DD_RECORD_LOAD. */
2933 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
/* gmx_domdec_t constructor taking the input record; the member
 * initializer list and body are elided by extraction. */
2937 gmx_domdec_t::gmx_domdec_t(const t_inputrec &ir) :
/*! \brief Allocates and sets up the domain decomposition master data
 * structure for this run: grid limits, rank communicators, PP grid
 * parameters and neighbor relations.
 * NOTE(review): braces, some parameters (commrec), trailing arguments
 * of set_dd_limits_and_grid and the final `return dd;` appear elided
 * by extraction; comments cover only the visible code. */
2942 gmx_domdec_t *init_domain_decomposition(const gmx::MDLogger &mdlog,
2944 const DomdecOptions &options,
2945 const gmx::MdrunOptions &mdrunOptions,
2946 const gmx_mtop_t *mtop,
2947 const t_inputrec *ir,
2949 gmx::ArrayRef<const gmx::RVec> xGlobal,
2950 gmx::LocalAtomSetManager *atomSets)
2954 GMX_LOG(mdlog.info).appendTextFormatted(
2955 "\nInitializing Domain Decomposition on %d ranks", cr->nnodes);
2957 dd = new gmx_domdec_t(*ir);
2959 dd->comm = init_dd_comm();
/* Apply environment-variable overrides before choosing the grid. */
2961 set_dd_envvar_options(mdlog, dd, cr->nodeid);
2963 gmx_ddbox_t ddbox = {0};
2964 set_dd_limits_and_grid(mdlog, cr, dd, options, mdrunOptions,
2969 make_dd_communicators(mdlog, cr, dd, options.rankOrder);
/* Only particle-particle ranks need the full grid parameters and
 * neighbor relations. */
2971 if (thisRankHasDuty(cr, DUTY_PP))
2973 set_ddgrid_parameters(mdlog, dd, options.dlbScaling, mtop, ir, &ddbox);
2975 setup_neighbor_relations(dd);
2978 /* Set overallocation to avoid frequent reallocation of arrays */
2979 set_over_alloc_dd(TRUE);
2981 dd->atomSets = atomSets;
/*! \brief Returns whether the DD grid and the current cell sizes allow
 * using \p cutoffRequested; with DLB the decision is made collectively
 * over all ranks.
 * NOTE(review): braces, local declarations (dd, ddbox, np,
 * LocallyLimited) and the return statements are elided by extraction;
 * comments cover only the visible code. */
2986 static gmx_bool test_dd_cutoff(t_commrec *cr,
2987 const t_state &state,
2988 real cutoffRequested)
2998 set_ddbox(*dd, false, state.box, true, state.x, &ddbox);
3002 for (d = 0; d < dd->ndim; d++)
/* DD_CELL_MARGIN guards against rounding; a dynamic (pressure-coupled)
 * box gets an extra scaling margin. */
3006 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
3007 if (dd->unitCellInfo.ddBoxIsDynamic)
3009 inv_cell_size *= DD_PRES_SCALE_MARGIN;
/* Number of pulses this dimension would need for the requested cut-off. */
3012 np = 1 + static_cast<int>(cutoffRequested*inv_cell_size*ddbox.skew_fac[dim]);
3014 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3016 if (np > dd->comm->cd[d].np_dlb)
3021 /* If a current local cell size is smaller than the requested
3022 * cut-off, we could still fix it, but this gets very complicated.
3023 * Without fixing here, we might actually need more checks.
3025 real cellSizeAlongDim = (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim];
3026 if (cellSizeAlongDim*dd->comm->cd[d].np_dlb < cutoffRequested)
3033 if (!isDlbDisabled(dd->comm))
3035 /* If DLB is not active yet, we don't need to check the grid jumps.
3036 * Actually we shouldn't, because then the grid jump data is not set.
3038 if (isDlbOn(dd->comm) &&
3039 check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
/* Make the decision collective: if any rank is locally limited the
 * cut-off change must be rejected everywhere. */
3044 gmx_sumi(1, &LocallyLimited, cr);
3046 if (LocallyLimited > 0)
/*! \brief Tests and, if allowed, applies a new DD communication
 * cut-off; returns whether the change was accepted.
 * NOTE(review): braces and what appears to be an
 * `if (bCutoffAllowed)` guard around the assignment are elided by
 * extraction. */
3055 gmx_bool change_dd_cutoff(t_commrec *cr,
3056 const t_state &state,
3057 real cutoffRequested)
3059 gmx_bool bCutoffAllowed;
3061 bCutoffAllowed = test_dd_cutoff(cr, state, cutoffRequested);
/* Store the new cut-off (presumably only when allowed — guard elided). */
3065 cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3068 return bCutoffAllowed;