2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
53 #include "gromacs/domdec/collect.h"
54 #include "gromacs/domdec/dlb.h"
55 #include "gromacs/domdec/dlbtiming.h"
56 #include "gromacs/domdec/domdec_network.h"
57 #include "gromacs/domdec/ga2la.h"
58 #include "gromacs/domdec/gpuhaloexchange.h"
59 #include "gromacs/domdec/options.h"
60 #include "gromacs/domdec/partition.h"
61 #include "gromacs/gmxlib/network.h"
62 #include "gromacs/gmxlib/nrnb.h"
63 #include "gromacs/gpu_utils/gpu_utils.h"
64 #include "gromacs/hardware/hw_info.h"
65 #include "gromacs/listed_forces/manage_threading.h"
66 #include "gromacs/math/vec.h"
67 #include "gromacs/math/vectypes.h"
68 #include "gromacs/mdlib/calc_verletbuf.h"
69 #include "gromacs/mdlib/constr.h"
70 #include "gromacs/mdlib/constraintrange.h"
71 #include "gromacs/mdlib/updategroups.h"
72 #include "gromacs/mdlib/vsite.h"
73 #include "gromacs/mdtypes/commrec.h"
74 #include "gromacs/mdtypes/forceoutput.h"
75 #include "gromacs/mdtypes/inputrec.h"
76 #include "gromacs/mdtypes/mdrunoptions.h"
77 #include "gromacs/mdtypes/state.h"
78 #include "gromacs/pbcutil/ishift.h"
79 #include "gromacs/pbcutil/pbc.h"
80 #include "gromacs/pulling/pull.h"
81 #include "gromacs/timing/wallcycle.h"
82 #include "gromacs/topology/block.h"
83 #include "gromacs/topology/idef.h"
84 #include "gromacs/topology/ifunc.h"
85 #include "gromacs/topology/mtop_lookup.h"
86 #include "gromacs/topology/mtop_util.h"
87 #include "gromacs/topology/topology.h"
88 #include "gromacs/utility/basedefinitions.h"
89 #include "gromacs/utility/basenetwork.h"
90 #include "gromacs/utility/cstringutil.h"
91 #include "gromacs/utility/exceptions.h"
92 #include "gromacs/utility/fatalerror.h"
93 #include "gromacs/utility/gmxmpi.h"
94 #include "gromacs/utility/logger.h"
95 #include "gromacs/utility/real.h"
96 #include "gromacs/utility/smalloc.h"
97 #include "gromacs/utility/strconvert.h"
98 #include "gromacs/utility/stringstream.h"
99 #include "gromacs/utility/stringutil.h"
100 #include "gromacs/utility/textwriter.h"
102 #include "atomdistribution.h"
104 #include "cellsizes.h"
105 #include "distribute.h"
106 #include "domdec_constraints.h"
107 #include "domdec_internal.h"
108 #include "domdec_setup.h"
109 #include "domdec_vsite.h"
110 #include "redistribute.h"
113 // TODO remove this when moving domdec into gmx namespace
114 using gmx::DdRankOrder;
115 using gmx::DlbOption;
116 using gmx::DomdecOptions;
118 static const char *edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
120 /* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
123 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
124 #define DD_FLAG_NRCG 65535
125 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
126 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
128 /* The DD zone order */
129 static const ivec dd_zo[DD_MAXZONE] =
130 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
132 /* The non-bonded zone-pair setup for domain decomposition
133 * The first number is the i-zone, the second number the first j-zone seen by
134 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
135 * As is, this is for 3D decomposition, where there are 4 i-zones.
136 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
137 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
140 ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
147 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
149 static void index2xyz(ivec nc,int ind,ivec xyz)
151 xyz[XX] = ind % nc[XX];
152 xyz[YY] = (ind / nc[XX]) % nc[YY];
153 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
157 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
159 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
160 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
161 xyz[ZZ] = ind % nc[ZZ];
164 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
168 const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
169 const int ddindex = dd_index(dd->nc, c);
170 if (cartSetup.bCartesianPP_PME)
172 ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
174 else if (cartSetup.bCartesianPP)
177 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
188 int ddglatnr(const gmx_domdec_t *dd, int i)
198 if (i >= dd->comm->atomRanges.numAtomsTotal())
200 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
202 atnr = dd->globalAtomIndices[i] + 1;
208 gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t &dd)
210 GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
211 return dd.comm->systemInfo.updateGroupingPerMoleculetype;
214 void dd_store_state(gmx_domdec_t *dd, t_state *state)
218 if (state->ddp_count != dd->ddp_count)
220 gmx_incons("The MD state does not match the domain decomposition state");
223 state->cg_gl.resize(dd->ncg_home);
224 for (i = 0; i < dd->ncg_home; i++)
226 state->cg_gl[i] = dd->globalAtomGroupIndices[i];
229 state->ddp_count_cg_gl = dd->ddp_count;
232 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
234 return &dd->comm->zones;
237 int dd_numHomeAtoms(const gmx_domdec_t &dd)
239 return dd.comm->atomRanges.numHomeAtoms();
242 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
244 /* We currently set mdatoms entries for all atoms:
245 * local + non-local + communicated for vsite + constraints
248 return dd->comm->atomRanges.numAtomsTotal();
251 int dd_natoms_vsite(const gmx_domdec_t *dd)
253 return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
256 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
258 *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
259 *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
262 void dd_move_x(gmx_domdec_t *dd,
264 gmx::ArrayRef<gmx::RVec> x,
265 gmx_wallcycle *wcycle)
267 wallcycle_start(wcycle, ewcMOVEX);
270 gmx_domdec_comm_t *comm;
271 gmx_domdec_comm_dim_t *cd;
272 rvec shift = {0, 0, 0};
273 gmx_bool bPBC, bScrew;
278 nat_tot = comm->atomRanges.numHomeAtoms();
279 for (int d = 0; d < dd->ndim; d++)
281 bPBC = (dd->ci[dd->dim[d]] == 0);
282 bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
285 copy_rvec(box[dd->dim[d]], shift);
288 for (const gmx_domdec_ind_t &ind : cd->ind)
290 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
291 gmx::ArrayRef<gmx::RVec> &sendBuffer = sendBufferAccess.buffer;
295 for (int j : ind.index)
297 sendBuffer[n] = x[j];
303 for (int j : ind.index)
305 /* We need to shift the coordinates */
306 for (int d = 0; d < DIM; d++)
308 sendBuffer[n][d] = x[j][d] + shift[d];
315 for (int j : ind.index)
318 sendBuffer[n][XX] = x[j][XX] + shift[XX];
320 * This operation requires a special shift force
321 * treatment, which is performed in calc_vir.
323 sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
324 sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
329 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
331 gmx::ArrayRef<gmx::RVec> receiveBuffer;
332 if (cd->receiveInPlace)
334 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
338 receiveBuffer = receiveBufferAccess.buffer;
340 /* Send and receive the coordinates */
341 ddSendrecv(dd, d, dddirBackward,
342 sendBuffer, receiveBuffer);
344 if (!cd->receiveInPlace)
347 for (int zone = 0; zone < nzone; zone++)
349 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
351 x[i] = receiveBuffer[j++];
355 nat_tot += ind.nrecv[nzone+1];
360 wallcycle_stop(wcycle, ewcMOVEX);
363 void dd_move_f(gmx_domdec_t *dd,
364 gmx::ForceWithShiftForces *forceWithShiftForces,
365 gmx_wallcycle *wcycle)
367 wallcycle_start(wcycle, ewcMOVEF);
369 gmx::ArrayRef<gmx::RVec> f = forceWithShiftForces->force();
370 gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();
372 gmx_domdec_comm_t &comm = *dd->comm;
373 int nzone = comm.zones.n/2;
374 int nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
375 for (int d = dd->ndim-1; d >= 0; d--)
377 /* Only forces in domains near the PBC boundaries need to
378 consider PBC in the treatment of fshift */
379 const bool shiftForcesNeedPbc = (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
380 const bool applyScrewPbc = (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
381 /* Determine which shift vector we need */
382 ivec vis = { 0, 0, 0 };
384 const int is = IVEC2IS(vis);
386 /* Loop over the pulses */
387 const gmx_domdec_comm_dim_t &cd = comm.cd[d];
388 for (int p = cd.numPulses() - 1; p >= 0; p--)
390 const gmx_domdec_ind_t &ind = cd.ind[p];
391 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
392 gmx::ArrayRef<gmx::RVec> &receiveBuffer = receiveBufferAccess.buffer;
394 nat_tot -= ind.nrecv[nzone+1];
396 DDBufferAccess<gmx::RVec> sendBufferAccess(comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
398 gmx::ArrayRef<gmx::RVec> sendBuffer;
399 if (cd.receiveInPlace)
401 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
405 sendBuffer = sendBufferAccess.buffer;
407 for (int zone = 0; zone < nzone; zone++)
409 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
411 sendBuffer[j++] = f[i];
415 /* Communicate the forces */
416 ddSendrecv(dd, d, dddirForward,
417 sendBuffer, receiveBuffer);
418 /* Add the received forces */
420 if (!shiftForcesNeedPbc)
422 for (int j : ind.index)
424 for (int d = 0; d < DIM; d++)
426 f[j][d] += receiveBuffer[n][d];
431 else if (!applyScrewPbc)
433 for (int j : ind.index)
435 for (int d = 0; d < DIM; d++)
437 f[j][d] += receiveBuffer[n][d];
439 /* Add this force to the shift force */
440 for (int d = 0; d < DIM; d++)
442 fshift[is][d] += receiveBuffer[n][d];
449 for (int j : ind.index)
451 /* Rotate the force */
452 f[j][XX] += receiveBuffer[n][XX];
453 f[j][YY] -= receiveBuffer[n][YY];
454 f[j][ZZ] -= receiveBuffer[n][ZZ];
455 if (shiftForcesNeedPbc)
457 /* Add this force to the shift force */
458 for (int d = 0; d < DIM; d++)
460 fshift[is][d] += receiveBuffer[n][d];
469 wallcycle_stop(wcycle, ewcMOVEF);
472 /* Convenience function for extracting a real buffer from an rvec buffer
474 * To reduce the number of temporary communication buffers and avoid
475 * cache polution, we reuse gmx::RVec buffers for storing reals.
476 * This functions return a real buffer reference with the same number
477 * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
479 static gmx::ArrayRef<real>
480 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
482 return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
486 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
489 gmx_domdec_comm_t *comm;
490 gmx_domdec_comm_dim_t *cd;
495 nat_tot = comm->atomRanges.numHomeAtoms();
496 for (int d = 0; d < dd->ndim; d++)
499 for (const gmx_domdec_ind_t &ind : cd->ind)
501 /* Note: We provision for RVec instead of real, so a factor of 3
502 * more than needed. The buffer actually already has this size
503 * and we pass a plain pointer below, so this does not matter.
505 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
506 gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
508 for (int j : ind.index)
510 sendBuffer[n++] = v[j];
513 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
515 gmx::ArrayRef<real> receiveBuffer;
516 if (cd->receiveInPlace)
518 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
522 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
524 /* Send and receive the data */
525 ddSendrecv(dd, d, dddirBackward,
526 sendBuffer, receiveBuffer);
527 if (!cd->receiveInPlace)
530 for (int zone = 0; zone < nzone; zone++)
532 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
534 v[i] = receiveBuffer[j++];
538 nat_tot += ind.nrecv[nzone+1];
544 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
547 gmx_domdec_comm_t *comm;
548 gmx_domdec_comm_dim_t *cd;
552 nzone = comm->zones.n/2;
553 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
554 for (int d = dd->ndim-1; d >= 0; d--)
557 for (int p = cd->numPulses() - 1; p >= 0; p--)
559 const gmx_domdec_ind_t &ind = cd->ind[p];
561 /* Note: We provision for RVec instead of real, so a factor of 3
562 * more than needed. The buffer actually already has this size
563 * and we typecast, so this works as intended.
565 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
566 gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
567 nat_tot -= ind.nrecv[nzone + 1];
569 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
571 gmx::ArrayRef<real> sendBuffer;
572 if (cd->receiveInPlace)
574 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
578 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
580 for (int zone = 0; zone < nzone; zone++)
582 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
584 sendBuffer[j++] = v[i];
588 /* Communicate the forces */
589 ddSendrecv(dd, d, dddirForward,
590 sendBuffer, receiveBuffer);
591 /* Add the received forces */
593 for (int j : ind.index)
595 v[j] += receiveBuffer[n];
603 real dd_cutoff_multibody(const gmx_domdec_t *dd)
605 const gmx_domdec_comm_t &comm = *dd->comm;
606 const DDSystemInfo &systemInfo = comm.systemInfo;
609 if (systemInfo.haveInterDomainMultiBodyBondeds)
611 if (comm.cutoff_mbody > 0)
613 r = comm.cutoff_mbody;
617 /* cutoff_mbody=0 means we do not have DLB */
618 r = comm.cellsize_min[dd->dim[0]];
619 for (int di = 1; di < dd->ndim; di++)
621 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
623 if (comm.systemInfo.filterBondedCommunication)
625 r = std::max(r, comm.cutoff_mbody);
629 r = std::min(r, systemInfo.cutoff);
637 real dd_cutoff_twobody(const gmx_domdec_t *dd)
641 r_mb = dd_cutoff_multibody(dd);
643 return std::max(dd->comm->systemInfo.cutoff, r_mb);
647 static void dd_cart_coord2pmecoord(const DDRankSetup &ddRankSetup,
648 const CartesianRankSetup &cartSetup,
652 const int nc = ddRankSetup.numPPCells[cartSetup.cartpmedim];
653 const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
654 copy_ivec(coord, coord_pme);
655 coord_pme[cartSetup.cartpmedim] =
656 nc + (coord[cartSetup.cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
659 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
660 static int ddindex2pmeindex(const DDRankSetup &ddRankSetup,
661 const int ddCellIndex)
663 const int npp = ddRankSetup.numPPRanks;
664 const int npme = ddRankSetup.numRanksDoingPme;
666 /* Here we assign a PME node to communicate with this DD node
667 * by assuming that the major index of both is x.
668 * We add npme/2 to obtain an even distribution.
670 return (ddCellIndex*npme + npme/2)/npp;
673 static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup &ddRankSetup)
675 std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);
678 for (int i = 0; i < ddRankSetup.numPPRanks; i++)
680 const int p0 = ddindex2pmeindex(ddRankSetup, i);
681 const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
682 if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
686 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
688 pmeRanks[n] = i + 1 + n;
696 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
704 if (dd->comm->bCartesian) {
705 gmx_ddindex2xyz(dd->nc,ddindex,coords);
706 dd_coords2pmecoords(dd,coords,coords_pme);
707 copy_ivec(dd->ntot,nc);
708 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
709 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
711 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
713 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
719 slab = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->nc, coords));
724 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
726 const CartesianRankSetup &cartSetup = cr->dd->comm->cartesianRankSetup;
727 ivec coords = { x, y, z };
730 if (cartSetup.bCartesianPP_PME)
733 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
738 int ddindex = dd_index(cr->dd->nc, coords);
739 if (cartSetup.bCartesianPP)
741 nodeid = cartSetup.ddindex2simnodeid[ddindex];
745 if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
747 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
759 static int dd_simnode2pmenode(const DDRankSetup &ddRankSetup,
760 const CartesianRankSetup &cartSetup,
761 gmx::ArrayRef<const int> pmeRanks,
762 const t_commrec gmx_unused *cr,
763 const int sim_nodeid)
767 /* This assumes a uniform x domain decomposition grid cell size */
768 if (cartSetup.bCartesianPP_PME)
771 ivec coord, coord_pme;
772 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
773 if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
775 /* This is a PP rank */
776 dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
777 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
781 else if (cartSetup.bCartesianPP)
783 if (sim_nodeid < ddRankSetup.numPPRanks)
785 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
790 /* This assumes DD cells with identical x coordinates
791 * are numbered sequentially.
793 if (pmeRanks.empty())
795 if (sim_nodeid < ddRankSetup.numPPRanks)
797 /* The DD index equals the nodeid */
798 pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
804 while (sim_nodeid > pmeRanks[i])
808 if (sim_nodeid < pmeRanks[i])
810 pmenode = pmeRanks[i];
818 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
823 dd->comm->ddRankSetup.npmenodes_x,
824 dd->comm->ddRankSetup.npmenodes_y
835 std::vector<int> get_pme_ddranks(const t_commrec *cr,
838 const DDRankSetup &ddRankSetup = cr->dd->comm->ddRankSetup;
839 const CartesianRankSetup &cartSetup = cr->dd->comm->cartesianRankSetup;
840 GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks, "This function should only be called when PME-only ranks are in use");
841 std::vector<int> ddranks;
842 ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1)/ddRankSetup.numRanksDoingPme);
844 for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
846 for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
848 for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
850 if (cartSetup.bCartesianPP_PME)
852 ivec coord = { x, y, z };
854 dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
855 if (cr->dd->ci[XX] == coord_pme[XX] &&
856 cr->dd->ci[YY] == coord_pme[YY] &&
857 cr->dd->ci[ZZ] == coord_pme[ZZ])
859 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
864 /* The slab corresponds to the nodeid in the PME group */
865 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
867 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
876 static gmx_bool receive_vir_ener(const gmx_domdec_t *dd,
877 gmx::ArrayRef<const int> pmeRanks,
880 gmx_bool bReceive = TRUE;
882 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
883 if (ddRankSetup.usePmeOnlyRanks)
885 const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
886 if (cartSetup.bCartesianPP_PME)
889 int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
891 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
892 coords[cartSetup.cartpmedim]++;
893 if (coords[cartSetup.cartpmedim] < dd->nc[cartSetup.cartpmedim])
896 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
897 if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
899 /* This is not the last PP node for pmenode */
904 GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
909 int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
910 if (cr->sim_nodeid+1 < cr->nnodes &&
911 dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
913 /* This is not the last PP node for pmenode */
922 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
924 gmx_domdec_comm_t *comm;
929 snew(*dim_f, dd->nc[dim]+1);
931 for (i = 1; i < dd->nc[dim]; i++)
933 if (comm->slb_frac[dim])
935 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
939 (*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
942 (*dim_f)[dd->nc[dim]] = 1;
945 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
947 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
949 if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
957 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
959 ddpme->nslab = (ddpme->dim == 0 ?
960 ddRankSetup.npmenodes_x :
961 ddRankSetup.npmenodes_y);
963 if (ddpme->nslab <= 1)
968 const int nso = ddRankSetup.numRanksDoingPme/ddpme->nslab;
969 /* Determine for each PME slab the PP location range for dimension dim */
970 snew(ddpme->pp_min, ddpme->nslab);
971 snew(ddpme->pp_max, ddpme->nslab);
972 for (int slab = 0; slab < ddpme->nslab; slab++)
974 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
975 ddpme->pp_max[slab] = 0;
977 for (int i = 0; i < dd->nnodes; i++)
980 ddindex2xyz(dd->nc, i, xyz);
981 /* For y only use our y/z slab.
982 * This assumes that the PME x grid size matches the DD grid size.
984 if (dimind == 0 || xyz[XX] == dd->ci[XX])
986 const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
994 slab = pmeindex % ddpme->nslab;
996 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
997 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1001 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1004 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1006 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1008 if (ddRankSetup.ddpme[0].dim == XX)
1010 return ddRankSetup.ddpme[0].maxshift;
1018 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1020 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1022 if (ddRankSetup.ddpme[0].dim == YY)
1024 return ddRankSetup.ddpme[0].maxshift;
1026 else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
1028 return ddRankSetup.ddpme[1].maxshift;
1036 bool ddHaveSplitConstraints(const gmx_domdec_t &dd)
1038 return dd.comm->systemInfo.haveSplitConstraints;
1041 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
1043 /* Note that the cycles value can be incorrect, either 0 or some
1044 * extremely large value, when our thread migrated to another core
1045 * with an unsynchronized cycle counter. If this happens less often
1046 * that once per nstlist steps, this will not cause issues, since
1047 * we later subtract the maximum value from the sum over nstlist steps.
1048 * A zero count will slightly lower the total, but that's a small effect.
1049 * Note that the main purpose of the subtraction of the maximum value
1050 * is to avoid throwing off the load balancing when stalls occur due
1051 * e.g. system activity or network congestion.
1053 dd->comm->cycl[ddCycl] += cycles;
1054 dd->comm->cycl_n[ddCycl]++;
1055 if (cycles > dd->comm->cycl_max[ddCycl])
1057 dd->comm->cycl_max[ddCycl] = cycles;
1062 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
1067 gmx_bool bPartOfGroup = FALSE;
1069 dim = dd->dim[dim_ind];
1070 copy_ivec(loc, loc_c);
1071 for (i = 0; i < dd->nc[dim]; i++)
1074 rank = dd_index(dd->nc, loc_c);
1075 if (rank == dd->rank)
1077 /* This process is part of the group */
1078 bPartOfGroup = TRUE;
1081 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
1085 dd->comm->mpi_comm_load[dim_ind] = c_row;
1086 if (!isDlbDisabled(dd->comm))
1088 DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
1090 if (dd->ci[dim] == dd->master_ci[dim])
1092 /* This is the root process of this row */
1093 cellsizes.rowMaster = std::make_unique<RowMaster>();
1095 RowMaster &rowMaster = *cellsizes.rowMaster;
1096 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
1097 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
1098 rowMaster.isCellMin.resize(dd->nc[dim]);
1101 rowMaster.bounds.resize(dd->nc[dim]);
1103 rowMaster.buf_ncd.resize(dd->nc[dim]);
1107 /* This is not a root process, we only need to receive cell_f */
1108 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
1111 if (dd->ci[dim] == dd->master_ci[dim])
1113 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
1119 void dd_setup_dlb_resource_sharing(const t_commrec *cr,
1123 int physicalnode_id_hash;
1125 MPI_Comm mpi_comm_pp_physicalnode;
1127 if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
1129 /* Only ranks with short-ranged tasks (currently) use GPUs.
1130 * If we don't have GPUs assigned, there are no resources to share.
1135 physicalnode_id_hash = gmx_physicalnode_id_hash();
1141 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
1142 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
1143 dd->rank, physicalnode_id_hash, gpu_id);
1145 /* Split the PP communicator over the physical nodes */
1146 /* TODO: See if we should store this (before), as it's also used for
1147 * for the nodecomm summation.
1149 // TODO PhysicalNodeCommunicator could be extended/used to handle
1150 // the need for per-node per-group communicators.
1151 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
1152 &mpi_comm_pp_physicalnode);
1153 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
1154 &dd->comm->mpi_comm_gpu_shared);
1155 MPI_Comm_free(&mpi_comm_pp_physicalnode);
1156 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
1160 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
1163 /* Note that some ranks could share a GPU, while others don't */
1165 if (dd->comm->nrank_gpu_shared == 1)
1167 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
1170 GMX_UNUSED_VALUE(cr);
1171 GMX_UNUSED_VALUE(gpu_id);
1175 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
1178 int dim0, dim1, i, j;
1183 fprintf(debug, "Making load communicators\n");
1186 dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
1187 snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
1195 make_load_communicator(dd, 0, loc);
1199 for (i = 0; i < dd->nc[dim0]; i++)
1202 make_load_communicator(dd, 1, loc);
1208 for (i = 0; i < dd->nc[dim0]; i++)
1212 for (j = 0; j < dd->nc[dim1]; j++)
1215 make_load_communicator(dd, 2, loc);
1222 fprintf(debug, "Finished making load communicators\n");
1227 /*! \brief Sets up the relation between neighboring domains and zones */
1228 static void setup_neighbor_relations(gmx_domdec_t *dd)
1230 int d, dim, i, j, m;
1232 gmx_domdec_zones_t *zones;
1233 gmx_domdec_ns_ranges_t *izone;
1234 GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
1236 for (d = 0; d < dd->ndim; d++)
1239 copy_ivec(dd->ci, tmp);
1240 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
1241 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1242 copy_ivec(dd->ci, tmp);
1243 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
1244 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1247 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
1250 dd->neighbor[d][1]);
1254 int nzone = (1 << dd->ndim);
1255 int nizone = (1 << std::max(dd->ndim - 1, 0));
1256 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1258 zones = &dd->comm->zones;
1260 for (i = 0; i < nzone; i++)
1263 clear_ivec(zones->shift[i]);
1264 for (d = 0; d < dd->ndim; d++)
1266 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1271 for (i = 0; i < nzone; i++)
1273 for (d = 0; d < DIM; d++)
1275 s[d] = dd->ci[d] - zones->shift[i][d];
1280 else if (s[d] >= dd->nc[d])
1286 zones->nizone = nizone;
1287 for (i = 0; i < zones->nizone; i++)
1289 assert(ddNonbondedZonePairRanges[i][0] == i);
1291 izone = &zones->izone[i];
1292 /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
1293 * j-zones up to nzone.
1295 izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
1296 izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
1297 for (dim = 0; dim < DIM; dim++)
1299 if (dd->nc[dim] == 1)
1301 /* All shifts should be allowed */
1302 izone->shift0[dim] = -1;
1303 izone->shift1[dim] = 1;
1307 /* Determine the min/max j-zone shift wrt the i-zone */
1308 izone->shift0[dim] = 1;
1309 izone->shift1[dim] = -1;
1310 for (j = izone->j0; j < izone->j1; j++)
1312 int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
1313 if (shift_diff < izone->shift0[dim])
1315 izone->shift0[dim] = shift_diff;
1317 if (shift_diff > izone->shift1[dim])
1319 izone->shift1[dim] = shift_diff;
1326 if (!isDlbDisabled(dd->comm))
1328 dd->comm->cellsizesWithDlb.resize(dd->ndim);
1331 if (dd->comm->ddSettings.recordLoad)
1333 make_load_communicators(dd);
1337 static void make_pp_communicator(const gmx::MDLogger &mdlog,
1339 t_commrec gmx_unused *cr,
1340 bool gmx_unused reorder)
1343 gmx_domdec_comm_t *comm = dd->comm;
1344 CartesianRankSetup &cartSetup = comm->cartesianRankSetup;
1346 if (cartSetup.bCartesianPP)
1348 /* Set up cartesian communication for the particle-particle part */
1349 GMX_LOG(mdlog.info).appendTextFormatted(
1350 "Will use a Cartesian communicator: %d x %d x %d",
1351 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
1354 for (int i = 0; i < DIM; i++)
1359 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
1361 /* We overwrite the old communicator with the new cartesian one */
1362 cr->mpi_comm_mygroup = comm_cart;
1365 dd->mpi_comm_all = cr->mpi_comm_mygroup;
1366 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
1368 if (cartSetup.bCartesianPP_PME)
1370 /* Since we want to use the original cartesian setup for sim,
1371 * and not the one after split, we need to make an index.
1373 cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
1374 cartSetup.ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
1375 gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
1376 /* Get the rank of the DD master,
1377 * above we made sure that the master node is a PP node.
1388 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
1390 else if (cartSetup.bCartesianPP)
1392 if (!comm->ddRankSetup.usePmeOnlyRanks)
1394 /* The PP communicator is also
1395 * the communicator for this simulation
1397 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
1399 cr->nodeid = dd->rank;
1401 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
1403 /* We need to make an index to go from the coordinates
1404 * to the nodeid of this simulation.
1406 cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1407 std::vector<int> buf(dd->nnodes);
1408 if (thisRankHasDuty(cr, DUTY_PP))
1410 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1412 /* Communicate the ddindex to simulation nodeid index */
1413 MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1414 cr->mpi_comm_mysim);
1416 /* Determine the master coordinates and rank.
1417 * The DD master should be the same node as the master of this sim.
1419 for (int i = 0; i < dd->nnodes; i++)
1421 if (cartSetup.ddindex2simnodeid[i] == 0)
1423 ddindex2xyz(dd->nc, i, dd->master_ci);
1424 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
1429 fprintf(debug, "The master rank is %d\n", dd->masterrank);
1434 /* No Cartesian communicators */
1435 /* We use the rank in dd->comm->all as DD index */
1436 ddindex2xyz(dd->nc, dd->rank, dd->ci);
1437 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
1439 clear_ivec(dd->master_ci);
1443 GMX_LOG(mdlog.info).appendTextFormatted(
1444 "Domain decomposition rank %d, coordinates %d %d %d\n",
1445 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1449 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
1450 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1454 static void receive_ddindex2simnodeid(gmx_domdec_t *dd,
1458 CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
1460 if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1462 cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1463 std::vector<int> buf(dd->nnodes);
1464 if (thisRankHasDuty(cr, DUTY_PP))
1466 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
1468 /* Communicate the ddindex to simulation nodeid index */
1469 MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1470 cr->mpi_comm_mysim);
1473 GMX_UNUSED_VALUE(dd);
1474 GMX_UNUSED_VALUE(cr);
1478 static CartesianRankSetup
1479 split_communicator(const gmx::MDLogger &mdlog,
1481 const DdRankOrder ddRankOrder,
1482 bool gmx_unused reorder,
1483 const DDRankSetup &ddRankSetup,
1485 std::vector<int> *pmeRanks)
1487 CartesianRankSetup cartSetup;
1489 cartSetup.bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
1490 cartSetup.bCartesianPP_PME = false;
1492 const ivec &numDDCells = ddRankSetup.numPPCells;
1493 /* Initially we set ntot to the number of PP cells */
1494 copy_ivec(numDDCells, cartSetup.ntot);
1496 if (cartSetup.bCartesianPP)
1498 const int numDDCellsTot = ddRankSetup.numPPRanks;
1500 for (int i = 1; i < DIM; i++)
1502 bDiv[i] = ((ddRankSetup.numRanksDoingPme*numDDCells[i]) % numDDCellsTot == 0);
1504 if (bDiv[YY] || bDiv[ZZ])
1506 cartSetup.bCartesianPP_PME = TRUE;
1507 /* If we have 2D PME decomposition, which is always in x+y,
1508 * we stack the PME only nodes in z.
1509 * Otherwise we choose the direction that provides the thinnest slab
1510 * of PME only nodes as this will have the least effect
1511 * on the PP communication.
1512 * But for the PME communication the opposite might be better.
1514 if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 ||
1516 numDDCells[YY] > numDDCells[ZZ]))
1518 cartSetup.cartpmedim = ZZ;
1522 cartSetup.cartpmedim = YY;
1524 cartSetup.ntot[cartSetup.cartpmedim]
1525 += (ddRankSetup.numRanksDoingPme*numDDCells[cartSetup.cartpmedim])/numDDCellsTot;
1529 GMX_LOG(mdlog.info).appendTextFormatted(
1530 "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)",
1531 ddRankSetup.numRanksDoingPme,
1532 numDDCells[XX], numDDCells[YY],
1533 numDDCells[XX], numDDCells[ZZ]);
1534 GMX_LOG(mdlog.info).appendText("Will not use a Cartesian communicator for PP <-> PME\n");
1538 if (cartSetup.bCartesianPP_PME)
1544 GMX_LOG(mdlog.info).appendTextFormatted(
1545 "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
1546 cartSetup.ntot[XX], cartSetup.ntot[YY], cartSetup.ntot[ZZ]);
1548 for (int i = 0; i < DIM; i++)
1553 MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder),
1555 MPI_Comm_rank(comm_cart, &rank);
1556 if (MASTER(cr) && rank != 0)
1558 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
1561 /* With this assigment we loose the link to the original communicator
1562 * which will usually be MPI_COMM_WORLD, unless have multisim.
1564 cr->mpi_comm_mysim = comm_cart;
1565 cr->sim_nodeid = rank;
1567 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
1569 GMX_LOG(mdlog.info).appendTextFormatted(
1570 "Cartesian rank %d, coordinates %d %d %d\n",
1571 cr->sim_nodeid, ddCellIndex[XX], ddCellIndex[YY], ddCellIndex[ZZ]);
1573 if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
1577 if (!ddRankSetup.usePmeOnlyRanks ||
1578 ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
1580 cr->duty = DUTY_PME;
1583 /* Split the sim communicator into PP and PME only nodes */
1584 MPI_Comm_split(cr->mpi_comm_mysim,
1585 getThisRankDuties(cr),
1586 dd_index(cartSetup.ntot, ddCellIndex),
1587 &cr->mpi_comm_mygroup);
1589 GMX_UNUSED_VALUE(ddCellIndex);
1594 switch (ddRankOrder)
1596 case DdRankOrder::pp_pme:
1597 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
1599 case DdRankOrder::interleave:
1600 /* Interleave the PP-only and PME-only ranks */
1601 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
1602 *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
1604 case DdRankOrder::cartesian:
1607 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
1610 if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
1612 cr->duty = DUTY_PME;
1619 /* Split the sim communicator into PP and PME only nodes */
1620 MPI_Comm_split(cr->mpi_comm_mysim,
1621 getThisRankDuties(cr),
1623 &cr->mpi_comm_mygroup);
1624 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
1628 GMX_LOG(mdlog.info).appendTextFormatted(
1629 "This rank does only %s work.\n",
1630 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
1635 /*! \brief Makes the PP communicator and the PME communicator, when needed
1637 * Returns the Cartesian rank setup.
1638 * Sets \p cr->mpi_comm_mygroup
1639 * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1640 * With separate PME ranks in interleaved order, set the PME ranks in \p pmeRanks.
1642 static CartesianRankSetup
1643 makeGroupCommunicators(const gmx::MDLogger &mdlog,
1644 const DDSettings &ddSettings,
1645 const DdRankOrder ddRankOrder,
1646 const DDRankSetup &ddRankSetup,
1649 std::vector<int> *pmeRanks)
1651 CartesianRankSetup cartSetup;
1653 if (ddRankSetup.usePmeOnlyRanks)
1655 /* Split the communicator into a PP and PME part */
1657 split_communicator(mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder,
1658 ddRankSetup, ddCellIndex, pmeRanks);
1662 /* All nodes do PP and PME */
1663 /* We do not require separate communicators */
1664 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
1666 cartSetup.bCartesianPP = false;
1667 cartSetup.bCartesianPP_PME = false;
1673 /*! \brief For PP ranks, sets or makes the communicator
1675 * For PME ranks get the rank id.
1676 * For PP only ranks, sets the PME-only rank.
1678 static void setupGroupCommunication(const gmx::MDLogger &mdlog,
1679 const DDSettings &ddSettings,
1680 gmx::ArrayRef<const int> pmeRanks,
1682 const int numAtomsInSystem,
1685 const DDRankSetup &ddRankSetup = dd->comm->ddRankSetup;
1686 const CartesianRankSetup &cartSetup = dd->comm->cartesianRankSetup;
1688 if (thisRankHasDuty(cr, DUTY_PP))
1690 /* Copy or make a new PP communicator */
1692 /* We (possibly) reordered the nodes in split_communicator,
1693 * so it is no longer required in make_pp_communicator.
1695 const bool useCartesianReorder =
1696 (ddSettings.useCartesianReorder &&
1697 !cartSetup.bCartesianPP_PME);
1699 make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
1703 receive_ddindex2simnodeid(dd, cr);
1706 if (!thisRankHasDuty(cr, DUTY_PME))
1708 /* Set up the commnuication to our PME node */
1709 dd->pme_nodeid = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
1710 dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
1713 fprintf(debug, "My pme_nodeid %d receive ener %s\n",
1714 dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
1719 dd->pme_nodeid = -1;
1722 /* We can not use DDMASTER(dd), because dd->masterrank is set later */
1725 dd->ma = std::make_unique<AtomDistribution>(dd->nc,
1731 static real *get_slb_frac(const gmx::MDLogger &mdlog,
1732 const char *dir, int nc, const char *size_string)
1734 real *slb_frac, tot;
1739 if (nc > 1 && size_string != nullptr)
1741 GMX_LOG(mdlog.info).appendTextFormatted(
1742 "Using static load balancing for the %s direction", dir);
1745 for (i = 0; i < nc; i++)
1748 sscanf(size_string, "%20lf%n", &dbl, &n);
1751 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
1758 std::string relativeCellSizes = "Relative cell sizes:";
1759 for (i = 0; i < nc; i++)
1762 relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1764 GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1770 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
1773 gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1775 while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1777 for (auto &ilist : extractILists(*ilists, IF_BOND))
1779 if (NRAL(ilist.functionType) > 2)
1781 n += nmol*(ilist.iatoms.size()/ilistStride(ilist));
1789 static int dd_getenv(const gmx::MDLogger &mdlog,
1790 const char *env_var, int def)
1796 val = getenv(env_var);
1799 if (sscanf(val, "%20d", &nst) <= 0)
1803 GMX_LOG(mdlog.info).appendTextFormatted(
1804 "Found env.var. %s = %s, using value %d",
1811 static void check_dd_restrictions(const gmx_domdec_t *dd,
1812 const t_inputrec *ir,
1813 const gmx::MDLogger &mdlog)
1815 if (ir->ePBC == epbcSCREW &&
1816 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
1818 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
1821 if (ir->nstlist == 0)
1823 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
1826 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
1828 GMX_LOG(mdlog.warning).appendText("comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
1832 static real average_cellsize_min(const gmx_ddbox_t &ddbox,
1833 const ivec numDomains)
1835 real r = ddbox.box_size[XX];
1836 for (int d = 0; d < DIM; d++)
1838 if (numDomains[d] > 1)
1840 /* Check using the initial average cell size */
1841 r = std::min(r, ddbox.box_size[d]*ddbox.skew_fac[d]/numDomains[d]);
1848 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
1850 static DlbState forceDlbOffOrBail(DlbState cmdlineDlbState,
1851 const std::string &reasonStr,
1852 const gmx::MDLogger &mdlog)
1854 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
1855 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
1857 if (cmdlineDlbState == DlbState::onUser)
1859 gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1861 else if (cmdlineDlbState == DlbState::offCanTurnOn)
1863 GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1865 return DlbState::offForever;
1868 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1870 * This function parses the parameters of "-dlb" command line option setting
1871 * corresponding state values. Then it checks the consistency of the determined
1872 * state with other run parameters and settings. As a result, the initial state
1873 * may be altered or an error may be thrown if incompatibility of options is detected.
1875 * \param [in] mdlog Logger.
1876 * \param [in] dlbOption Enum value for the DLB option.
1877 * \param [in] bRecordLoad True if the load balancer is recording load information.
1878 * \param [in] mdrunOptions Options for mdrun.
1879 * \param [in] ir Pointer mdrun to input parameters.
1880 * \returns DLB initial/startup state.
1882 static DlbState determineInitialDlbState(const gmx::MDLogger &mdlog,
1883 DlbOption dlbOption, gmx_bool bRecordLoad,
1884 const gmx::MdrunOptions &mdrunOptions,
1885 const t_inputrec *ir)
1887 DlbState dlbState = DlbState::offCanTurnOn;
1891 case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1892 case DlbOption::no: dlbState = DlbState::offUser; break;
1893 case DlbOption::yes: dlbState = DlbState::onUser; break;
1894 default: gmx_incons("Invalid dlbOption enum value");
1897 /* Reruns don't support DLB: bail or override auto mode */
1898 if (mdrunOptions.rerun)
1900 std::string reasonStr = "it is not supported in reruns.";
1901 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1904 /* Unsupported integrators */
1905 if (!EI_DYNAMICS(ir->eI))
1907 auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1908 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1911 /* Without cycle counters we can't time work to balance on */
1914 std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
1915 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1918 if (mdrunOptions.reproducible)
1920 std::string reasonStr = "you started a reproducible run.";
1923 case DlbState::offUser:
1925 case DlbState::offForever:
1926 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1928 case DlbState::offCanTurnOn:
1929 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1930 case DlbState::onCanTurnOff:
1931 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1933 case DlbState::onUser:
1934 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", mdlog);
1936 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", static_cast<int>(dlbState));
1943 static gmx_domdec_comm_t *init_dd_comm()
1945 gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
1947 comm->n_load_have = 0;
1948 comm->n_load_collect = 0;
1950 comm->haveTurnedOffDlb = false;
1952 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
1954 comm->sum_nat[i] = 0;
1958 comm->load_step = 0;
1961 clear_ivec(comm->load_lim);
1965 /* This should be replaced by a unique pointer */
1966 comm->balanceRegion = ddBalanceRegionAllocate();
1971 /* Returns whether mtop contains constraints and/or vsites */
1972 static bool systemHasConstraintsOrVsites(const gmx_mtop_t &mtop)
1974 auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
1976 while (const InteractionLists *ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
1978 if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
1987 static void setupUpdateGroups(const gmx::MDLogger &mdlog,
1988 const gmx_mtop_t &mtop,
1989 const t_inputrec &inputrec,
1990 const real cutoffMargin,
1991 DDSystemInfo *systemInfo)
1993 /* When we have constraints and/or vsites, it is beneficial to use
1994 * update groups (when possible) to allow independent update of groups.
1996 if (!systemHasConstraintsOrVsites(mtop))
1998 /* No constraints or vsites, atoms can be updated independently */
2002 systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
2003 systemInfo->useUpdateGroups =
2004 (!systemInfo->updateGroupingPerMoleculetype.empty() &&
2005 getenv("GMX_NO_UPDATEGROUPS") == nullptr);
2007 if (systemInfo->useUpdateGroups)
2009 int numUpdateGroups = 0;
2010 for (const auto &molblock : mtop.molblock)
2012 numUpdateGroups += molblock.nmol*systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
2015 systemInfo->maxUpdateGroupRadius =
2016 computeMaxUpdateGroupRadius(mtop,
2017 systemInfo->updateGroupingPerMoleculetype,
2018 maxReferenceTemperature(inputrec));
2020 /* To use update groups, the large domain-to-domain cutoff distance
2021 * should be compatible with the box size.
2023 systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
2025 if (systemInfo->useUpdateGroups)
2027 GMX_LOG(mdlog.info).appendTextFormatted(
2028 "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f nm\n",
2030 mtop.natoms/static_cast<double>(numUpdateGroups),
2031 systemInfo->maxUpdateGroupRadius);
2035 GMX_LOG(mdlog.info).appendTextFormatted("The combination of rlist and box size prohibits the use of update groups\n");
2036 systemInfo->updateGroupingPerMoleculetype.clear();
2041 UnitCellInfo::UnitCellInfo(const t_inputrec &ir) :
2042 npbcdim(ePBC2npbcdim(ir.ePBC)),
2043 numBoundedDimensions(inputrec2nboundeddim(&ir)),
2044 ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
2045 haveScrewPBC(ir.ePBC == epbcSCREW)
2049 /* Returns whether molecules are always whole, i.e. not broken by PBC */
2051 moleculesAreAlwaysWhole(const gmx_mtop_t &mtop,
2052 const bool useUpdateGroups,
2053 gmx::ArrayRef<const gmx::RangePartitioning> updateGroupingPerMoleculetype)
2055 if (useUpdateGroups)
2057 GMX_RELEASE_ASSERT(updateGroupingPerMoleculetype.size() == mtop.moltype.size(),
2058 "Need one grouping per moltype");
2059 for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
2061 if (updateGroupingPerMoleculetype[mol].numBlocks() > 1)
2069 for (const auto &moltype : mtop.moltype)
2071 if (moltype.atoms.nr > 1)
2081 /*! \brief Generate the simulation system information */
2083 getSystemInfo(const gmx::MDLogger &mdlog,
2084 const t_commrec *cr,
2085 const DomdecOptions &options,
2086 const gmx_mtop_t *mtop,
2087 const t_inputrec *ir,
2089 gmx::ArrayRef<const gmx::RVec> xGlobal)
2091 const real tenPercentMargin = 1.1;
2093 DDSystemInfo systemInfo;
2095 /* We need to decide on update groups early, as this affects communication distances */
2096 systemInfo.useUpdateGroups = false;
2097 if (ir->cutoff_scheme == ecutsVERLET)
2099 real cutoffMargin = std::sqrt(max_cutoff2(ir->ePBC, box)) - ir->rlist;
2100 setupUpdateGroups(mdlog, *mtop, *ir, cutoffMargin, &systemInfo);
2103 systemInfo.moleculesAreAlwaysWhole =
2104 moleculesAreAlwaysWhole(*mtop,
2105 systemInfo.useUpdateGroups,
2106 systemInfo.updateGroupingPerMoleculetype);
2107 systemInfo.haveInterDomainBondeds = (!systemInfo.moleculesAreAlwaysWhole ||
2108 mtop->bIntermolecularInteractions);
2109 systemInfo.haveInterDomainMultiBodyBondeds = (systemInfo.haveInterDomainBondeds &&
2110 multi_body_bondeds_count(mtop) > 0);
2112 if (systemInfo.useUpdateGroups)
2114 systemInfo.haveSplitConstraints = false;
2115 systemInfo.haveSplitSettles = false;
2119 systemInfo.haveSplitConstraints = gmx::inter_charge_group_constraints(*mtop);
2120 systemInfo.haveSplitSettles = gmx::inter_charge_group_settles(*mtop);
2125 /* Set the cut-off to some very large value,
2126 * so we don't need if statements everywhere in the code.
2127 * We use sqrt, since the cut-off is squared in some places.
2129 systemInfo.cutoff = GMX_CUTOFF_INF;
2133 systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir->rlist);
2135 systemInfo.minCutoffForMultiBody = 0;
2137 /* Determine the minimum cell size limit, affected by many factors */
2138 systemInfo.cellsizeLimit = 0;
2139 systemInfo.filterBondedCommunication = false;
2141 /* We do not allow home atoms to move beyond the neighboring domain
2142 * between domain decomposition steps, which limits the cell size.
2143 * Get an estimate of cell size limit due to atom displacement.
2144 * In most cases this is a large overestimate, because it assumes
2145 * non-interaction atoms.
2146 * We set the chance to 1 in a trillion steps.
2148 constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
2149 const real limitForAtomDisplacement =
2150 minCellSizeForAtomDisplacement(*mtop, *ir,
2151 systemInfo.updateGroupingPerMoleculetype,
2152 c_chanceThatAtomMovesBeyondDomain);
2153 GMX_LOG(mdlog.info).appendTextFormatted(
2154 "Minimum cell size due to atom displacement: %.3f nm",
2155 limitForAtomDisplacement);
2157 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2158 limitForAtomDisplacement);
2160 /* TODO: PME decomposition currently requires atoms not to be more than
2161 * 2/3 of comm->cutoff, which is >=rlist, outside of their domain.
2162 * In nearly all cases, limitForAtomDisplacement will be smaller
2163 * than 2/3*rlist, so the PME requirement is satisfied.
2164 * But it would be better for both correctness and performance
2165 * to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
2166 * Note that we would need to improve the pairlist buffer case.
2169 if (systemInfo.haveInterDomainBondeds)
2171 if (options.minimumCommunicationRange > 0)
2173 systemInfo.minCutoffForMultiBody =
2174 atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
2175 if (options.useBondedCommunication)
2177 systemInfo.filterBondedCommunication = (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
2181 systemInfo.cutoff = std::max(systemInfo.cutoff,
2182 systemInfo.minCutoffForMultiBody);
2185 else if (ir->bPeriodicMols)
2187 /* Can not easily determine the required cut-off */
2188 GMX_LOG(mdlog.warning).appendText("NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.");
2189 systemInfo.minCutoffForMultiBody = systemInfo.cutoff/2;
2197 dd_bonded_cg_distance(mdlog, mtop, ir, as_rvec_array(xGlobal.data()), box,
2198 options.checkBondedInteractions,
2201 gmx_bcast(sizeof(r_2b), &r_2b, cr);
2202 gmx_bcast(sizeof(r_mb), &r_mb, cr);
2204 /* We use an initial margin of 10% for the minimum cell size,
2205 * except when we are just below the non-bonded cut-off.
2207 if (options.useBondedCommunication)
2209 if (std::max(r_2b, r_mb) > systemInfo.cutoff)
2211 const real r_bonded = std::max(r_2b, r_mb);
2212 systemInfo.minCutoffForMultiBody = tenPercentMargin*r_bonded;
2213 /* This is the (only) place where we turn on the filtering */
2214 systemInfo.filterBondedCommunication = true;
2218 const real r_bonded = r_mb;
2219 systemInfo.minCutoffForMultiBody = std::min(tenPercentMargin*r_bonded,
2222 /* We determine cutoff_mbody later */
2223 systemInfo.increaseMultiBodyCutoff = true;
2227 /* No special bonded communication,
2228 * simply increase the DD cut-off.
2230 systemInfo.minCutoffForMultiBody = tenPercentMargin*std::max(r_2b, r_mb);
2231 systemInfo.cutoff = std::max(systemInfo.cutoff,
2232 systemInfo.minCutoffForMultiBody);
2235 GMX_LOG(mdlog.info).appendTextFormatted(
2236 "Minimum cell size due to bonded interactions: %.3f nm",
2237 systemInfo.minCutoffForMultiBody);
2239 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2240 systemInfo.minCutoffForMultiBody);
2243 systemInfo.constraintCommunicationRange = 0;
2244 if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
2246 /* There is a cell size limit due to the constraints (P-LINCS) */
2247 systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, mtop, ir);
2248 GMX_LOG(mdlog.info).appendTextFormatted(
2249 "Estimated maximum distance required for P-LINCS: %.3f nm",
2250 systemInfo.constraintCommunicationRange);
2251 if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
2253 GMX_LOG(mdlog.info).appendText("This distance will limit the DD cell size, you can override this with -rcon");
2256 else if (options.constraintCommunicationRange > 0)
2258 /* Here we do not check for dd->splitConstraints.
2259 * because one can also set a cell size limit for virtual sites only
2260 * and at this point we don't know yet if there are intercg v-sites.
2262 GMX_LOG(mdlog.info).appendTextFormatted(
2263 "User supplied maximum distance required for P-LINCS: %.3f nm",
2264 options.constraintCommunicationRange);
2265 systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
2267 systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit,
2268 systemInfo.constraintCommunicationRange);
2273 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
2276 checkDDGridSetup(const DDGridSetup &ddGridSetup,
2277 const t_commrec *cr,
2278 const DomdecOptions &options,
2279 const DDSettings &ddSettings,
2280 const DDSystemInfo &systemInfo,
2281 const real cellsizeLimit,
2282 const gmx_ddbox_t &ddbox)
2284 if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
2287 gmx_bool bC = (systemInfo.haveSplitConstraints &&
2288 systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2289 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
2290 !bC ? "-rdd" : "-rcon",
2291 ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2292 bC ? " or your LINCS settings" : "");
2294 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2295 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
2297 "Look in the log file for details on the domain decomposition",
2298 cr->nnodes - ddGridSetup.numPmeOnlyRanks,
2303 const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
2304 if (acs < cellsizeLimit)
2306 if (options.numCells[XX] <= 0)
2308 GMX_RELEASE_ASSERT(false, "dd_choose_grid() should return a grid that satisfies the cell size limits");
2312 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2313 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
2314 acs, cellsizeLimit);
2318 const int numPPRanks = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2319 if (cr->nnodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2321 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2322 "The size of the domain decomposition grid (%d) does not match the number of PP ranks (%d). The total number of ranks is %d",
2323 numPPRanks, cr->nnodes - ddGridSetup.numPmeOnlyRanks, cr->nnodes);
2325 if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2327 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
2328 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", ddGridSetup.numPmeOnlyRanks, numPPRanks);
2332 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
2334 getDDRankSetup(const gmx::MDLogger &mdlog,
2336 const DDGridSetup &ddGridSetup,
2337 const t_inputrec &ir)
2339 GMX_LOG(mdlog.info).appendTextFormatted(
2340 "Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2341 ddGridSetup.numDomains[XX], ddGridSetup.numDomains[YY], ddGridSetup.numDomains[ZZ],
2342 ddGridSetup.numPmeOnlyRanks);
2344 DDRankSetup ddRankSetup;
2346 ddRankSetup.numPPRanks = cr->nnodes - ddGridSetup.numPmeOnlyRanks;
2347 copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2349 ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2350 if (ddRankSetup.usePmeOnlyRanks)
2352 ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2356 ddRankSetup.numRanksDoingPme = ddGridSetup.numDomains[XX]*ddGridSetup.numDomains[YY]*ddGridSetup.numDomains[ZZ];
2359 if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2361 /* The following choices should match those
2362 * in comm_cost_est in domdec_setup.c.
2363 * Note that here the checks have to take into account
2364 * that the decomposition might occur in a different order than xyz
2365 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2366 * in which case they will not match those in comm_cost_est,
2367 * but since that is mainly for testing purposes that's fine.
2369 if (ddGridSetup.numDDDimensions >= 2 &&
2370 ddGridSetup.ddDimensions[0] == XX &&
2371 ddGridSetup.ddDimensions[1] == YY &&
2372 ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX] &&
2373 ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0 &&
2374 getenv("GMX_PMEONEDD") == nullptr)
2376 ddRankSetup.npmedecompdim = 2;
2377 ddRankSetup.npmenodes_x = ddGridSetup.numDomains[XX];
2378 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme/ddRankSetup.npmenodes_x;
2382 /* In case nc is 1 in both x and y we could still choose to
2383 * decompose pme in y instead of x, but we use x for simplicity.
2385 ddRankSetup.npmedecompdim = 1;
2386 if (ddGridSetup.ddDimensions[0] == YY)
2388 ddRankSetup.npmenodes_x = 1;
2389 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2393 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2394 ddRankSetup.npmenodes_y = 1;
2397 GMX_LOG(mdlog.info).appendTextFormatted(
2398 "PME domain decomposition: %d x %d x %d",
2399 ddRankSetup.npmenodes_x, ddRankSetup.npmenodes_y, 1);
2403 ddRankSetup.npmedecompdim = 0;
2404 ddRankSetup.npmenodes_x = 0;
2405 ddRankSetup.npmenodes_y = 0;
2411 /*! \brief Set the cell size and interaction limits */
2412 static void set_dd_limits(const gmx::MDLogger &mdlog,
2413 t_commrec *cr, gmx_domdec_t *dd,
2414 const DomdecOptions &options,
2415 const DDSettings &ddSettings,
2416 const DDSystemInfo &systemInfo,
2417 const DDGridSetup &ddGridSetup,
2418 const int numPPRanks,
2419 const gmx_mtop_t *mtop,
2420 const t_inputrec *ir,
2421 const gmx_ddbox_t &ddbox)
2423 gmx_domdec_comm_t *comm = dd->comm;
2424 comm->ddSettings = ddSettings;
2426 /* Initialize to GPU share count to 0, might change later */
2427 comm->nrank_gpu_shared = 0;
2429 comm->dlbState = comm->ddSettings.initialDlbState;
2430 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
2431 /* To consider turning DLB on after 2*nstlist steps we need to check
2432 * at partitioning count 3. Thus we need to increase the first count by 2.
2434 comm->ddPartioningCountFirstDlbOff += 2;
2436 comm->bPMELoadBalDLBLimits = FALSE;
2438 /* Allocate the charge group/atom sorting struct */
2439 comm->sort = std::make_unique<gmx_domdec_sort_t>();
2441 comm->systemInfo = systemInfo;
2443 if (systemInfo.useUpdateGroups)
2445 /* Note: We would like to use dd->nnodes for the atom count estimate,
2446 * but that is not yet available here. But this anyhow only
2447 * affect performance up to the second dd_partition_system call.
2449 const int homeAtomCountEstimate = mtop->natoms/numPPRanks;
2450 comm->updateGroupsCog =
2451 std::make_unique<gmx::UpdateGroupsCog>(*mtop,
2452 systemInfo.updateGroupingPerMoleculetype,
2453 maxReferenceTemperature(*ir),
2454 homeAtomCountEstimate);
2457 /* Set the DD setup given by ddGridSetup */
2458 copy_ivec(ddGridSetup.numDomains, dd->nc);
2459 dd->ndim = ddGridSetup.numDDDimensions;
2460 copy_ivec(ddGridSetup.ddDimensions, dd->dim);
2462 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
2464 snew(comm->slb_frac, DIM);
2465 if (isDlbDisabled(comm))
2467 comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
2468 comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
2469 comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
2472 /* Set the multi-body cut-off and cellsize limit for DLB */
2473 comm->cutoff_mbody = systemInfo.minCutoffForMultiBody;
2474 comm->cellsize_limit = systemInfo.cellsizeLimit;
2475 if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
2477 if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
2479 /* Set the bonded communication distance to halfway
2480 * the minimum and the maximum,
2481 * since the extra communication cost is nearly zero.
2483 real acs = average_cellsize_min(ddbox, dd->nc);
2484 comm->cutoff_mbody = 0.5*(systemInfo.minCutoffForMultiBody + acs);
2485 if (!isDlbDisabled(comm))
2487 /* Check if this does not limit the scaling */
2488 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
2489 options.dlbScaling*acs);
2491 if (!systemInfo.filterBondedCommunication)
2493 /* Without bBondComm do not go beyond the n.b. cut-off */
2494 comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
2495 if (comm->cellsize_limit >= systemInfo.cutoff)
2497 /* We don't loose a lot of efficieny
2498 * when increasing it to the n.b. cut-off.
2499 * It can even be slightly faster, because we need
2500 * less checks for the communication setup.
2502 comm->cutoff_mbody = systemInfo.cutoff;
2505 /* Check if we did not end up below our original limit */
2506 comm->cutoff_mbody = std::max(comm->cutoff_mbody,
2507 systemInfo.minCutoffForMultiBody);
2509 if (comm->cutoff_mbody > comm->cellsize_limit)
2511 comm->cellsize_limit = comm->cutoff_mbody;
2514 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
2519 fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
2520 "cellsize limit %f\n",
2521 gmx::boolToString(systemInfo.filterBondedCommunication),
2522 comm->cellsize_limit);
2527 check_dd_restrictions(dd, ir, mdlog);
2531 void dd_init_bondeds(FILE *fplog,
2533 const gmx_mtop_t *mtop,
2534 const gmx_vsite_t *vsite,
2535 const t_inputrec *ir,
2537 cginfo_mb_t *cginfo_mb)
2539 gmx_domdec_comm_t *comm;
2541 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
2545 if (comm->systemInfo.filterBondedCommunication)
2547 /* Communicate atoms beyond the cut-off for bonded interactions */
2548 comm->bondedLinks = makeBondedLinks(mtop, cginfo_mb);
2552 /* Only communicate atoms based on cut-off */
2553 comm->bondedLinks = nullptr;
2557 static void writeSettings(gmx::TextWriter *log,
2559 const gmx_mtop_t *mtop,
2560 const t_inputrec *ir,
2561 gmx_bool bDynLoadBal,
2563 const gmx_ddbox_t *ddbox)
2565 gmx_domdec_comm_t *comm;
2574 log->writeString("The maximum number of communication pulses is:");
2575 for (d = 0; d < dd->ndim; d++)
2577 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
2579 log->ensureLineBreak();
2580 log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm", comm->cellsize_limit);
2581 log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
2582 log->writeString("The allowed shrink of domain decomposition cells is:");
2583 for (d = 0; d < DIM; d++)
2587 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
2594 comm->cellsize_min_dlb[d]/
2595 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
2597 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
2600 log->ensureLineBreak();
2604 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
2605 log->writeString("The initial number of communication pulses is:");
2606 for (d = 0; d < dd->ndim; d++)
2608 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
2610 log->ensureLineBreak();
2611 log->writeString("The initial domain decomposition cell size is:");
2612 for (d = 0; d < DIM; d++)
2616 log->writeStringFormatted(" %c %.2f nm",
2617 dim2char(d), dd->comm->cellsize_min[d]);
2620 log->ensureLineBreak();
2624 const bool haveInterDomainVsites =
2625 (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
2627 if (comm->systemInfo.haveInterDomainBondeds ||
2628 haveInterDomainVsites ||
2629 comm->systemInfo.haveSplitConstraints ||
2630 comm->systemInfo.haveSplitSettles)
2632 std::string decompUnits;
2633 if (comm->systemInfo.useUpdateGroups)
2635 decompUnits = "atom groups";
2639 decompUnits = "atoms";
2642 log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:", decompUnits.c_str());
2643 log->writeLineFormatted("%40s %-7s %6.3f nm", "non-bonded interactions", "", comm->systemInfo.cutoff);
2647 limit = dd->comm->cellsize_limit;
2651 if (dd->unitCellInfo.ddBoxIsDynamic)
2653 log->writeLine("(the following are initial values, they could change due to box deformation)");
2655 limit = dd->comm->cellsize_min[XX];
2656 for (d = 1; d < DIM; d++)
2658 limit = std::min(limit, dd->comm->cellsize_min[d]);
2662 if (comm->systemInfo.haveInterDomainBondeds)
2664 log->writeLineFormatted("%40s %-7s %6.3f nm",
2665 "two-body bonded interactions", "(-rdd)",
2666 std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
2667 log->writeLineFormatted("%40s %-7s %6.3f nm",
2668 "multi-body bonded interactions", "(-rdd)",
2669 (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->systemInfo.cutoff, limit));
2671 if (haveInterDomainVsites)
2673 log->writeLineFormatted("%40s %-7s %6.3f nm",
2674 "virtual site constructions", "(-rcon)", limit);
2676 if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
2678 std::string separation = gmx::formatString("atoms separated by up to %d constraints",
2680 log->writeLineFormatted("%40s %-7s %6.3f nm\n",
2681 separation.c_str(), "(-rcon)", limit);
2683 log->ensureLineBreak();
2687 static void logSettings(const gmx::MDLogger &mdlog,
2689 const gmx_mtop_t *mtop,
2690 const t_inputrec *ir,
2692 const gmx_ddbox_t *ddbox)
2694 gmx::StringOutputStream stream;
2695 gmx::TextWriter log(&stream);
2696 writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2697 if (dd->comm->dlbState == DlbState::offCanTurnOn)
2700 log.ensureEmptyLine();
2701 log.writeLine("When dynamic load balancing gets turned on, these settings will change to:");
2703 writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2705 GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2708 static void set_cell_limits_dlb(const gmx::MDLogger &mdlog,
2711 const t_inputrec *ir,
2712 const gmx_ddbox_t *ddbox)
2714 gmx_domdec_comm_t *comm;
2715 int d, dim, npulse, npulse_d_max, npulse_d;
2720 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
2722 /* Determine the maximum number of comm. pulses in one dimension */
2724 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2726 /* Determine the maximum required number of grid pulses */
2727 if (comm->cellsize_limit >= comm->systemInfo.cutoff)
2729 /* Only a single pulse is required */
2732 else if (!bNoCutOff && comm->cellsize_limit > 0)
2734 /* We round down slightly here to avoid overhead due to the latency
2735 * of extra communication calls when the cut-off
2736 * would be only slightly longer than the cell size.
2737 * Later cellsize_limit is redetermined,
2738 * so we can not miss interactions due to this rounding.
2740 npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff/comm->cellsize_limit);
2744 /* There is no cell size limit */
2745 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
2748 if (!bNoCutOff && npulse > 1)
2750 /* See if we can do with less pulses, based on dlb_scale */
2752 for (d = 0; d < dd->ndim; d++)
2755 npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->systemInfo.cutoff
2756 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
2757 npulse_d_max = std::max(npulse_d_max, npulse_d);
2759 npulse = std::min(npulse, npulse_d_max);
2762 /* This env var can override npulse */
2763 d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
2770 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
2771 for (d = 0; d < dd->ndim; d++)
2773 if (comm->ddSettings.request1DAnd1Pulse)
2775 comm->cd[d].np_dlb = 1;
2779 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
2780 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
2782 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
2784 comm->bVacDLBNoLimit = FALSE;
2788 /* cellsize_limit is set for LINCS in init_domain_decomposition */
2789 if (!comm->bVacDLBNoLimit)
2791 comm->cellsize_limit = std::max(comm->cellsize_limit,
2792 comm->systemInfo.cutoff/comm->maxpulse);
2794 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
2795 /* Set the minimum cell size for each DD dimension */
2796 for (d = 0; d < dd->ndim; d++)
2798 if (comm->bVacDLBNoLimit ||
2799 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->systemInfo.cutoff)
2801 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
2805 comm->cellsize_min_dlb[dd->dim[d]] =
2806 comm->systemInfo.cutoff/comm->cd[d].np_dlb;
2809 if (comm->cutoff_mbody <= 0)
2811 comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
2819 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t &dd)
2821 return dd.comm->systemInfo.moleculesAreAlwaysWhole;
2824 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
2826 /* If each molecule is a single charge group
2827 * or we use domain decomposition for each periodic dimension,
2828 * we do not need to take pbc into account for the bonded interactions.
2830 return (ePBC != epbcNONE && dd->comm->systemInfo.haveInterDomainBondeds &&
2833 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
2836 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
2837 static void set_ddgrid_parameters(const gmx::MDLogger &mdlog,
2838 gmx_domdec_t *dd, real dlb_scale,
2839 const gmx_mtop_t *mtop, const t_inputrec *ir,
2840 const gmx_ddbox_t *ddbox)
2842 gmx_domdec_comm_t *comm = dd->comm;
2843 DDRankSetup &ddRankSetup = comm->ddRankSetup;
2845 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
2847 init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
2848 if (ddRankSetup.npmedecompdim >= 2)
2850 init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
2855 ddRankSetup.numRanksDoingPme = 0;
2856 if (dd->pme_nodeid >= 0)
2858 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
2859 "Can not have separate PME ranks without PME electrostatics");
2865 fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
2867 if (!isDlbDisabled(comm))
2869 set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
2872 logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
2875 if (ir->ePBC == epbcNONE)
2877 vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
2882 (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, *ddbox))/static_cast<double>(dd->nnodes);
2886 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
2888 int natoms_tot = mtop->natoms;
2890 dd->ga2la = new gmx_ga2la_t(natoms_tot,
2891 static_cast<int>(vol_frac*natoms_tot));
2894 /*! \brief Get some important DD parameters which can be modified by env.vars */
2896 getDDSettings(const gmx::MDLogger &mdlog,
2897 const DomdecOptions &options,
2898 const gmx::MdrunOptions &mdrunOptions,
2899 const t_inputrec &ir)
2901 DDSettings ddSettings;
2903 ddSettings.useSendRecv2 = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
2904 ddSettings.dlb_scale_lim = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
2905 ddSettings.request1DAnd1Pulse = bool(dd_getenv(mdlog, "GMX_DD_1D_1PULSE", 0));
2906 ddSettings.useDDOrderZYX = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
2907 ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
2908 ddSettings.eFlop = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
2909 const int recload = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
2910 ddSettings.nstDDDump = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
2911 ddSettings.nstDDDumpGrid = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
2912 ddSettings.DD_debug = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
2914 if (ddSettings.useSendRecv2)
2916 GMX_LOG(mdlog.info).appendText("Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication");
2919 if (ddSettings.eFlop)
2921 GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
2922 ddSettings.recordLoad = true;
2926 ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
2929 ddSettings.initialDlbState =
2930 determineInitialDlbState(mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, &ir);
2931 GMX_LOG(mdlog.info).appendTextFormatted("Dynamic load balancing: %s",
2932 edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);
2937 gmx_domdec_t::gmx_domdec_t(const t_inputrec &ir) :
2942 /*! \brief Return whether the simulation described can run a 1D single-pulse DD.
2944 * The GPU halo exchange code requires a 1D single-pulse DD. Such a DD
2945 * generally requires a larger box than other possible decompositions
2946 * with the same rank count, so the calling code might need to decide
2947 * what is the most appropriate way to run the simulation based on
2948 * whether such a DD is possible.
2950 * This function works like init_domain_decomposition(), but will not
2951 * give a fatal error, and only uses \c cr for communicating between
2954 * It is safe to call before thread-MPI spawns ranks, so that
2955 * thread-MPI can decide whether and how to trigger the GPU halo
2956 * exchange code path. The number of PME ranks, if any, should be set
2957 * in \c options.numPmeRanks.
2960 canMake1DAnd1PulseDomainDecomposition(const DDSettings &ddSettingsOriginal,
2961 const t_commrec *cr,
2962 const int numRanksRequested,
2963 const DomdecOptions &options,
2964 const gmx_mtop_t &mtop,
2965 const t_inputrec &ir,
2967 gmx::ArrayRef<const gmx::RVec> xGlobal)
2969 // Ensure we don't write any output from this checking routine
2970 gmx::MDLogger dummyLogger;
2972 DDSystemInfo systemInfo = getSystemInfo(dummyLogger, cr, options, &mtop, &ir, box, xGlobal);
2974 DDSettings ddSettings = ddSettingsOriginal;
2975 ddSettings.request1DAnd1Pulse = true;
2976 const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(dummyLogger, ddSettings.request1DAnd1Pulse,
2977 !isDlbDisabled(ddSettings.initialDlbState),
2978 options.dlbScaling, ir,
2979 systemInfo.cellsizeLimit);
2980 gmx_ddbox_t ddbox = {0};
2981 DDGridSetup ddGridSetup = getDDGridSetup(dummyLogger, cr, numRanksRequested, options,
2982 ddSettings, systemInfo, gridSetupCellsizeLimit,
2983 mtop, ir, box, xGlobal, &ddbox);
2985 const bool canMakeDDWith1DAnd1Pulse = (ddGridSetup.numDomains[XX] != 0);
2987 return canMakeDDWith1DAnd1Pulse;
2990 bool is1DAnd1PulseDD(const gmx_domdec_t &dd)
2992 const int maxDimensionSize = std::max(dd.nc[XX], std::max(dd.nc[YY], dd.nc[ZZ]));
2993 const int productOfDimensionSizes = dd.nc[XX]*dd.nc[YY]*dd.nc[ZZ];
2994 const bool decompositionHasOneDimension = (maxDimensionSize == productOfDimensionSizes);
2996 const bool hasMax1Pulse =
2997 ((isDlbDisabled(dd.comm) && dd.comm->cellsize_limit >= dd.comm->systemInfo.cutoff) ||
2998 (!isDlbDisabled(dd.comm) && dd.comm->maxpulse == 1));
3000 return decompositionHasOneDimension && hasMax1Pulse;
3004 gmx_domdec_t *init_domain_decomposition(const gmx::MDLogger &mdlog,
3006 const DomdecOptions &options,
3007 const gmx::MdrunOptions &mdrunOptions,
3008 const bool prefer1DAnd1Pulse,
3009 const gmx_mtop_t *mtop,
3010 const t_inputrec *ir,
3012 gmx::ArrayRef<const gmx::RVec> xGlobal,
3013 gmx::LocalAtomSetManager *atomSets)
3015 GMX_LOG(mdlog.info).appendTextFormatted(
3016 "\nInitializing Domain Decomposition on %d ranks", cr->nnodes);
3018 DDSettings ddSettings = getDDSettings(mdlog, options, mdrunOptions, *ir);
3020 if (prefer1DAnd1Pulse &&
3021 canMake1DAnd1PulseDomainDecomposition(ddSettings, cr, cr->nnodes, options,
3022 *mtop, *ir, box, xGlobal))
3024 ddSettings.request1DAnd1Pulse = true;
3027 if (ddSettings.eFlop > 1)
3029 /* Ensure that we have different random flop counts on different ranks */
3030 srand(1 + cr->nodeid);
3033 DDSystemInfo systemInfo = getSystemInfo(mdlog, cr, options, mtop, ir, box, xGlobal);
3035 int numRanksRequested = cr->nnodes;
3036 checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir->coulombtype), options.numPmeRanks);
3038 // DD grid setup uses a more different cell size limit for
3039 // automated setup than the one in systemInfo. The latter is used
3040 // in set_dd_limits() to configure DLB, for example.
3041 const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(mdlog, ddSettings.request1DAnd1Pulse,
3042 !isDlbDisabled(ddSettings.initialDlbState),
3043 options.dlbScaling, *ir,
3044 systemInfo.cellsizeLimit);
3045 gmx_ddbox_t ddbox = {0};
3046 DDGridSetup ddGridSetup = getDDGridSetup(mdlog, cr, numRanksRequested, options,
3047 ddSettings, systemInfo, gridSetupCellsizeLimit,
3048 *mtop, *ir, box, xGlobal, &ddbox);
3049 checkDDGridSetup(ddGridSetup, cr, options, ddSettings, systemInfo, gridSetupCellsizeLimit, ddbox);
3051 cr->npmenodes = ddGridSetup.numPmeOnlyRanks;
3053 DDRankSetup ddRankSetup = getDDRankSetup(mdlog, cr, ddGridSetup, *ir);
3055 /* Generate the group communicator, also decides the duty of each rank */
3056 ivec ddCellIndex = { 0, 0, 0 };
3057 std::vector<int> pmeRanks;
3058 CartesianRankSetup cartSetup =
3059 makeGroupCommunicators(mdlog, ddSettings, options.rankOrder,
3061 ddCellIndex, &pmeRanks);
3063 gmx_domdec_t *dd = new gmx_domdec_t(*ir);
3065 copy_ivec(ddCellIndex, dd->ci);
3067 dd->comm = init_dd_comm();
3069 dd->comm->ddRankSetup = ddRankSetup;
3070 dd->comm->cartesianRankSetup = cartSetup;
3072 set_dd_limits(mdlog, cr, dd, options,
3073 ddSettings, systemInfo,
3075 ddRankSetup.numPPRanks,
3079 setupGroupCommunication(mdlog, ddSettings, pmeRanks, cr, mtop->natoms, dd);
3081 if (thisRankHasDuty(cr, DUTY_PP))
3083 set_ddgrid_parameters(mdlog, dd, options.dlbScaling, mtop, ir, &ddbox);
3085 setup_neighbor_relations(dd);
3088 /* Set overallocation to avoid frequent reallocation of arrays */
3089 set_over_alloc_dd(TRUE);
3091 dd->atomSets = atomSets;
3096 static gmx_bool test_dd_cutoff(t_commrec *cr,
3098 gmx::ArrayRef<const gmx::RVec> x,
3099 real cutoffRequested)
3109 set_ddbox(*dd, false, box, true, x, &ddbox);
3113 for (d = 0; d < dd->ndim; d++)
3117 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
3118 if (dd->unitCellInfo.ddBoxIsDynamic)
3120 inv_cell_size *= DD_PRES_SCALE_MARGIN;
3123 np = 1 + static_cast<int>(cutoffRequested*inv_cell_size*ddbox.skew_fac[dim]);
3125 if (dd->comm->ddSettings.request1DAnd1Pulse && np > 1)
3130 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3132 if (np > dd->comm->cd[d].np_dlb)
3137 /* If a current local cell size is smaller than the requested
3138 * cut-off, we could still fix it, but this gets very complicated.
3139 * Without fixing here, we might actually need more checks.
3141 real cellSizeAlongDim = (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim];
3142 if (cellSizeAlongDim*dd->comm->cd[d].np_dlb < cutoffRequested)
3149 if (!isDlbDisabled(dd->comm))
3151 /* If DLB is not active yet, we don't need to check the grid jumps.
3152 * Actually we shouldn't, because then the grid jump data is not set.
3154 if (isDlbOn(dd->comm) &&
3155 check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3160 gmx_sumi(1, &LocallyLimited, cr);
3162 if (LocallyLimited > 0)
3171 gmx_bool change_dd_cutoff(t_commrec *cr,
3173 gmx::ArrayRef<const gmx::RVec> x,
3174 real cutoffRequested)
3176 gmx_bool bCutoffAllowed;
3178 bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3182 cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3185 return bCutoffAllowed;