2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
51 #include "gromacs/compat/make_unique.h"
52 #include "gromacs/domdec/collect.h"
53 #include "gromacs/domdec/dlbtiming.h"
54 #include "gromacs/domdec/domdec_network.h"
55 #include "gromacs/domdec/ga2la.h"
56 #include "gromacs/domdec/localatomsetmanager.h"
57 #include "gromacs/ewald/pme.h"
58 #include "gromacs/fileio/gmxfio.h"
59 #include "gromacs/fileio/pdbio.h"
60 #include "gromacs/gmxlib/chargegroup.h"
61 #include "gromacs/gmxlib/network.h"
62 #include "gromacs/gmxlib/nrnb.h"
63 #include "gromacs/gpu_utils/gpu_utils.h"
64 #include "gromacs/hardware/hw_info.h"
65 #include "gromacs/imd/imd.h"
66 #include "gromacs/listed-forces/manage-threading.h"
67 #include "gromacs/math/functions.h"
68 #include "gromacs/math/vec.h"
69 #include "gromacs/math/vectypes.h"
70 #include "gromacs/mdlib/calc_verletbuf.h"
71 #include "gromacs/mdlib/constr.h"
72 #include "gromacs/mdlib/constraintrange.h"
73 #include "gromacs/mdlib/forcerec.h"
74 #include "gromacs/mdlib/gmx_omp_nthreads.h"
75 #include "gromacs/mdlib/lincs.h"
76 #include "gromacs/mdlib/mdatoms.h"
77 #include "gromacs/mdlib/mdrun.h"
78 #include "gromacs/mdlib/mdsetup.h"
79 #include "gromacs/mdlib/nb_verlet.h"
80 #include "gromacs/mdlib/nbnxn_grid.h"
81 #include "gromacs/mdlib/nsgrid.h"
82 #include "gromacs/mdlib/vsite.h"
83 #include "gromacs/mdtypes/commrec.h"
84 #include "gromacs/mdtypes/df_history.h"
85 #include "gromacs/mdtypes/forcerec.h"
86 #include "gromacs/mdtypes/inputrec.h"
87 #include "gromacs/mdtypes/md_enums.h"
88 #include "gromacs/mdtypes/mdatom.h"
89 #include "gromacs/mdtypes/nblist.h"
90 #include "gromacs/mdtypes/state.h"
91 #include "gromacs/pbcutil/ishift.h"
92 #include "gromacs/pbcutil/pbc.h"
93 #include "gromacs/pulling/pull.h"
94 #include "gromacs/timing/wallcycle.h"
95 #include "gromacs/topology/block.h"
96 #include "gromacs/topology/idef.h"
97 #include "gromacs/topology/ifunc.h"
98 #include "gromacs/topology/mtop_lookup.h"
99 #include "gromacs/topology/mtop_util.h"
100 #include "gromacs/topology/topology.h"
101 #include "gromacs/utility/basedefinitions.h"
102 #include "gromacs/utility/basenetwork.h"
103 #include "gromacs/utility/cstringutil.h"
104 #include "gromacs/utility/exceptions.h"
105 #include "gromacs/utility/fatalerror.h"
106 #include "gromacs/utility/gmxmpi.h"
107 #include "gromacs/utility/logger.h"
108 #include "gromacs/utility/real.h"
109 #include "gromacs/utility/smalloc.h"
110 #include "gromacs/utility/strconvert.h"
111 #include "gromacs/utility/stringstream.h"
112 #include "gromacs/utility/stringutil.h"
113 #include "gromacs/utility/textwriter.h"
115 #include "atomdistribution.h"
116 #include "cellsizes.h"
117 #include "distribute.h"
118 #include "domdec_constraints.h"
119 #include "domdec_internal.h"
120 #include "domdec_vsite.h"
121 #include "redistribute.h"
124 #define DD_NLOAD_MAX 9
126 static const char *edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
128 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
131 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
132 #define DD_FLAG_NRCG 65535
133 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
134 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
136 /* The DD zone order */
137 static const ivec dd_zo[DD_MAXZONE] =
138 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
140 /* The non-bonded zone-pair setup for domain decomposition
141 * The first number is the i-zone, the second number the first j-zone seen by
142 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
143 * As is, this is for 3D decomposition, where there are 4 i-zones.
144 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
145 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
148 ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
153 /* Turn on DLB when the load imbalance causes this amount of total loss.
154 * There is a bit of overhead with DLB and it's difficult to achieve
155 * a load imbalance of less than 2% with DLB.
157 #define DD_PERF_LOSS_DLB_ON 0.02
159 /* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
160 #define DD_PERF_LOSS_WARN 0.05
163 /* We check if to turn on DLB at the first and every 100 DD partitionings.
164 * With large imbalance DLB will turn on at the first step, so we can
165 * make the interval so large that the MPI overhead of the check is negligible.
167 static const int c_checkTurnDlbOnInterval = 100;
168 /* We need to check if DLB results in worse performance and then turn it off.
169 * We check this more often then for turning DLB on, because the DLB can scale
170 * the domains very rapidly, so if unlucky the load imbalance can go up quickly
171 * and furthermore, we are already synchronizing often with DLB, so
172 * the overhead of the MPI Bcast is not that high.
174 static const int c_checkTurnDlbOffInterval = 20;
176 /* Forward declaration */
177 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue);
181 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
183 static void index2xyz(ivec nc,int ind,ivec xyz)
185 xyz[XX] = ind % nc[XX];
186 xyz[YY] = (ind / nc[XX]) % nc[YY];
187 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
191 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
193 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
194 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
195 xyz[ZZ] = ind % nc[ZZ];
198 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
203 ddindex = dd_index(dd->nc, c);
204 if (dd->comm->bCartesianPP_PME)
206 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
208 else if (dd->comm->bCartesianPP)
211 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
222 static gmx_bool dynamic_dd_box(const gmx_ddbox_t *ddbox, const t_inputrec *ir)
224 return (ddbox->nboundeddim < DIM || inputrecDynamicBox(ir));
227 int ddglatnr(const gmx_domdec_t *dd, int i)
237 if (i >= dd->comm->atomRanges.numAtomsTotal())
239 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
241 atnr = dd->globalAtomIndices[i] + 1;
247 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
249 return &dd->comm->cgs_gl;
252 void dd_store_state(gmx_domdec_t *dd, t_state *state)
256 if (state->ddp_count != dd->ddp_count)
258 gmx_incons("The MD state does not match the domain decomposition state");
261 state->cg_gl.resize(dd->ncg_home);
262 for (i = 0; i < dd->ncg_home; i++)
264 state->cg_gl[i] = dd->globalAtomGroupIndices[i];
267 state->ddp_count_cg_gl = dd->ddp_count;
270 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
272 return &dd->comm->zones;
275 void dd_get_ns_ranges(const gmx_domdec_t *dd, int icg,
276 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
278 gmx_domdec_zones_t *zones;
281 zones = &dd->comm->zones;
284 while (icg >= zones->izone[izone].cg1)
293 else if (izone < zones->nizone)
295 *jcg0 = zones->izone[izone].jcg0;
299 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
300 icg, izone, zones->nizone);
303 *jcg1 = zones->izone[izone].jcg1;
305 for (d = 0; d < dd->ndim; d++)
308 shift0[dim] = zones->izone[izone].shift0[dim];
309 shift1[dim] = zones->izone[izone].shift1[dim];
310 if (dd->comm->tric_dir[dim] || (isDlbOn(dd->comm) && d > 0))
312 /* A conservative approach, this can be optimized */
319 int dd_numHomeAtoms(const gmx_domdec_t &dd)
321 return dd.comm->atomRanges.numHomeAtoms();
324 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
326 /* We currently set mdatoms entries for all atoms:
327 * local + non-local + communicated for vsite + constraints
330 return dd->comm->atomRanges.numAtomsTotal();
333 int dd_natoms_vsite(const gmx_domdec_t *dd)
335 return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
338 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
340 *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
341 *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
344 void dd_move_x(gmx_domdec_t *dd,
346 gmx::ArrayRef<gmx::RVec> x,
347 gmx_wallcycle *wcycle)
349 wallcycle_start(wcycle, ewcMOVEX);
352 gmx_domdec_comm_t *comm;
353 gmx_domdec_comm_dim_t *cd;
354 rvec shift = {0, 0, 0};
355 gmx_bool bPBC, bScrew;
359 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
362 nat_tot = comm->atomRanges.numHomeAtoms();
363 for (int d = 0; d < dd->ndim; d++)
365 bPBC = (dd->ci[dd->dim[d]] == 0);
366 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
369 copy_rvec(box[dd->dim[d]], shift);
372 for (const gmx_domdec_ind_t &ind : cd->ind)
374 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
375 gmx::ArrayRef<gmx::RVec> &sendBuffer = sendBufferAccess.buffer;
379 for (int g : ind.index)
381 for (int j : atomGrouping.block(g))
383 sendBuffer[n] = x[j];
390 for (int g : ind.index)
392 for (int j : atomGrouping.block(g))
394 /* We need to shift the coordinates */
395 for (int d = 0; d < DIM; d++)
397 sendBuffer[n][d] = x[j][d] + shift[d];
405 for (int g : ind.index)
407 for (int j : atomGrouping.block(g))
410 sendBuffer[n][XX] = x[j][XX] + shift[XX];
412 * This operation requires a special shift force
413 * treatment, which is performed in calc_vir.
415 sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
416 sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
422 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
424 gmx::ArrayRef<gmx::RVec> receiveBuffer;
425 if (cd->receiveInPlace)
427 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
431 receiveBuffer = receiveBufferAccess.buffer;
433 /* Send and receive the coordinates */
434 ddSendrecv(dd, d, dddirBackward,
435 sendBuffer, receiveBuffer);
437 if (!cd->receiveInPlace)
440 for (int zone = 0; zone < nzone; zone++)
442 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
444 x[i] = receiveBuffer[j++];
448 nat_tot += ind.nrecv[nzone+1];
453 wallcycle_stop(wcycle, ewcMOVEX);
456 void dd_move_f(gmx_domdec_t *dd,
457 gmx::ArrayRef<gmx::RVec> f,
459 gmx_wallcycle *wcycle)
461 wallcycle_start(wcycle, ewcMOVEF);
464 gmx_domdec_comm_t *comm;
465 gmx_domdec_comm_dim_t *cd;
468 gmx_bool bShiftForcesNeedPbc, bScrew;
472 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
474 nzone = comm->zones.n/2;
475 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
476 for (int d = dd->ndim-1; d >= 0; d--)
478 /* Only forces in domains near the PBC boundaries need to
479 consider PBC in the treatment of fshift */
480 bShiftForcesNeedPbc = (dd->ci[dd->dim[d]] == 0);
481 bScrew = (bShiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX);
482 if (fshift == nullptr && !bScrew)
484 bShiftForcesNeedPbc = FALSE;
486 /* Determine which shift vector we need */
492 for (int p = cd->numPulses() - 1; p >= 0; p--)
494 const gmx_domdec_ind_t &ind = cd->ind[p];
495 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
496 gmx::ArrayRef<gmx::RVec> &receiveBuffer = receiveBufferAccess.buffer;
498 nat_tot -= ind.nrecv[nzone+1];
500 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
502 gmx::ArrayRef<gmx::RVec> sendBuffer;
503 if (cd->receiveInPlace)
505 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
509 sendBuffer = sendBufferAccess.buffer;
511 for (int zone = 0; zone < nzone; zone++)
513 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
515 sendBuffer[j++] = f[i];
519 /* Communicate the forces */
520 ddSendrecv(dd, d, dddirForward,
521 sendBuffer, receiveBuffer);
522 /* Add the received forces */
524 if (!bShiftForcesNeedPbc)
526 for (int g : ind.index)
528 for (int j : atomGrouping.block(g))
530 for (int d = 0; d < DIM; d++)
532 f[j][d] += receiveBuffer[n][d];
540 /* fshift should always be defined if this function is
541 * called when bShiftForcesNeedPbc is true */
542 assert(nullptr != fshift);
543 for (int g : ind.index)
545 for (int j : atomGrouping.block(g))
547 for (int d = 0; d < DIM; d++)
549 f[j][d] += receiveBuffer[n][d];
551 /* Add this force to the shift force */
552 for (int d = 0; d < DIM; d++)
554 fshift[is][d] += receiveBuffer[n][d];
562 for (int g : ind.index)
564 for (int j : atomGrouping.block(g))
566 /* Rotate the force */
567 f[j][XX] += receiveBuffer[n][XX];
568 f[j][YY] -= receiveBuffer[n][YY];
569 f[j][ZZ] -= receiveBuffer[n][ZZ];
572 /* Add this force to the shift force */
573 for (int d = 0; d < DIM; d++)
575 fshift[is][d] += receiveBuffer[n][d];
585 wallcycle_stop(wcycle, ewcMOVEF);
588 /* Convenience function for extracting a real buffer from an rvec buffer
590 * To reduce the number of temporary communication buffers and avoid
591 * cache polution, we reuse gmx::RVec buffers for storing reals.
592 * This functions return a real buffer reference with the same number
593 * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
595 static gmx::ArrayRef<real>
596 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
598 return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
602 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
605 gmx_domdec_comm_t *comm;
606 gmx_domdec_comm_dim_t *cd;
610 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
613 nat_tot = comm->atomRanges.numHomeAtoms();
614 for (int d = 0; d < dd->ndim; d++)
617 for (const gmx_domdec_ind_t &ind : cd->ind)
619 /* Note: We provision for RVec instead of real, so a factor of 3
620 * more than needed. The buffer actually already has this size
621 * and we pass a plain pointer below, so this does not matter.
623 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
624 gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
626 for (int g : ind.index)
628 for (int j : atomGrouping.block(g))
630 sendBuffer[n++] = v[j];
634 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
636 gmx::ArrayRef<real> receiveBuffer;
637 if (cd->receiveInPlace)
639 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
643 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
645 /* Send and receive the data */
646 ddSendrecv(dd, d, dddirBackward,
647 sendBuffer, receiveBuffer);
648 if (!cd->receiveInPlace)
651 for (int zone = 0; zone < nzone; zone++)
653 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
655 v[i] = receiveBuffer[j++];
659 nat_tot += ind.nrecv[nzone+1];
665 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
668 gmx_domdec_comm_t *comm;
669 gmx_domdec_comm_dim_t *cd;
673 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
675 nzone = comm->zones.n/2;
676 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
677 for (int d = dd->ndim-1; d >= 0; d--)
680 for (int p = cd->numPulses() - 1; p >= 0; p--)
682 const gmx_domdec_ind_t &ind = cd->ind[p];
684 /* Note: We provision for RVec instead of real, so a factor of 3
685 * more than needed. The buffer actually already has this size
686 * and we typecast, so this works as intended.
688 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
689 gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
690 nat_tot -= ind.nrecv[nzone + 1];
692 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
694 gmx::ArrayRef<real> sendBuffer;
695 if (cd->receiveInPlace)
697 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
701 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
703 for (int zone = 0; zone < nzone; zone++)
705 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
707 sendBuffer[j++] = v[i];
711 /* Communicate the forces */
712 ddSendrecv(dd, d, dddirForward,
713 sendBuffer, receiveBuffer);
714 /* Add the received forces */
716 for (int g : ind.index)
718 for (int j : atomGrouping.block(g))
720 v[j] += receiveBuffer[n];
729 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
731 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
733 zone->min0, zone->max1,
734 zone->mch0, zone->mch0,
735 zone->p1_0, zone->p1_1);
738 /* Using the home grid size as input in cell_ns_x0 and cell_ns_x1
739 * takes the extremes over all home and remote zones in the halo
740 * and returns the results in cell_ns_x0 and cell_ns_x1.
741 * Note: only used with the group cut-off scheme.
743 static void dd_move_cellx(gmx_domdec_t *dd,
744 const gmx_ddbox_t *ddbox,
748 constexpr int c_ddZoneCommMaxNumZones = 5;
749 gmx_ddzone_t buf_s[c_ddZoneCommMaxNumZones];
750 gmx_ddzone_t buf_r[c_ddZoneCommMaxNumZones];
751 gmx_ddzone_t buf_e[c_ddZoneCommMaxNumZones];
752 gmx_domdec_comm_t *comm = dd->comm;
756 for (int d = 1; d < dd->ndim; d++)
758 int dim = dd->dim[d];
759 gmx_ddzone_t &zp = (d == 1) ? comm->zone_d1[0] : comm->zone_d2[0][0];
761 /* Copy the base sizes of the home zone */
762 zp.min0 = cell_ns_x0[dim];
763 zp.max1 = cell_ns_x1[dim];
764 zp.min1 = cell_ns_x1[dim];
765 zp.mch0 = cell_ns_x0[dim];
766 zp.mch1 = cell_ns_x1[dim];
767 zp.p1_0 = cell_ns_x0[dim];
768 zp.p1_1 = cell_ns_x1[dim];
771 gmx::ArrayRef<DDCellsizesWithDlb> cellsizes = comm->cellsizesWithDlb;
773 /* Loop backward over the dimensions and aggregate the extremes
776 for (int d = dd->ndim - 2; d >= 0; d--)
778 const int dim = dd->dim[d];
779 const bool applyPbc = (dim < ddbox->npbcdim);
781 /* Use an rvec to store two reals */
782 extr_s[d][0] = cellsizes[d + 1].fracLower;
783 extr_s[d][1] = cellsizes[d + 1].fracUpper;
784 extr_s[d][2] = cellsizes[d + 1].fracUpper;
787 GMX_ASSERT(pos < c_ddZoneCommMaxNumZones, "The buffers should be sufficiently large");
788 /* Store the extremes in the backward sending buffer,
789 * so they get updated separately from the forward communication.
791 for (int d1 = d; d1 < dd->ndim-1; d1++)
793 /* We invert the order to be able to use the same loop for buf_e */
794 buf_s[pos].min0 = extr_s[d1][1];
795 buf_s[pos].max1 = extr_s[d1][0];
796 buf_s[pos].min1 = extr_s[d1][2];
799 /* Store the cell corner of the dimension we communicate along */
800 buf_s[pos].p1_0 = comm->cell_x0[dim];
805 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
808 if (dd->ndim == 3 && d == 0)
810 buf_s[pos] = comm->zone_d2[0][1];
812 buf_s[pos] = comm->zone_d1[0];
816 /* We only need to communicate the extremes
817 * in the forward direction
819 int numPulses = comm->cd[d].numPulses();
823 /* Take the minimum to avoid double communication */
824 numPulsesMin = std::min(numPulses, dd->nc[dim] - 1 - numPulses);
828 /* Without PBC we should really not communicate over
829 * the boundaries, but implementing that complicates
830 * the communication setup and therefore we simply
831 * do all communication, but ignore some data.
833 numPulsesMin = numPulses;
835 for (int pulse = 0; pulse < numPulsesMin; pulse++)
837 /* Communicate the extremes forward */
838 bool receiveValidData = (applyPbc || dd->ci[dim] > 0);
840 int numElements = dd->ndim - d - 1;
841 ddSendrecv(dd, d, dddirForward,
842 extr_s + d, numElements,
843 extr_r + d, numElements);
845 if (receiveValidData)
847 for (int d1 = d; d1 < dd->ndim - 1; d1++)
849 extr_s[d1][0] = std::max(extr_s[d1][0], extr_r[d1][0]);
850 extr_s[d1][1] = std::min(extr_s[d1][1], extr_r[d1][1]);
851 extr_s[d1][2] = std::min(extr_s[d1][2], extr_r[d1][2]);
856 const int numElementsInBuffer = pos;
857 for (int pulse = 0; pulse < numPulses; pulse++)
859 /* Communicate all the zone information backward */
860 bool receiveValidData = (applyPbc || dd->ci[dim] < dd->nc[dim] - 1);
862 static_assert(sizeof(gmx_ddzone_t) == c_ddzoneNumReals*sizeof(real), "Here we expect gmx_ddzone_t to consist of c_ddzoneNumReals reals (only)");
864 int numReals = numElementsInBuffer*c_ddzoneNumReals;
865 ddSendrecv(dd, d, dddirBackward,
866 gmx::arrayRefFromArray(&buf_s[0].min0, numReals),
867 gmx::arrayRefFromArray(&buf_r[0].min0, numReals));
872 for (int d1 = d + 1; d1 < dd->ndim; d1++)
874 /* Determine the decrease of maximum required
875 * communication height along d1 due to the distance along d,
876 * this avoids a lot of useless atom communication.
878 real dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
881 if (ddbox->tric_dir[dim])
883 /* c is the off-diagonal coupling between the cell planes
884 * along directions d and d1.
886 c = ddbox->v[dim][dd->dim[d1]][dim];
892 real det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
895 dh[d1] = comm->cutoff - (c*dist_d + std::sqrt(det))/(1 + c*c);
899 /* A negative value signals out of range */
905 /* Accumulate the extremes over all pulses */
906 for (int i = 0; i < numElementsInBuffer; i++)
914 if (receiveValidData)
916 buf_e[i].min0 = std::min(buf_e[i].min0, buf_r[i].min0);
917 buf_e[i].max1 = std::max(buf_e[i].max1, buf_r[i].max1);
918 buf_e[i].min1 = std::min(buf_e[i].min1, buf_r[i].min1);
922 if (dd->ndim == 3 && d == 0 && i == numElementsInBuffer - 1)
930 if (receiveValidData && dh[d1] >= 0)
932 buf_e[i].mch0 = std::max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
933 buf_e[i].mch1 = std::max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
936 /* Copy the received buffer to the send buffer,
937 * to pass the data through with the next pulse.
941 if (((applyPbc || dd->ci[dim] + numPulses < dd->nc[dim]) && pulse == numPulses - 1) ||
942 (!applyPbc && dd->ci[dim] + 1 + pulse == dd->nc[dim] - 1))
944 /* Store the extremes */
947 for (int d1 = d; d1 < dd->ndim-1; d1++)
949 extr_s[d1][1] = std::min(extr_s[d1][1], buf_e[pos].min0);
950 extr_s[d1][0] = std::max(extr_s[d1][0], buf_e[pos].max1);
951 extr_s[d1][2] = std::min(extr_s[d1][2], buf_e[pos].min1);
955 if (d == 1 || (d == 0 && dd->ndim == 3))
957 for (int i = d; i < 2; i++)
959 comm->zone_d2[1-d][i] = buf_e[pos];
965 comm->zone_d1[1] = buf_e[pos];
974 int dim = dd->dim[1];
975 for (int i = 0; i < 2; i++)
979 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
981 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d1[i].min0);
982 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d1[i].max1);
987 int dim = dd->dim[2];
988 for (int i = 0; i < 2; i++)
990 for (int j = 0; j < 2; j++)
994 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
996 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
997 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1001 for (int d = 1; d < dd->ndim; d++)
1003 cellsizes[d].fracLowerMax = extr_s[d-1][0];
1004 cellsizes[d].fracUpperMin = extr_s[d-1][1];
1007 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1008 d, cellsizes[d].fracLowerMax, cellsizes[d].fracUpperMin);
1013 static void write_dd_grid_pdb(const char *fn, int64_t step,
1014 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1016 rvec grid_s[2], *grid_r = nullptr, cx, r;
1017 char fname[STRLEN], buf[22];
1019 int a, i, d, z, y, x;
1023 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1024 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1028 snew(grid_r, 2*dd->nnodes);
1031 dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : nullptr);
1035 for (d = 0; d < DIM; d++)
1037 for (i = 0; i < DIM; i++)
1045 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1047 tric[d][i] = box[i][d]/box[i][i];
1056 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1057 out = gmx_fio_fopen(fname, "w");
1058 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1060 for (i = 0; i < dd->nnodes; i++)
1062 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1063 for (d = 0; d < DIM; d++)
1065 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
1067 for (z = 0; z < 2; z++)
1069 for (y = 0; y < 2; y++)
1071 for (x = 0; x < 2; x++)
1073 cx[XX] = grid_r[i*2+x][XX];
1074 cx[YY] = grid_r[i*2+y][YY];
1075 cx[ZZ] = grid_r[i*2+z][ZZ];
1077 gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
1078 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
1082 for (d = 0; d < DIM; d++)
1084 for (x = 0; x < 4; x++)
1088 case 0: y = 1 + i*8 + 2*x; break;
1089 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
1090 case 2: y = 1 + i*8 + x; break;
1092 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
1096 gmx_fio_fclose(out);
1101 void write_dd_pdb(const char *fn, int64_t step, const char *title,
1102 const gmx_mtop_t *mtop, const t_commrec *cr,
1103 int natoms, const rvec x[], const matrix box)
1105 char fname[STRLEN], buf[22];
1108 const char *atomname, *resname;
1114 natoms = dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
1117 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
1119 out = gmx_fio_fopen(fname, "w");
1121 fprintf(out, "TITLE %s\n", title);
1122 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1124 for (int i = 0; i < natoms; i++)
1126 int ii = dd->globalAtomIndices[i];
1127 mtopGetAtomAndResidueName(mtop, ii, &molb, &atomname, &resnr, &resname, nullptr);
1130 if (i < dd->comm->atomRanges.end(DDAtomRanges::Type::Zones))
1133 while (i >= dd->atomGrouping().subRange(0, dd->comm->zones.cg_range[c + 1]).end())
1139 else if (i < dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites))
1141 b = dd->comm->zones.n;
1145 b = dd->comm->zones.n + 1;
1147 gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
1148 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
1150 fprintf(out, "TER\n");
1152 gmx_fio_fclose(out);
1155 real dd_cutoff_multibody(const gmx_domdec_t *dd)
1157 gmx_domdec_comm_t *comm;
1164 if (comm->bInterCGBondeds)
1166 if (comm->cutoff_mbody > 0)
1168 r = comm->cutoff_mbody;
1172 /* cutoff_mbody=0 means we do not have DLB */
1173 r = comm->cellsize_min[dd->dim[0]];
1174 for (di = 1; di < dd->ndim; di++)
1176 r = std::min(r, comm->cellsize_min[dd->dim[di]]);
1178 if (comm->bBondComm)
1180 r = std::max(r, comm->cutoff_mbody);
1184 r = std::min(r, comm->cutoff);
1192 real dd_cutoff_twobody(const gmx_domdec_t *dd)
1196 r_mb = dd_cutoff_multibody(dd);
1198 return std::max(dd->comm->cutoff, r_mb);
1202 static void dd_cart_coord2pmecoord(const gmx_domdec_t *dd, const ivec coord,
1207 nc = dd->nc[dd->comm->cartpmedim];
1208 ntot = dd->comm->ntot[dd->comm->cartpmedim];
1209 copy_ivec(coord, coord_pme);
1210 coord_pme[dd->comm->cartpmedim] =
1211 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
1214 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
1219 npme = dd->comm->npmenodes;
1221 /* Here we assign a PME node to communicate with this DD node
1222 * by assuming that the major index of both is x.
1223 * We add cr->npmenodes/2 to obtain an even distribution.
1225 return (ddindex*npme + npme/2)/npp;
1228 static int *dd_interleaved_pme_ranks(const gmx_domdec_t *dd)
1233 snew(pme_rank, dd->comm->npmenodes);
1235 for (i = 0; i < dd->nnodes; i++)
1237 p0 = ddindex2pmeindex(dd, i);
1238 p1 = ddindex2pmeindex(dd, i+1);
1239 if (i+1 == dd->nnodes || p1 > p0)
1243 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
1245 pme_rank[n] = i + 1 + n;
1253 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
1261 if (dd->comm->bCartesian) {
1262 gmx_ddindex2xyz(dd->nc,ddindex,coords);
1263 dd_coords2pmecoords(dd,coords,coords_pme);
1264 copy_ivec(dd->ntot,nc);
1265 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1266 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1268 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
1270 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
1276 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
1281 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
1283 gmx_domdec_comm_t *comm;
1285 int ddindex, nodeid = -1;
1287 comm = cr->dd->comm;
1292 if (comm->bCartesianPP_PME)
1295 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
1300 ddindex = dd_index(cr->dd->nc, coords);
1301 if (comm->bCartesianPP)
1303 nodeid = comm->ddindex2simnodeid[ddindex];
1309 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
1321 static int dd_simnode2pmenode(const gmx_domdec_t *dd,
1322 const t_commrec gmx_unused *cr,
1327 const gmx_domdec_comm_t *comm = dd->comm;
1329 /* This assumes a uniform x domain decomposition grid cell size */
1330 if (comm->bCartesianPP_PME)
1333 ivec coord, coord_pme;
1334 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
1335 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1337 /* This is a PP node */
1338 dd_cart_coord2pmecoord(dd, coord, coord_pme);
1339 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
1343 else if (comm->bCartesianPP)
1345 if (sim_nodeid < dd->nnodes)
1347 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
1352 /* This assumes DD cells with identical x coordinates
1353 * are numbered sequentially.
1355 if (dd->comm->pmenodes == nullptr)
1357 if (sim_nodeid < dd->nnodes)
1359 /* The DD index equals the nodeid */
1360 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
1366 while (sim_nodeid > dd->comm->pmenodes[i])
1370 if (sim_nodeid < dd->comm->pmenodes[i])
1372 pmenode = dd->comm->pmenodes[i];
1380 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
1385 dd->comm->npmenodes_x, dd->comm->npmenodes_y
1396 std::vector<int> get_pme_ddranks(const t_commrec *cr, int pmenodeid)
1400 ivec coord, coord_pme;
1404 std::vector<int> ddranks;
1405 ddranks.reserve((dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
1407 for (x = 0; x < dd->nc[XX]; x++)
1409 for (y = 0; y < dd->nc[YY]; y++)
1411 for (z = 0; z < dd->nc[ZZ]; z++)
1413 if (dd->comm->bCartesianPP_PME)
1418 dd_cart_coord2pmecoord(dd, coord, coord_pme);
1419 if (dd->ci[XX] == coord_pme[XX] &&
1420 dd->ci[YY] == coord_pme[YY] &&
1421 dd->ci[ZZ] == coord_pme[ZZ])
1423 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
1428 /* The slab corresponds to the nodeid in the PME group */
1429 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
1431 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
1440 static gmx_bool receive_vir_ener(const gmx_domdec_t *dd, const t_commrec *cr)
1442 gmx_bool bReceive = TRUE;
1444 if (cr->npmenodes < dd->nnodes)
1446 gmx_domdec_comm_t *comm = dd->comm;
1447 if (comm->bCartesianPP_PME)
1450 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
1452 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
1453 coords[comm->cartpmedim]++;
1454 if (coords[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1457 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
1458 if (dd_simnode2pmenode(dd, cr, rank) == pmenode)
1460 /* This is not the last PP node for pmenode */
1465 GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
1470 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
1471 if (cr->sim_nodeid+1 < cr->nnodes &&
1472 dd_simnode2pmenode(dd, cr, cr->sim_nodeid+1) == pmenode)
1474 /* This is not the last PP node for pmenode */
1483 static void set_zones_ncg_home(gmx_domdec_t *dd)
1485 gmx_domdec_zones_t *zones;
1488 zones = &dd->comm->zones;
1490 zones->cg_range[0] = 0;
1491 for (i = 1; i < zones->n+1; i++)
1493 zones->cg_range[i] = dd->ncg_home;
1495 /* zone_ncg1[0] should always be equal to ncg_home */
1496 dd->comm->zone_ncg1[0] = dd->ncg_home;
1499 static void restoreAtomGroups(gmx_domdec_t *dd,
1500 const int *gcgs_index, const t_state *state)
1502 gmx::ArrayRef<const int> atomGroupsState = state->cg_gl;
1504 std::vector<int> &globalAtomGroupIndices = dd->globalAtomGroupIndices;
1505 gmx::RangePartitioning &atomGrouping = dd->atomGrouping_;
1507 globalAtomGroupIndices.resize(atomGroupsState.size());
1508 atomGrouping.clear();
1510 /* Copy back the global charge group indices from state
1511 * and rebuild the local charge group to atom index.
1513 for (gmx::index i = 0; i < atomGroupsState.size(); i++)
1515 const int atomGroupGlobal = atomGroupsState[i];
1516 const int groupSize = gcgs_index[atomGroupGlobal + 1] - gcgs_index[atomGroupGlobal];
1517 globalAtomGroupIndices[i] = atomGroupGlobal;
1518 atomGrouping.appendBlock(groupSize);
1521 dd->ncg_home = atomGroupsState.size();
1522 dd->comm->atomRanges.setEnd(DDAtomRanges::Type::Home, atomGrouping.fullRange().end());
1524 set_zones_ncg_home(dd);
1527 static void dd_set_cginfo(gmx::ArrayRef<const int> index_gl, int cg0, int cg1,
1528 t_forcerec *fr, char *bLocalCG)
1530 cginfo_mb_t *cginfo_mb;
1536 cginfo_mb = fr->cginfo_mb;
1537 cginfo = fr->cginfo;
1539 for (cg = cg0; cg < cg1; cg++)
1541 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
1545 if (bLocalCG != nullptr)
1547 for (cg = cg0; cg < cg1; cg++)
1549 bLocalCG[index_gl[cg]] = TRUE;
1554 static void make_dd_indices(gmx_domdec_t *dd,
1555 const int *gcgs_index, int cg_start)
1557 const int numZones = dd->comm->zones.n;
1558 const int *zone2cg = dd->comm->zones.cg_range;
1559 const int *zone_ncg1 = dd->comm->zone_ncg1;
1560 gmx::ArrayRef<const int> globalAtomGroupIndices = dd->globalAtomGroupIndices;
1561 const gmx_bool bCGs = dd->comm->bCGs;
1563 std::vector<int> &globalAtomIndices = dd->globalAtomIndices;
1564 gmx_ga2la_t &ga2la = *dd->ga2la;
1566 if (zone2cg[1] != dd->ncg_home)
1568 gmx_incons("dd->ncg_zone is not up to date");
1571 /* Make the local to global and global to local atom index */
1572 int a = dd->atomGrouping().subRange(cg_start, cg_start).begin();
1573 globalAtomIndices.resize(a);
1574 for (int zone = 0; zone < numZones; zone++)
1583 cg0 = zone2cg[zone];
1585 int cg1 = zone2cg[zone+1];
1586 int cg1_p1 = cg0 + zone_ncg1[zone];
1588 for (int cg = cg0; cg < cg1; cg++)
1593 /* Signal that this cg is from more than one pulse away */
1596 int cg_gl = globalAtomGroupIndices[cg];
1599 for (int a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
1601 globalAtomIndices.push_back(a_gl);
1602 ga2la.insert(a_gl, {a, zone1});
1608 globalAtomIndices.push_back(cg_gl);
1609 ga2la.insert(cg_gl, {a, zone1});
1616 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
1620 if (bLocalCG == nullptr)
1624 for (size_t i = 0; i < dd->globalAtomGroupIndices.size(); i++)
1626 if (!bLocalCG[dd->globalAtomGroupIndices[i]])
1629 "DD rank %d, %s: atom group %zu, global atom group %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i + 1, dd->globalAtomGroupIndices[i] + 1, dd->ncg_home);
1634 for (int i = 0; i < ncg_sys; i++)
1641 if (ngl != dd->globalAtomGroupIndices.size())
1643 fprintf(stderr, "DD rank %d, %s: In bLocalCG %zu atom groups are marked as local, whereas there are %zu\n", dd->rank, where, ngl, dd->globalAtomGroupIndices.size());
1650 static void check_index_consistency(gmx_domdec_t *dd,
1651 int natoms_sys, int ncg_sys,
1656 const int numAtomsInZones = dd->comm->atomRanges.end(DDAtomRanges::Type::Zones);
1658 if (dd->comm->DD_debug > 1)
1660 std::vector<int> have(natoms_sys);
1661 for (int a = 0; a < numAtomsInZones; a++)
1663 int globalAtomIndex = dd->globalAtomIndices[a];
1664 if (have[globalAtomIndex] > 0)
1666 fprintf(stderr, "DD rank %d: global atom %d occurs twice: index %d and %d\n", dd->rank, globalAtomIndex + 1, have[globalAtomIndex], a+1);
1670 have[globalAtomIndex] = a + 1;
1675 std::vector<int> have(numAtomsInZones);
1678 for (int i = 0; i < natoms_sys; i++)
1680 if (const auto entry = dd->ga2la->find(i))
1682 const int a = entry->la;
1683 if (a >= numAtomsInZones)
1685 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, numAtomsInZones);
1691 if (dd->globalAtomIndices[a] != i)
1693 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->globalAtomIndices[a]+1);
1700 if (ngl != numAtomsInZones)
1703 "DD rank %d, %s: %d global atom indices, %d local atoms\n",
1704 dd->rank, where, ngl, numAtomsInZones);
1706 for (int a = 0; a < numAtomsInZones; a++)
1711 "DD rank %d, %s: local atom %d, global %d has no global index\n",
1712 dd->rank, where, a + 1, dd->globalAtomIndices[a] + 1);
1716 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
1720 gmx_fatal(FARGS, "DD rank %d, %s: %d atom(group) index inconsistencies",
1721 dd->rank, where, nerr);
1725 /* Clear all DD global state indices, starting from \p atomGroupStart and \p atomStart */
1726 static void clearDDStateIndices(gmx_domdec_t *dd,
1730 gmx_ga2la_t &ga2la = *dd->ga2la;
1734 /* Clear the whole list without the overhead of searching */
1739 const int numAtomsInZones = dd->comm->atomRanges.end(DDAtomRanges::Type::Zones);
1740 for (int i = 0; i < numAtomsInZones; i++)
1742 ga2la.erase(dd->globalAtomIndices[i]);
1746 char *bLocalCG = dd->comm->bLocalCG;
1749 for (size_t atomGroup = atomGroupStart; atomGroup < dd->globalAtomGroupIndices.size(); atomGroup++)
1751 bLocalCG[dd->globalAtomGroupIndices[atomGroup]] = FALSE;
1755 dd_clear_local_vsite_indices(dd);
1757 if (dd->constraints)
1759 dd_clear_local_constraint_indices(dd);
1763 static bool check_grid_jump(int64_t step,
1764 const gmx_domdec_t *dd,
1766 const gmx_ddbox_t *ddbox,
1769 gmx_domdec_comm_t *comm = dd->comm;
1770 bool invalid = false;
1772 for (int d = 1; d < dd->ndim; d++)
1774 const DDCellsizesWithDlb &cellsizes = comm->cellsizesWithDlb[d];
1775 const int dim = dd->dim[d];
1776 const real limit = grid_jump_limit(comm, cutoff, d);
1777 real bfac = ddbox->box_size[dim];
1778 if (ddbox->tric_dir[dim])
1780 bfac *= ddbox->skew_fac[dim];
1782 if ((cellsizes.fracUpper - cellsizes.fracLowerMax)*bfac < limit ||
1783 (cellsizes.fracLower - cellsizes.fracUpperMin)*bfac > -limit)
1791 /* This error should never be triggered under normal
1792 * circumstances, but you never know ...
1794 gmx_fatal(FARGS, "step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
1795 gmx_step_str(step, buf),
1796 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1804 static float dd_force_load(gmx_domdec_comm_t *comm)
1811 if (comm->eFlop > 1)
1813 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
1818 load = comm->cycl[ddCyclF];
1819 if (comm->cycl_n[ddCyclF] > 1)
1821 /* Subtract the maximum of the last n cycle counts
1822 * to get rid of possible high counts due to other sources,
1823 * for instance system activity, that would otherwise
1824 * affect the dynamic load balancing.
1826 load -= comm->cycl_max[ddCyclF];
1830 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
1832 float gpu_wait, gpu_wait_sum;
1834 gpu_wait = comm->cycl[ddCyclWaitGPU];
1835 if (comm->cycl_n[ddCyclF] > 1)
1837 /* We should remove the WaitGPU time of the same MD step
1838 * as the one with the maximum F time, since the F time
1839 * and the wait time are not independent.
1840 * Furthermore, the step for the max F time should be chosen
1841 * the same on all ranks that share the same GPU.
1842 * But to keep the code simple, we remove the average instead.
1843 * The main reason for artificially long times at some steps
1844 * is spurious CPU activity or MPI time, so we don't expect
1845 * that changes in the GPU wait time matter a lot here.
1847 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/static_cast<float>(comm->cycl_n[ddCyclF]);
1849 /* Sum the wait times over the ranks that share the same GPU */
1850 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
1851 comm->mpi_comm_gpu_shared);
1852 /* Replace the wait time by the average over the ranks */
1853 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
1861 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
1863 gmx_domdec_comm_t *comm;
1868 snew(*dim_f, dd->nc[dim]+1);
1870 for (i = 1; i < dd->nc[dim]; i++)
1872 if (comm->slb_frac[dim])
1874 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
1878 (*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
1881 (*dim_f)[dd->nc[dim]] = 1;
1884 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
1886 int pmeindex, slab, nso, i;
1889 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
1895 ddpme->dim = dimind;
1897 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
1899 ddpme->nslab = (ddpme->dim == 0 ?
1900 dd->comm->npmenodes_x :
1901 dd->comm->npmenodes_y);
1903 if (ddpme->nslab <= 1)
1908 nso = dd->comm->npmenodes/ddpme->nslab;
1909 /* Determine for each PME slab the PP location range for dimension dim */
1910 snew(ddpme->pp_min, ddpme->nslab);
1911 snew(ddpme->pp_max, ddpme->nslab);
1912 for (slab = 0; slab < ddpme->nslab; slab++)
1914 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
1915 ddpme->pp_max[slab] = 0;
1917 for (i = 0; i < dd->nnodes; i++)
1919 ddindex2xyz(dd->nc, i, xyz);
1920 /* For y only use our y/z slab.
1921 * This assumes that the PME x grid size matches the DD grid size.
1923 if (dimind == 0 || xyz[XX] == dd->ci[XX])
1925 pmeindex = ddindex2pmeindex(dd, i);
1928 slab = pmeindex/nso;
1932 slab = pmeindex % ddpme->nslab;
1934 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
1935 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1939 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1942 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1944 if (dd->comm->ddpme[0].dim == XX)
1946 return dd->comm->ddpme[0].maxshift;
1954 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1956 if (dd->comm->ddpme[0].dim == YY)
1958 return dd->comm->ddpme[0].maxshift;
1960 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
1962 return dd->comm->ddpme[1].maxshift;
1970 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
1972 rvec cell_ns_x0, rvec cell_ns_x1,
1975 gmx_domdec_comm_t *comm;
1980 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
1982 dim = dd->dim[dim_ind];
1984 /* Without PBC we don't have restrictions on the outer cells */
1985 if (!(dim >= ddbox->npbcdim &&
1986 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
1988 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
1989 comm->cellsize_min[dim])
1992 gmx_fatal(FARGS, "step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
1993 gmx_step_str(step, buf), dim2char(dim),
1994 comm->cell_x1[dim] - comm->cell_x0[dim],
1995 ddbox->skew_fac[dim],
1996 dd->comm->cellsize_min[dim],
1997 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2001 if ((isDlbOn(dd->comm) && dd->ndim > 1) || ddbox->nboundeddim < DIM)
2003 /* Communicate the boundaries and update cell_ns_x0/1 */
2004 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
2005 if (isDlbOn(dd->comm) && dd->ndim > 1)
2007 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
2012 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
2014 /* Note that the cycles value can be incorrect, either 0 or some
2015 * extremely large value, when our thread migrated to another core
2016 * with an unsynchronized cycle counter. If this happens less often
2017 * that once per nstlist steps, this will not cause issues, since
2018 * we later subtract the maximum value from the sum over nstlist steps.
2019 * A zero count will slightly lower the total, but that's a small effect.
2020 * Note that the main purpose of the subtraction of the maximum value
2021 * is to avoid throwing off the load balancing when stalls occur due
2022 * e.g. system activity or network congestion.
2024 dd->comm->cycl[ddCycl] += cycles;
2025 dd->comm->cycl_n[ddCycl]++;
2026 if (cycles > dd->comm->cycl_max[ddCycl])
2028 dd->comm->cycl_max[ddCycl] = cycles;
2032 static double force_flop_count(t_nrnb *nrnb)
2039 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
2041 /* To get closer to the real timings, we half the count
2042 * for the normal loops and again half it for water loops.
2045 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
2047 sum += nrnb->n[i]*0.25*cost_nrnb(i);
2051 sum += nrnb->n[i]*0.50*cost_nrnb(i);
2054 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
2057 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
2059 sum += nrnb->n[i]*cost_nrnb(i);
2062 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
2064 sum += nrnb->n[i]*cost_nrnb(i);
2070 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
2072 if (dd->comm->eFlop)
2074 dd->comm->flop -= force_flop_count(nrnb);
2077 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
2079 if (dd->comm->eFlop)
2081 dd->comm->flop += force_flop_count(nrnb);
2086 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
2090 for (i = 0; i < ddCyclNr; i++)
2092 dd->comm->cycl[i] = 0;
2093 dd->comm->cycl_n[i] = 0;
2094 dd->comm->cycl_max[i] = 0;
2097 dd->comm->flop_n = 0;
2100 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
2102 gmx_domdec_comm_t *comm;
2103 domdec_load_t *load;
2104 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
2109 fprintf(debug, "get_load_distribution start\n");
2112 wallcycle_start(wcycle, ewcDDCOMMLOAD);
2116 bSepPME = (dd->pme_nodeid >= 0);
2118 if (dd->ndim == 0 && bSepPME)
2120 /* Without decomposition, but with PME nodes, we need the load */
2121 comm->load[0].mdf = comm->cycl[ddCyclPPduringPME];
2122 comm->load[0].pme = comm->cycl[ddCyclPME];
2125 for (int d = dd->ndim - 1; d >= 0; d--)
2127 const DDCellsizesWithDlb &cellsizes = comm->cellsizesWithDlb[d];
2128 const int dim = dd->dim[d];
2129 /* Check if we participate in the communication in this dimension */
2130 if (d == dd->ndim-1 ||
2131 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
2133 load = &comm->load[d];
2134 if (isDlbOn(dd->comm))
2136 cell_frac = cellsizes.fracUpper - cellsizes.fracLower;
2139 if (d == dd->ndim-1)
2141 sbuf[pos++] = dd_force_load(comm);
2142 sbuf[pos++] = sbuf[0];
2143 if (isDlbOn(dd->comm))
2145 sbuf[pos++] = sbuf[0];
2146 sbuf[pos++] = cell_frac;
2149 sbuf[pos++] = cellsizes.fracLowerMax;
2150 sbuf[pos++] = cellsizes.fracUpperMin;
2155 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
2156 sbuf[pos++] = comm->cycl[ddCyclPME];
2161 sbuf[pos++] = comm->load[d+1].sum;
2162 sbuf[pos++] = comm->load[d+1].max;
2163 if (isDlbOn(dd->comm))
2165 sbuf[pos++] = comm->load[d+1].sum_m;
2166 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
2167 sbuf[pos++] = comm->load[d+1].flags;
2170 sbuf[pos++] = cellsizes.fracLowerMax;
2171 sbuf[pos++] = cellsizes.fracUpperMin;
2176 sbuf[pos++] = comm->load[d+1].mdf;
2177 sbuf[pos++] = comm->load[d+1].pme;
2181 /* Communicate a row in DD direction d.
2182 * The communicators are setup such that the root always has rank 0.
2185 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
2186 load->load, load->nload*sizeof(float), MPI_BYTE,
2187 0, comm->mpi_comm_load[d]);
2189 if (dd->ci[dim] == dd->master_ci[dim])
2191 /* We are the master along this row, process this row */
2192 RowMaster *rowMaster = nullptr;
2196 rowMaster = cellsizes.rowMaster.get();
2206 for (int i = 0; i < dd->nc[dim]; i++)
2208 load->sum += load->load[pos++];
2209 load->max = std::max(load->max, load->load[pos]);
2211 if (isDlbOn(dd->comm))
2213 if (rowMaster->dlbIsLimited)
2215 /* This direction could not be load balanced properly,
2216 * therefore we need to use the maximum iso the average load.
2218 load->sum_m = std::max(load->sum_m, load->load[pos]);
2222 load->sum_m += load->load[pos];
2225 load->cvol_min = std::min(load->cvol_min, load->load[pos]);
2229 load->flags = gmx::roundToInt(load->load[pos++]);
2233 rowMaster->bounds[i].cellFracLowerMax = load->load[pos++];
2234 rowMaster->bounds[i].cellFracUpperMin = load->load[pos++];
2239 load->mdf = std::max(load->mdf, load->load[pos]);
2241 load->pme = std::max(load->pme, load->load[pos]);
2245 if (isDlbOn(comm) && rowMaster->dlbIsLimited)
2247 load->sum_m *= dd->nc[dim];
2248 load->flags |= (1<<d);
2256 comm->nload += dd_load_count(comm);
2257 comm->load_step += comm->cycl[ddCyclStep];
2258 comm->load_sum += comm->load[0].sum;
2259 comm->load_max += comm->load[0].max;
2262 for (int d = 0; d < dd->ndim; d++)
2264 if (comm->load[0].flags & (1<<d))
2266 comm->load_lim[d]++;
2272 comm->load_mdf += comm->load[0].mdf;
2273 comm->load_pme += comm->load[0].pme;
2277 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
2281 fprintf(debug, "get_load_distribution finished\n");
2285 static float dd_force_load_fraction(gmx_domdec_t *dd)
2287 /* Return the relative performance loss on the total run time
2288 * due to the force calculation load imbalance.
2290 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
2292 return dd->comm->load_sum/(dd->comm->load_step*dd->nnodes);
2300 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
2302 /* Return the relative performance loss on the total run time
2303 * due to the force calculation load imbalance.
2305 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
2308 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
2309 (dd->comm->load_step*dd->nnodes);
2317 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
2319 gmx_domdec_comm_t *comm = dd->comm;
2321 /* Only the master rank prints loads and only if we measured loads */
2322 if (!DDMASTER(dd) || comm->nload == 0)
2328 int numPpRanks = dd->nnodes;
2329 int numPmeRanks = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
2330 int numRanks = numPpRanks + numPmeRanks;
2331 float lossFraction = 0;
2333 /* Print the average load imbalance and performance loss */
2334 if (dd->nnodes > 1 && comm->load_sum > 0)
2336 float imbalance = comm->load_max*numPpRanks/comm->load_sum - 1;
2337 lossFraction = dd_force_imb_perf_loss(dd);
2339 std::string msg = "\n Dynamic load balancing report:\n";
2340 std::string dlbStateStr;
2342 switch (dd->comm->dlbState)
2344 case DlbState::offUser:
2345 dlbStateStr = "DLB was off during the run per user request.";
2347 case DlbState::offForever:
2348 /* Currectly this can happen due to performance loss observed, cell size
2349 * limitations or incompatibility with other settings observed during
2350 * determineInitialDlbState(). */
2351 dlbStateStr = "DLB got disabled because it was unsuitable to use.";
2353 case DlbState::offCanTurnOn:
2354 dlbStateStr = "DLB was off during the run due to low measured imbalance.";
2356 case DlbState::offTemporarilyLocked:
2357 dlbStateStr = "DLB was locked at the end of the run due to unfinished PP-PME balancing.";
2359 case DlbState::onCanTurnOff:
2360 dlbStateStr = "DLB was turned on during the run due to measured imbalance.";
2362 case DlbState::onUser:
2363 dlbStateStr = "DLB was permanently on during the run per user request.";
2366 GMX_ASSERT(false, "Undocumented DLB state");
2369 msg += " " + dlbStateStr + "\n";
2370 msg += gmx::formatString(" Average load imbalance: %.1f%%.\n", imbalance*100);
2371 msg += gmx::formatString(" The balanceable part of the MD step is %d%%, load imbalance is computed from this.\n",
2372 gmx::roundToInt(dd_force_load_fraction(dd)*100));
2373 msg += gmx::formatString(" Part of the total run time spent waiting due to load imbalance: %.1f%%.\n",
2375 fprintf(fplog, "%s", msg.c_str());
2376 fprintf(stderr, "%s", msg.c_str());
2379 /* Print during what percentage of steps the load balancing was limited */
2380 bool dlbWasLimited = false;
2383 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
2384 for (int d = 0; d < dd->ndim; d++)
2386 int limitPercentage = (200*comm->load_lim[d] + 1)/(2*comm->nload);
2387 sprintf(buf+strlen(buf), " %c %d %%",
2388 dim2char(dd->dim[d]), limitPercentage);
2389 if (limitPercentage >= 50)
2391 dlbWasLimited = true;
2394 sprintf(buf + strlen(buf), "\n");
2395 fprintf(fplog, "%s", buf);
2396 fprintf(stderr, "%s", buf);
2399 /* Print the performance loss due to separate PME - PP rank imbalance */
2400 float lossFractionPme = 0;
2401 if (numPmeRanks > 0 && comm->load_mdf > 0 && comm->load_step > 0)
2403 float pmeForceRatio = comm->load_pme/comm->load_mdf;
2404 lossFractionPme = (comm->load_pme - comm->load_mdf)/comm->load_step;
2405 if (lossFractionPme <= 0)
2407 lossFractionPme *= numPmeRanks/static_cast<float>(numRanks);
2411 lossFractionPme *= numPpRanks/static_cast<float>(numRanks);
2413 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pmeForceRatio);
2414 fprintf(fplog, "%s", buf);
2415 fprintf(stderr, "%s", buf);
2416 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", std::fabs(lossFractionPme)*100);
2417 fprintf(fplog, "%s", buf);
2418 fprintf(stderr, "%s", buf);
2420 fprintf(fplog, "\n");
2421 fprintf(stderr, "\n");
2423 if (lossFraction >= DD_PERF_LOSS_WARN)
2426 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
2427 " in the domain decomposition.\n", lossFraction*100);
2430 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
2432 else if (dlbWasLimited)
2434 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
2436 fprintf(fplog, "%s\n", buf);
2437 fprintf(stderr, "%s\n", buf);
2439 if (numPmeRanks > 0 && std::fabs(lossFractionPme) >= DD_PERF_LOSS_WARN)
2442 "NOTE: %.1f %% performance was lost because the PME ranks\n"
2443 " had %s work to do than the PP ranks.\n"
2444 " You might want to %s the number of PME ranks\n"
2445 " or %s the cut-off and the grid spacing.\n",
2446 std::fabs(lossFractionPme*100),
2447 (lossFractionPme < 0) ? "less" : "more",
2448 (lossFractionPme < 0) ? "decrease" : "increase",
2449 (lossFractionPme < 0) ? "decrease" : "increase");
2450 fprintf(fplog, "%s\n", buf);
2451 fprintf(stderr, "%s\n", buf);
2455 static float dd_vol_min(gmx_domdec_t *dd)
2457 return dd->comm->load[0].cvol_min*dd->nnodes;
2460 static int dd_load_flags(gmx_domdec_t *dd)
2462 return dd->comm->load[0].flags;
2465 static float dd_f_imbal(gmx_domdec_t *dd)
2467 if (dd->comm->load[0].sum > 0)
2469 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1.0f;
2473 /* Something is wrong in the cycle counting, report no load imbalance */
2478 float dd_pme_f_ratio(gmx_domdec_t *dd)
2480 /* Should only be called on the DD master rank */
2481 assert(DDMASTER(dd));
2483 if (dd->comm->load[0].mdf > 0 && dd->comm->cycl_n[ddCyclPME] > 0)
2485 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
2494 dd_print_load(gmx_domdec_t *dd,
2497 gmx::StringOutputStream stream;
2498 gmx::TextWriter log(&stream);
2500 int flags = dd_load_flags(dd);
2503 log.writeString("DD load balancing is limited by minimum cell size in dimension");
2504 for (int d = 0; d < dd->ndim; d++)
2508 log.writeStringFormatted(" %c", dim2char(dd->dim[d]));
2511 log.ensureLineBreak();
2513 log.writeString("DD step %s" + gmx::toString(step));
2514 if (isDlbOn(dd->comm))
2516 log.writeStringFormatted(" vol min/aver %5.3f%c",
2517 dd_vol_min(dd), flags ? '!' : ' ');
2521 log.writeStringFormatted(" load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
2523 if (dd->comm->cycl_n[ddCyclPME])
2525 log.writeStringFormatted(" pme mesh/force %5.3f", dd_pme_f_ratio(dd));
2527 log.ensureLineBreak();
2528 return stream.toString();
2531 static void dd_print_load_verbose(gmx_domdec_t *dd)
2533 if (isDlbOn(dd->comm))
2535 fprintf(stderr, "vol %4.2f%c ",
2536 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
2540 fprintf(stderr, "imb F %2d%% ", gmx::roundToInt(dd_f_imbal(dd)*100));
2542 if (dd->comm->cycl_n[ddCyclPME])
2544 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
2549 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
2554 gmx_bool bPartOfGroup = FALSE;
2556 dim = dd->dim[dim_ind];
2557 copy_ivec(loc, loc_c);
2558 for (i = 0; i < dd->nc[dim]; i++)
2561 rank = dd_index(dd->nc, loc_c);
2562 if (rank == dd->rank)
2564 /* This process is part of the group */
2565 bPartOfGroup = TRUE;
2568 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
2572 dd->comm->mpi_comm_load[dim_ind] = c_row;
2573 if (!isDlbDisabled(dd->comm))
2575 DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
2577 if (dd->ci[dim] == dd->master_ci[dim])
2579 /* This is the root process of this row */
2580 cellsizes.rowMaster = gmx::compat::make_unique<RowMaster>();
2582 RowMaster &rowMaster = *cellsizes.rowMaster;
2583 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
2584 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
2585 rowMaster.isCellMin.resize(dd->nc[dim]);
2588 rowMaster.bounds.resize(dd->nc[dim]);
2590 rowMaster.buf_ncd.resize(dd->nc[dim]);
2594 /* This is not a root process, we only need to receive cell_f */
2595 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
2598 if (dd->ci[dim] == dd->master_ci[dim])
2600 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
2606 void dd_setup_dlb_resource_sharing(t_commrec *cr,
2610 int physicalnode_id_hash;
2612 MPI_Comm mpi_comm_pp_physicalnode;
2614 if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
2616 /* Only ranks with short-ranged tasks (currently) use GPUs.
2617 * If we don't have GPUs assigned, there are no resources to share.
2622 physicalnode_id_hash = gmx_physicalnode_id_hash();
2628 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
2629 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
2630 dd->rank, physicalnode_id_hash, gpu_id);
2632 /* Split the PP communicator over the physical nodes */
2633 /* TODO: See if we should store this (before), as it's also used for
2634 * for the nodecomm summation.
2636 // TODO PhysicalNodeCommunicator could be extended/used to handle
2637 // the need for per-node per-group communicators.
2638 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
2639 &mpi_comm_pp_physicalnode);
2640 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
2641 &dd->comm->mpi_comm_gpu_shared);
2642 MPI_Comm_free(&mpi_comm_pp_physicalnode);
2643 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
2647 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
2650 /* Note that some ranks could share a GPU, while others don't */
2652 if (dd->comm->nrank_gpu_shared == 1)
2654 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
2657 GMX_UNUSED_VALUE(cr);
2658 GMX_UNUSED_VALUE(gpu_id);
2662 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
2665 int dim0, dim1, i, j;
2670 fprintf(debug, "Making load communicators\n");
2673 snew(dd->comm->load, std::max(dd->ndim, 1));
2674 snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
2682 make_load_communicator(dd, 0, loc);
2686 for (i = 0; i < dd->nc[dim0]; i++)
2689 make_load_communicator(dd, 1, loc);
2695 for (i = 0; i < dd->nc[dim0]; i++)
2699 for (j = 0; j < dd->nc[dim1]; j++)
2702 make_load_communicator(dd, 2, loc);
2709 fprintf(debug, "Finished making load communicators\n");
2714 /*! \brief Sets up the relation between neighboring domains and zones */
2715 static void setup_neighbor_relations(gmx_domdec_t *dd)
2717 int d, dim, i, j, m;
2719 gmx_domdec_zones_t *zones;
2720 gmx_domdec_ns_ranges_t *izone;
2722 for (d = 0; d < dd->ndim; d++)
2725 copy_ivec(dd->ci, tmp);
2726 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
2727 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
2728 copy_ivec(dd->ci, tmp);
2729 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
2730 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
2733 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
2736 dd->neighbor[d][1]);
2740 int nzone = (1 << dd->ndim);
2741 int nizone = (1 << std::max(dd->ndim - 1, 0));
2742 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
2744 zones = &dd->comm->zones;
2746 for (i = 0; i < nzone; i++)
2749 clear_ivec(zones->shift[i]);
2750 for (d = 0; d < dd->ndim; d++)
2752 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
2757 for (i = 0; i < nzone; i++)
2759 for (d = 0; d < DIM; d++)
2761 s[d] = dd->ci[d] - zones->shift[i][d];
2766 else if (s[d] >= dd->nc[d])
2772 zones->nizone = nizone;
2773 for (i = 0; i < zones->nizone; i++)
2775 assert(ddNonbondedZonePairRanges[i][0] == i);
2777 izone = &zones->izone[i];
2778 /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
2779 * j-zones up to nzone.
2781 izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
2782 izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
2783 for (dim = 0; dim < DIM; dim++)
2785 if (dd->nc[dim] == 1)
2787 /* All shifts should be allowed */
2788 izone->shift0[dim] = -1;
2789 izone->shift1[dim] = 1;
2793 /* Determine the min/max j-zone shift wrt the i-zone */
2794 izone->shift0[dim] = 1;
2795 izone->shift1[dim] = -1;
2796 for (j = izone->j0; j < izone->j1; j++)
2798 int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
2799 if (shift_diff < izone->shift0[dim])
2801 izone->shift0[dim] = shift_diff;
2803 if (shift_diff > izone->shift1[dim])
2805 izone->shift1[dim] = shift_diff;
2812 if (!isDlbDisabled(dd->comm))
2814 dd->comm->cellsizesWithDlb.resize(dd->ndim);
2817 if (dd->comm->bRecordLoad)
2819 make_load_communicators(dd);
2823 static void make_pp_communicator(const gmx::MDLogger &mdlog,
2825 t_commrec gmx_unused *cr,
2826 bool gmx_unused reorder)
2829 gmx_domdec_comm_t *comm;
2836 if (comm->bCartesianPP)
2838 /* Set up cartesian communication for the particle-particle part */
2839 GMX_LOG(mdlog.info).appendTextFormatted(
2840 "Will use a Cartesian communicator: %d x %d x %d",
2841 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
2843 for (int i = 0; i < DIM; i++)
2847 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
2849 /* We overwrite the old communicator with the new cartesian one */
2850 cr->mpi_comm_mygroup = comm_cart;
2853 dd->mpi_comm_all = cr->mpi_comm_mygroup;
2854 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
2856 if (comm->bCartesianPP_PME)
2858 /* Since we want to use the original cartesian setup for sim,
2859 * and not the one after split, we need to make an index.
2861 snew(comm->ddindex2ddnodeid, dd->nnodes);
2862 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
2863 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
2864 /* Get the rank of the DD master,
2865 * above we made sure that the master node is a PP node.
2875 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
2877 else if (comm->bCartesianPP)
2879 if (cr->npmenodes == 0)
2881 /* The PP communicator is also
2882 * the communicator for this simulation
2884 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
2886 cr->nodeid = dd->rank;
2888 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
2890 /* We need to make an index to go from the coordinates
2891 * to the nodeid of this simulation.
2893 snew(comm->ddindex2simnodeid, dd->nnodes);
2894 snew(buf, dd->nnodes);
2895 if (thisRankHasDuty(cr, DUTY_PP))
2897 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
2899 /* Communicate the ddindex to simulation nodeid index */
2900 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
2901 cr->mpi_comm_mysim);
2904 /* Determine the master coordinates and rank.
2905 * The DD master should be the same node as the master of this sim.
2907 for (int i = 0; i < dd->nnodes; i++)
2909 if (comm->ddindex2simnodeid[i] == 0)
2911 ddindex2xyz(dd->nc, i, dd->master_ci);
2912 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
2917 fprintf(debug, "The master rank is %d\n", dd->masterrank);
2922 /* No Cartesian communicators */
2923 /* We use the rank in dd->comm->all as DD index */
2924 ddindex2xyz(dd->nc, dd->rank, dd->ci);
2925 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
2927 clear_ivec(dd->master_ci);
2931 GMX_LOG(mdlog.info).appendTextFormatted(
2932 "Domain decomposition rank %d, coordinates %d %d %d\n",
2933 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2937 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
2938 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2942 static void receive_ddindex2simnodeid(gmx_domdec_t *dd,
2946 gmx_domdec_comm_t *comm = dd->comm;
2948 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
2951 snew(comm->ddindex2simnodeid, dd->nnodes);
2952 snew(buf, dd->nnodes);
2953 if (thisRankHasDuty(cr, DUTY_PP))
2955 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
2957 /* Communicate the ddindex to simulation nodeid index */
2958 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
2959 cr->mpi_comm_mysim);
2963 GMX_UNUSED_VALUE(dd);
2964 GMX_UNUSED_VALUE(cr);
2968 static void split_communicator(const gmx::MDLogger &mdlog,
2969 t_commrec *cr, gmx_domdec_t *dd,
2970 DdRankOrder gmx_unused rankOrder,
2971 bool gmx_unused reorder)
2973 gmx_domdec_comm_t *comm;
2982 if (comm->bCartesianPP)
2984 for (i = 1; i < DIM; i++)
2986 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
2988 if (bDiv[YY] || bDiv[ZZ])
2990 comm->bCartesianPP_PME = TRUE;
2991 /* If we have 2D PME decomposition, which is always in x+y,
2992 * we stack the PME only nodes in z.
2993 * Otherwise we choose the direction that provides the thinnest slab
2994 * of PME only nodes as this will have the least effect
2995 * on the PP communication.
2996 * But for the PME communication the opposite might be better.
2998 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
3000 dd->nc[YY] > dd->nc[ZZ]))
3002 comm->cartpmedim = ZZ;
3006 comm->cartpmedim = YY;
3008 comm->ntot[comm->cartpmedim]
3009 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
3013 GMX_LOG(mdlog.info).appendTextFormatted(
3014 "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)",
3015 cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
3016 GMX_LOG(mdlog.info).appendText("Will not use a Cartesian communicator for PP <-> PME\n");
3020 if (comm->bCartesianPP_PME)
3026 GMX_LOG(mdlog.info).appendTextFormatted(
3027 "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
3028 comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
3030 for (i = 0; i < DIM; i++)
3034 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, static_cast<int>(reorder),
3036 MPI_Comm_rank(comm_cart, &rank);
3037 if (MASTER(cr) && rank != 0)
3039 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
3042 /* With this assigment we loose the link to the original communicator
3043 * which will usually be MPI_COMM_WORLD, unless have multisim.
3045 cr->mpi_comm_mysim = comm_cart;
3046 cr->sim_nodeid = rank;
3048 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
3050 GMX_LOG(mdlog.info).appendTextFormatted(
3051 "Cartesian rank %d, coordinates %d %d %d\n",
3052 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3054 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
3058 if (cr->npmenodes == 0 ||
3059 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
3061 cr->duty = DUTY_PME;
3064 /* Split the sim communicator into PP and PME only nodes */
3065 MPI_Comm_split(cr->mpi_comm_mysim,
3066 getThisRankDuties(cr),
3067 dd_index(comm->ntot, dd->ci),
3068 &cr->mpi_comm_mygroup);
3075 case DdRankOrder::pp_pme:
3076 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
3078 case DdRankOrder::interleave:
3079 /* Interleave the PP-only and PME-only ranks */
3080 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
3081 comm->pmenodes = dd_interleaved_pme_ranks(dd);
3083 case DdRankOrder::cartesian:
3086 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(rankOrder));
3089 if (dd_simnode2pmenode(dd, cr, cr->sim_nodeid) == -1)
3091 cr->duty = DUTY_PME;
3098 /* Split the sim communicator into PP and PME only nodes */
3099 MPI_Comm_split(cr->mpi_comm_mysim,
3100 getThisRankDuties(cr),
3102 &cr->mpi_comm_mygroup);
3103 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
3107 GMX_LOG(mdlog.info).appendTextFormatted(
3108 "This rank does only %s work.\n",
3109 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
3112 /*! \brief Generates the MPI communicators for domain decomposition */
3113 static void make_dd_communicators(const gmx::MDLogger &mdlog,
3115 gmx_domdec_t *dd, DdRankOrder ddRankOrder)
3117 gmx_domdec_comm_t *comm;
3122 copy_ivec(dd->nc, comm->ntot);
3124 comm->bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
3125 comm->bCartesianPP_PME = FALSE;
3127 /* Reorder the nodes by default. This might change the MPI ranks.
3128 * Real reordering is only supported on very few architectures,
3129 * Blue Gene is one of them.
3131 CartReorder = getenv("GMX_NO_CART_REORDER") == nullptr;
3133 if (cr->npmenodes > 0)
3135 /* Split the communicator into a PP and PME part */
3136 split_communicator(mdlog, cr, dd, ddRankOrder, CartReorder);
3137 if (comm->bCartesianPP_PME)
3139 /* We (possibly) reordered the nodes in split_communicator,
3140 * so it is no longer required in make_pp_communicator.
3142 CartReorder = FALSE;
3147 /* All nodes do PP and PME */
3149 /* We do not require separate communicators */
3150 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
3154 if (thisRankHasDuty(cr, DUTY_PP))
3156 /* Copy or make a new PP communicator */
3157 make_pp_communicator(mdlog, dd, cr, CartReorder);
3161 receive_ddindex2simnodeid(dd, cr);
3164 if (!thisRankHasDuty(cr, DUTY_PME))
3166 /* Set up the commnuication to our PME node */
3167 dd->pme_nodeid = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
3168 dd->pme_receive_vir_ener = receive_vir_ener(dd, cr);
3171 fprintf(debug, "My pme_nodeid %d receive ener %s\n",
3172 dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
3177 dd->pme_nodeid = -1;
3182 dd->ma = gmx::compat::make_unique<AtomDistribution>(dd->nc,
3184 comm->cgs_gl.index[comm->cgs_gl.nr]);
3188 static real *get_slb_frac(const gmx::MDLogger &mdlog,
3189 const char *dir, int nc, const char *size_string)
3191 real *slb_frac, tot;
3196 if (nc > 1 && size_string != nullptr)
3198 GMX_LOG(mdlog.info).appendTextFormatted(
3199 "Using static load balancing for the %s direction", dir);
3202 for (i = 0; i < nc; i++)
3205 sscanf(size_string, "%20lf%n", &dbl, &n);
3208 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
3215 std::string relativeCellSizes = "Relative cell sizes:";
3216 for (i = 0; i < nc; i++)
3219 relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
3221 GMX_LOG(mdlog.info).appendText(relativeCellSizes);
3227 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
3230 gmx_mtop_ilistloop_t iloop;
3231 const InteractionList *il;
3234 iloop = gmx_mtop_ilistloop_init(mtop);
3235 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
3237 for (ftype = 0; ftype < F_NRE; ftype++)
3239 if ((interaction_function[ftype].flags & IF_BOND) &&
3242 n += nmol*il[ftype].size()/(1 + NRAL(ftype));
3250 static int dd_getenv(const gmx::MDLogger &mdlog,
3251 const char *env_var, int def)
3257 val = getenv(env_var);
3260 if (sscanf(val, "%20d", &nst) <= 0)
3264 GMX_LOG(mdlog.info).appendTextFormatted(
3265 "Found env.var. %s = %s, using value %d",
3272 static void check_dd_restrictions(const gmx_domdec_t *dd,
3273 const t_inputrec *ir,
3274 const gmx::MDLogger &mdlog)
3276 if (ir->ePBC == epbcSCREW &&
3277 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
3279 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
3282 if (ir->ns_type == ensSIMPLE)
3284 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
3287 if (ir->nstlist == 0)
3289 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
3292 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
3294 GMX_LOG(mdlog.warning).appendText("comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
3298 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3303 r = ddbox->box_size[XX];
3304 for (di = 0; di < dd->ndim; di++)
3307 /* Check using the initial average cell size */
3308 r = std::min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
3314 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
3316 static DlbState forceDlbOffOrBail(DlbState cmdlineDlbState,
3317 const std::string &reasonStr,
3318 const gmx::MDLogger &mdlog)
3320 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
3321 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
3323 if (cmdlineDlbState == DlbState::onUser)
3325 gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
3327 else if (cmdlineDlbState == DlbState::offCanTurnOn)
3329 GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
3331 return DlbState::offForever;
3334 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
3336 * This function parses the parameters of "-dlb" command line option setting
3337 * corresponding state values. Then it checks the consistency of the determined
3338 * state with other run parameters and settings. As a result, the initial state
3339 * may be altered or an error may be thrown if incompatibility of options is detected.
3341 * \param [in] mdlog Logger.
3342 * \param [in] dlbOption Enum value for the DLB option.
3343 * \param [in] bRecordLoad True if the load balancer is recording load information.
3344 * \param [in] mdrunOptions Options for mdrun.
3345 * \param [in] ir Pointer mdrun to input parameters.
3346 * \returns DLB initial/startup state.
3348 static DlbState determineInitialDlbState(const gmx::MDLogger &mdlog,
3349 DlbOption dlbOption, gmx_bool bRecordLoad,
3350 const MdrunOptions &mdrunOptions,
3351 const t_inputrec *ir)
3353 DlbState dlbState = DlbState::offCanTurnOn;
3357 case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
3358 case DlbOption::no: dlbState = DlbState::offUser; break;
3359 case DlbOption::yes: dlbState = DlbState::onUser; break;
3360 default: gmx_incons("Invalid dlbOption enum value");
3363 /* Reruns don't support DLB: bail or override auto mode */
3364 if (mdrunOptions.rerun)
3366 std::string reasonStr = "it is not supported in reruns.";
3367 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
3370 /* Unsupported integrators */
3371 if (!EI_DYNAMICS(ir->eI))
3373 auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
3374 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
3377 /* Without cycle counters we can't time work to balance on */
3380 std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
3381 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
3384 if (mdrunOptions.reproducible)
3386 std::string reasonStr = "you started a reproducible run.";
3389 case DlbState::offUser:
3391 case DlbState::offForever:
3392 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
3394 case DlbState::offCanTurnOn:
3395 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
3396 case DlbState::onCanTurnOff:
3397 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
3399 case DlbState::onUser:
3400 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", mdlog);
3402 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", dlbState);
3409 static void set_dd_dim(const gmx::MDLogger &mdlog, gmx_domdec_t *dd)
3412 if (getenv("GMX_DD_ORDER_ZYX") != nullptr)
3414 /* Decomposition order z,y,x */
3415 GMX_LOG(mdlog.info).appendText("Using domain decomposition order z, y, x");
3416 for (int dim = DIM-1; dim >= 0; dim--)
3418 if (dd->nc[dim] > 1)
3420 dd->dim[dd->ndim++] = dim;
3426 /* Decomposition order x,y,z */
3427 for (int dim = 0; dim < DIM; dim++)
3429 if (dd->nc[dim] > 1)
3431 dd->dim[dd->ndim++] = dim;
3438 /* Set dim[0] to avoid extra checks on ndim in several places */
3443 static gmx_domdec_comm_t *init_dd_comm()
3445 gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
3447 comm->n_load_have = 0;
3448 comm->n_load_collect = 0;
3450 comm->haveTurnedOffDlb = false;
3452 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
3454 comm->sum_nat[i] = 0;
3458 comm->load_step = 0;
3461 clear_ivec(comm->load_lim);
3465 /* This should be replaced by a unique pointer */
3466 comm->balanceRegion = ddBalanceRegionAllocate();
3471 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
3472 static void set_dd_limits_and_grid(const gmx::MDLogger &mdlog,
3473 t_commrec *cr, gmx_domdec_t *dd,
3474 const DomdecOptions &options,
3475 const MdrunOptions &mdrunOptions,
3476 const gmx_mtop_t *mtop,
3477 const t_inputrec *ir,
3479 gmx::ArrayRef<const gmx::RVec> xGlobal,
3483 real r_bonded_limit = -1;
3484 const real tenPercentMargin = 1.1;
3485 gmx_domdec_comm_t *comm = dd->comm;
3487 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
3488 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
3490 dd->pme_recv_f_alloc = 0;
3491 dd->pme_recv_f_buf = nullptr;
3493 /* Initialize to GPU share count to 0, might change later */
3494 comm->nrank_gpu_shared = 0;
3496 comm->dlbState = determineInitialDlbState(mdlog, options.dlbOption, comm->bRecordLoad, mdrunOptions, ir);
3497 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
3498 /* To consider turning DLB on after 2*nstlist steps we need to check
3499 * at partitioning count 3. Thus we need to increase the first count by 2.
3501 comm->ddPartioningCountFirstDlbOff += 2;
3503 GMX_LOG(mdlog.info).appendTextFormatted(
3504 "Dynamic load balancing: %s", edlbs_names[int(comm->dlbState)]);
3506 comm->bPMELoadBalDLBLimits = FALSE;
3508 /* Allocate the charge group/atom sorting struct */
3509 comm->sort = gmx::compat::make_unique<gmx_domdec_sort_t>();
3511 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
3513 comm->bInterCGBondeds = ((ncg_mtop(mtop) > gmx_mtop_num_molecules(*mtop)) ||
3514 mtop->bIntermolecularInteractions);
3515 if (comm->bInterCGBondeds)
3517 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
3521 comm->bInterCGMultiBody = FALSE;
3524 dd->bInterCGcons = gmx::inter_charge_group_constraints(*mtop);
3525 dd->bInterCGsettles = gmx::inter_charge_group_settles(*mtop);
3529 /* Set the cut-off to some very large value,
3530 * so we don't need if statements everywhere in the code.
3531 * We use sqrt, since the cut-off is squared in some places.
3533 comm->cutoff = GMX_CUTOFF_INF;
3537 comm->cutoff = ir->rlist;
3539 comm->cutoff_mbody = 0;
3541 /* Determine the minimum cell size limit, affected by many factors */
3542 comm->cellsize_limit = 0;
3543 comm->bBondComm = FALSE;
3545 /* We do not allow home atoms to move beyond the neighboring domain
3546 * between domain decomposition steps, which limits the cell size.
3547 * Get an estimate of cell size limit due to atom displacement.
3548 * In most cases this is a large overestimate, because it assumes
3549 * non-interaction atoms.
3550 * We set the chance to 1 in a trillion steps.
3552 constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
3553 const real limitForAtomDisplacement =
3554 minCellSizeForAtomDisplacement(*mtop, *ir, c_chanceThatAtomMovesBeyondDomain);
3555 GMX_LOG(mdlog.info).appendTextFormatted(
3556 "Minimum cell size due to atom displacement: %.3f nm",
3557 limitForAtomDisplacement);
3559 comm->cellsize_limit = std::max(comm->cellsize_limit,
3560 limitForAtomDisplacement);
3562 /* TODO: PME decomposition currently requires atoms not to be more than
3563 * 2/3 of comm->cutoff, which is >=rlist, outside of their domain.
3564 * In nearly all cases, limitForAtomDisplacement will be smaller
3565 * than 2/3*rlist, so the PME requirement is satisfied.
3566 * But it would be better for both correctness and performance
3567 * to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
3568 * Note that we would need to improve the pairlist buffer case.
3571 if (comm->bInterCGBondeds)
3573 if (options.minimumCommunicationRange > 0)
3575 comm->cutoff_mbody = options.minimumCommunicationRange;
3576 if (options.useBondedCommunication)
3578 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
3582 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
3584 r_bonded_limit = comm->cutoff_mbody;
3586 else if (ir->bPeriodicMols)
3588 /* Can not easily determine the required cut-off */
3589 GMX_LOG(mdlog.warning).appendText("NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.");
3590 comm->cutoff_mbody = comm->cutoff/2;
3591 r_bonded_limit = comm->cutoff_mbody;
3599 dd_bonded_cg_distance(mdlog, mtop, ir, as_rvec_array(xGlobal.data()), box,
3600 options.checkBondedInteractions,
3603 gmx_bcast(sizeof(r_2b), &r_2b, cr);
3604 gmx_bcast(sizeof(r_mb), &r_mb, cr);
3606 /* We use an initial margin of 10% for the minimum cell size,
3607 * except when we are just below the non-bonded cut-off.
3609 if (options.useBondedCommunication)
3611 if (std::max(r_2b, r_mb) > comm->cutoff)
3613 r_bonded = std::max(r_2b, r_mb);
3614 r_bonded_limit = tenPercentMargin*r_bonded;
3615 comm->bBondComm = TRUE;
3620 r_bonded_limit = std::min(tenPercentMargin*r_bonded, comm->cutoff);
3622 /* We determine cutoff_mbody later */
3626 /* No special bonded communication,
3627 * simply increase the DD cut-off.
3629 r_bonded_limit = tenPercentMargin*std::max(r_2b, r_mb);
3630 comm->cutoff_mbody = r_bonded_limit;
3631 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
3634 GMX_LOG(mdlog.info).appendTextFormatted(
3635 "Minimum cell size due to bonded interactions: %.3f nm",
3638 comm->cellsize_limit = std::max(comm->cellsize_limit, r_bonded_limit);
3642 if (dd->bInterCGcons && options.constraintCommunicationRange <= 0)
3644 /* There is a cell size limit due to the constraints (P-LINCS) */
3645 rconstr = gmx::constr_r_max(mdlog, mtop, ir);
3646 GMX_LOG(mdlog.info).appendTextFormatted(
3647 "Estimated maximum distance required for P-LINCS: %.3f nm",
3649 if (rconstr > comm->cellsize_limit)
3651 GMX_LOG(mdlog.info).appendText("This distance will limit the DD cell size, you can override this with -rcon");
3654 else if (options.constraintCommunicationRange > 0)
3656 /* Here we do not check for dd->bInterCGcons,
3657 * because one can also set a cell size limit for virtual sites only
3658 * and at this point we don't know yet if there are intercg v-sites.
3660 GMX_LOG(mdlog.info).appendTextFormatted(
3661 "User supplied maximum distance required for P-LINCS: %.3f nm",
3662 options.constraintCommunicationRange);
3663 rconstr = options.constraintCommunicationRange;
3665 comm->cellsize_limit = std::max(comm->cellsize_limit, rconstr);
3667 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
3669 if (options.numCells[XX] > 0)
3671 copy_ivec(options.numCells, dd->nc);
3672 set_dd_dim(mdlog, dd);
3673 set_ddbox_cr(cr, &dd->nc, ir, box, xGlobal, ddbox);
3675 if (options.numPmeRanks >= 0)
3677 cr->npmenodes = options.numPmeRanks;
3681 /* When the DD grid is set explicitly and -npme is set to auto,
3682 * don't use PME ranks. We check later if the DD grid is
3683 * compatible with the total number of ranks.
3688 real acs = average_cellsize_min(dd, ddbox);
3689 if (acs < comm->cellsize_limit)
3691 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3692 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
3693 acs, comm->cellsize_limit);
3698 set_ddbox_cr(cr, nullptr, ir, box, xGlobal, ddbox);
3700 /* We need to choose the optimal DD grid and possibly PME nodes */
3702 dd_choose_grid(mdlog, cr, dd, ir, mtop, box, ddbox,
3703 options.numPmeRanks,
3704 !isDlbDisabled(comm),
3706 comm->cellsize_limit, comm->cutoff,
3707 comm->bInterCGBondeds);
3709 if (dd->nc[XX] == 0)
3712 gmx_bool bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
3713 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
3714 !bC ? "-rdd" : "-rcon",
3715 comm->dlbState != DlbState::offUser ? " or -dds" : "",
3716 bC ? " or your LINCS settings" : "");
3718 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3719 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
3721 "Look in the log file for details on the domain decomposition",
3722 cr->nnodes-cr->npmenodes, limit, buf);
3724 set_dd_dim(mdlog, dd);
3727 GMX_LOG(mdlog.info).appendTextFormatted(
3728 "Domain decomposition grid %d x %d x %d, separate PME ranks %d",
3729 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
3731 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
3732 if (cr->nnodes - dd->nnodes != cr->npmenodes)
3734 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3735 "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
3736 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
3738 if (cr->npmenodes > dd->nnodes)
3740 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3741 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
3743 if (cr->npmenodes > 0)
3745 comm->npmenodes = cr->npmenodes;
3749 comm->npmenodes = dd->nnodes;
3752 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
3754 /* The following choices should match those
3755 * in comm_cost_est in domdec_setup.c.
3756 * Note that here the checks have to take into account
3757 * that the decomposition might occur in a different order than xyz
3758 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
3759 * in which case they will not match those in comm_cost_est,
3760 * but since that is mainly for testing purposes that's fine.
3762 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
3763 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
3764 getenv("GMX_PMEONEDD") == nullptr)
3766 comm->npmedecompdim = 2;
3767 comm->npmenodes_x = dd->nc[XX];
3768 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
3772 /* In case nc is 1 in both x and y we could still choose to
3773 * decompose pme in y instead of x, but we use x for simplicity.
3775 comm->npmedecompdim = 1;
3776 if (dd->dim[0] == YY)
3778 comm->npmenodes_x = 1;
3779 comm->npmenodes_y = comm->npmenodes;
3783 comm->npmenodes_x = comm->npmenodes;
3784 comm->npmenodes_y = 1;
3787 GMX_LOG(mdlog.info).appendTextFormatted(
3788 "PME domain decomposition: %d x %d x %d",
3789 comm->npmenodes_x, comm->npmenodes_y, 1);
3793 comm->npmedecompdim = 0;
3794 comm->npmenodes_x = 0;
3795 comm->npmenodes_y = 0;
3798 snew(comm->slb_frac, DIM);
3799 if (isDlbDisabled(comm))
3801 comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
3802 comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
3803 comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
3806 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
3808 if (comm->bBondComm || !isDlbDisabled(comm))
3810 /* Set the bonded communication distance to halfway
3811 * the minimum and the maximum,
3812 * since the extra communication cost is nearly zero.
3814 real acs = average_cellsize_min(dd, ddbox);
3815 comm->cutoff_mbody = 0.5*(r_bonded + acs);
3816 if (!isDlbDisabled(comm))
3818 /* Check if this does not limit the scaling */
3819 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
3820 options.dlbScaling*acs);
3822 if (!comm->bBondComm)
3824 /* Without bBondComm do not go beyond the n.b. cut-off */
3825 comm->cutoff_mbody = std::min(comm->cutoff_mbody, comm->cutoff);
3826 if (comm->cellsize_limit >= comm->cutoff)
3828 /* We don't loose a lot of efficieny
3829 * when increasing it to the n.b. cut-off.
3830 * It can even be slightly faster, because we need
3831 * less checks for the communication setup.
3833 comm->cutoff_mbody = comm->cutoff;
3836 /* Check if we did not end up below our original limit */
3837 comm->cutoff_mbody = std::max(comm->cutoff_mbody, r_bonded_limit);
3839 if (comm->cutoff_mbody > comm->cellsize_limit)
3841 comm->cellsize_limit = comm->cutoff_mbody;
3844 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
3849 fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
3850 "cellsize limit %f\n",
3851 gmx::boolToString(comm->bBondComm), comm->cellsize_limit);
3854 check_dd_restrictions(dd, ir, mdlog);
3857 static void set_dlb_limits(gmx_domdec_t *dd)
3860 for (int d = 0; d < dd->ndim; d++)
3862 /* Set the number of pulses to the value for DLB */
3863 dd->comm->cd[d].ind.resize(dd->comm->cd[d].np_dlb);
3865 dd->comm->cellsize_min[dd->dim[d]] =
3866 dd->comm->cellsize_min_dlb[dd->dim[d]];
3871 static void turn_on_dlb(const gmx::MDLogger &mdlog,
3875 gmx_domdec_comm_t *comm = dd->comm;
3877 real cellsize_min = comm->cellsize_min[dd->dim[0]];
3878 for (int d = 1; d < dd->ndim; d++)
3880 cellsize_min = std::min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
3883 /* Turn off DLB if we're too close to the cell size limit. */
3884 if (cellsize_min < comm->cellsize_limit*1.05)
3886 GMX_LOG(mdlog.info).appendTextFormatted(
3887 "step %s Measured %.1f %% performance loss due to load imbalance, "
3888 "but the minimum cell size is smaller than 1.05 times the cell size limit. "
3889 "Will no longer try dynamic load balancing.",
3890 gmx::toString(step).c_str(), dd_force_imb_perf_loss(dd)*100);
3892 comm->dlbState = DlbState::offForever;
3896 GMX_LOG(mdlog.info).appendTextFormatted(
3897 "step %s Turning on dynamic load balancing, because the performance loss due to load imbalance is %.1f %%.",
3898 gmx::toString(step).c_str(), dd_force_imb_perf_loss(dd)*100);
3899 comm->dlbState = DlbState::onCanTurnOff;
3901 /* Store the non-DLB performance, so we can check if DLB actually
3902 * improves performance.
3904 GMX_RELEASE_ASSERT(comm->cycl_n[ddCyclStep] > 0, "When we turned on DLB, we should have measured cycles");
3905 comm->cyclesPerStepBeforeDLB = comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
3909 /* We can set the required cell size info here,
3910 * so we do not need to communicate this.
3911 * The grid is completely uniform.
3913 for (int d = 0; d < dd->ndim; d++)
3915 RowMaster *rowMaster = comm->cellsizesWithDlb[d].rowMaster.get();
3919 comm->load[d].sum_m = comm->load[d].sum;
3921 int nc = dd->nc[dd->dim[d]];
3922 for (int i = 0; i < nc; i++)
3924 rowMaster->cellFrac[i] = i/static_cast<real>(nc);
3927 rowMaster->bounds[i].cellFracLowerMax = i /static_cast<real>(nc);
3928 rowMaster->bounds[i].cellFracUpperMin = (i + 1)/static_cast<real>(nc);
3931 rowMaster->cellFrac[nc] = 1.0;
3936 static void turn_off_dlb(const gmx::MDLogger &mdlog,
3940 GMX_LOG(mdlog.info).appendText(
3941 "step " + gmx::toString(step) + " Turning off dynamic load balancing, because it is degrading performance.");
3942 dd->comm->dlbState = DlbState::offCanTurnOn;
3943 dd->comm->haveTurnedOffDlb = true;
3944 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
3947 static void turn_off_dlb_forever(const gmx::MDLogger &mdlog,
3951 GMX_RELEASE_ASSERT(dd->comm->dlbState == DlbState::offCanTurnOn, "Can only turn off DLB forever when it was in the can-turn-on state");
3952 GMX_LOG(mdlog.info).appendText(
3953 "step " + gmx::toString(step) + " Will no longer try dynamic load balancing, as it degraded performance.");
3954 dd->comm->dlbState = DlbState::offForever;
3957 static char *init_bLocalCG(const gmx_mtop_t *mtop)
3962 ncg = ncg_mtop(mtop);
3963 snew(bLocalCG, ncg);
3964 for (cg = 0; cg < ncg; cg++)
3966 bLocalCG[cg] = FALSE;
3972 void dd_init_bondeds(FILE *fplog,
3974 const gmx_mtop_t *mtop,
3975 const gmx_vsite_t *vsite,
3976 const t_inputrec *ir,
3977 gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
3979 gmx_domdec_comm_t *comm;
3981 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
3985 if (comm->bBondComm)
3987 /* Communicate atoms beyond the cut-off for bonded interactions */
3990 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
3992 comm->bLocalCG = init_bLocalCG(mtop);
3996 /* Only communicate atoms based on cut-off */
3997 comm->cglink = nullptr;
3998 comm->bLocalCG = nullptr;
4002 static void writeSettings(gmx::TextWriter *log,
4004 const gmx_mtop_t *mtop,
4005 const t_inputrec *ir,
4006 gmx_bool bDynLoadBal,
4008 const gmx_ddbox_t *ddbox)
4010 gmx_domdec_comm_t *comm;
4019 log->writeString("The maximum number of communication pulses is:");
4020 for (d = 0; d < dd->ndim; d++)
4022 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
4024 log->ensureLineBreak();
4025 log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm", comm->cellsize_limit);
4026 log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
4027 log->writeString("The allowed shrink of domain decomposition cells is:");
4028 for (d = 0; d < DIM; d++)
4032 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
4039 comm->cellsize_min_dlb[d]/
4040 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
4042 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
4045 log->ensureLineBreak();
4049 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
4050 log->writeString("The initial number of communication pulses is:");
4051 for (d = 0; d < dd->ndim; d++)
4053 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
4055 log->ensureLineBreak();
4056 log->writeString("The initial domain decomposition cell size is:");
4057 for (d = 0; d < DIM; d++)
4061 log->writeStringFormatted(" %c %.2f nm",
4062 dim2char(d), dd->comm->cellsize_min[d]);
4065 log->ensureLineBreak();
4069 gmx_bool bInterCGVsites = count_intercg_vsites(mtop) != 0;
4071 if (comm->bInterCGBondeds ||
4073 dd->bInterCGcons || dd->bInterCGsettles)
4075 log->writeLine("The maximum allowed distance for charge groups involved in interactions is:");
4076 log->writeLineFormatted("%40s %-7s %6.3f nm", "non-bonded interactions", "", comm->cutoff);
4080 limit = dd->comm->cellsize_limit;
4084 if (dynamic_dd_box(ddbox, ir))
4086 log->writeLine("(the following are initial values, they could change due to box deformation)");
4088 limit = dd->comm->cellsize_min[XX];
4089 for (d = 1; d < DIM; d++)
4091 limit = std::min(limit, dd->comm->cellsize_min[d]);
4095 if (comm->bInterCGBondeds)
4097 log->writeLineFormatted("%40s %-7s %6.3f nm",
4098 "two-body bonded interactions", "(-rdd)",
4099 std::max(comm->cutoff, comm->cutoff_mbody));
4100 log->writeLineFormatted("%40s %-7s %6.3f nm",
4101 "multi-body bonded interactions", "(-rdd)",
4102 (comm->bBondComm || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
4106 log->writeLineFormatted("%40s %-7s %6.3f nm",
4107 "virtual site constructions", "(-rcon)", limit);
4109 if (dd->bInterCGcons || dd->bInterCGsettles)
4111 std::string separation = gmx::formatString("atoms separated by up to %d constraints",
4113 log->writeLineFormatted("%40s %-7s %6.3f nm\n",
4114 separation.c_str(), "(-rcon)", limit);
4116 log->ensureLineBreak();
4120 static void logSettings(const gmx::MDLogger &mdlog,
4122 const gmx_mtop_t *mtop,
4123 const t_inputrec *ir,
4125 const gmx_ddbox_t *ddbox)
4127 gmx::StringOutputStream stream;
4128 gmx::TextWriter log(&stream);
4129 writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
4130 if (dd->comm->dlbState == DlbState::offCanTurnOn)
4133 log.ensureEmptyLine();
4134 log.writeLine("When dynamic load balancing gets turned on, these settings will change to:");
4136 writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
4138 GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
4141 static void set_cell_limits_dlb(const gmx::MDLogger &mdlog,
4144 const t_inputrec *ir,
4145 const gmx_ddbox_t *ddbox)
4147 gmx_domdec_comm_t *comm;
4148 int d, dim, npulse, npulse_d_max, npulse_d;
4153 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
4155 /* Determine the maximum number of comm. pulses in one dimension */
4157 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
4159 /* Determine the maximum required number of grid pulses */
4160 if (comm->cellsize_limit >= comm->cutoff)
4162 /* Only a single pulse is required */
4165 else if (!bNoCutOff && comm->cellsize_limit > 0)
4167 /* We round down slightly here to avoid overhead due to the latency
4168 * of extra communication calls when the cut-off
4169 * would be only slightly longer than the cell size.
4170 * Later cellsize_limit is redetermined,
4171 * so we can not miss interactions due to this rounding.
4173 npulse = static_cast<int>(0.96 + comm->cutoff/comm->cellsize_limit);
4177 /* There is no cell size limit */
4178 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
4181 if (!bNoCutOff && npulse > 1)
4183 /* See if we can do with less pulses, based on dlb_scale */
4185 for (d = 0; d < dd->ndim; d++)
4188 npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->cutoff
4189 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
4190 npulse_d_max = std::max(npulse_d_max, npulse_d);
4192 npulse = std::min(npulse, npulse_d_max);
4195 /* This env var can override npulse */
4196 d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
4203 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
4204 for (d = 0; d < dd->ndim; d++)
4206 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
4207 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
4208 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
4210 comm->bVacDLBNoLimit = FALSE;
4214 /* cellsize_limit is set for LINCS in init_domain_decomposition */
4215 if (!comm->bVacDLBNoLimit)
4217 comm->cellsize_limit = std::max(comm->cellsize_limit,
4218 comm->cutoff/comm->maxpulse);
4220 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
4221 /* Set the minimum cell size for each DD dimension */
4222 for (d = 0; d < dd->ndim; d++)
4224 if (comm->bVacDLBNoLimit ||
4225 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
4227 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
4231 comm->cellsize_min_dlb[dd->dim[d]] =
4232 comm->cutoff/comm->cd[d].np_dlb;
4235 if (comm->cutoff_mbody <= 0)
4237 comm->cutoff_mbody = std::min(comm->cutoff, comm->cellsize_limit);
4245 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
4247 /* If each molecule is a single charge group
4248 * or we use domain decomposition for each periodic dimension,
4249 * we do not need to take pbc into account for the bonded interactions.
4251 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
4254 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
4257 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
4258 static void set_ddgrid_parameters(const gmx::MDLogger &mdlog,
4259 gmx_domdec_t *dd, real dlb_scale,
4260 const gmx_mtop_t *mtop, const t_inputrec *ir,
4261 const gmx_ddbox_t *ddbox)
4263 gmx_domdec_comm_t *comm;
4269 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
4271 init_ddpme(dd, &comm->ddpme[0], 0);
4272 if (comm->npmedecompdim >= 2)
4274 init_ddpme(dd, &comm->ddpme[1], 1);
4279 comm->npmenodes = 0;
4280 if (dd->pme_nodeid >= 0)
4282 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
4283 "Can not have separate PME ranks without PME electrostatics");
4289 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
4291 if (!isDlbDisabled(comm))
4293 set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
4296 logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
4298 if (ir->ePBC == epbcNONE)
4300 vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
4305 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/static_cast<double>(dd->nnodes);
4309 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
4311 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
4313 dd->ga2la = new gmx_ga2la_t(natoms_tot,
4314 static_cast<int>(vol_frac*natoms_tot));
4317 /*! \brief Set some important DD parameters that can be modified by env.vars */
4318 static void set_dd_envvar_options(const gmx::MDLogger &mdlog,
4319 gmx_domdec_t *dd, int rank_mysim)
4321 gmx_domdec_comm_t *comm = dd->comm;
4323 dd->bSendRecv2 = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
4324 comm->dlb_scale_lim = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
4325 comm->eFlop = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
4326 int recload = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
4327 comm->nstDDDump = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
4328 comm->nstDDDumpGrid = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
4329 comm->DD_debug = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
4333 GMX_LOG(mdlog.info).appendText("Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication");
4338 GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
4339 if (comm->eFlop > 1)
4341 srand(1 + rank_mysim);
4343 comm->bRecordLoad = TRUE;
4347 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
4351 DomdecOptions::DomdecOptions() :
4352 checkBondedInteractions(TRUE),
4353 useBondedCommunication(TRUE),
4355 rankOrder(DdRankOrder::pp_pme),
4356 minimumCommunicationRange(0),
4357 constraintCommunicationRange(0),
4358 dlbOption(DlbOption::turnOnWhenUseful),
4364 clear_ivec(numCells);
4367 gmx_domdec_t *init_domain_decomposition(const gmx::MDLogger &mdlog,
4369 const DomdecOptions &options,
4370 const MdrunOptions &mdrunOptions,
4371 const gmx_mtop_t *mtop,
4372 const t_inputrec *ir,
4374 gmx::ArrayRef<const gmx::RVec> xGlobal,
4375 gmx::LocalAtomSetManager *atomSets)
4379 GMX_LOG(mdlog.info).appendTextFormatted(
4380 "\nInitializing Domain Decomposition on %d ranks", cr->nnodes);
4382 dd = new gmx_domdec_t;
4384 dd->comm = init_dd_comm();
4386 /* Initialize DD paritioning counters */
4387 dd->comm->partition_step = INT_MIN;
4390 set_dd_envvar_options(mdlog, dd, cr->nodeid);
4392 gmx_ddbox_t ddbox = {0};
4393 set_dd_limits_and_grid(mdlog, cr, dd, options, mdrunOptions,
4398 make_dd_communicators(mdlog, cr, dd, options.rankOrder);
4400 if (thisRankHasDuty(cr, DUTY_PP))
4402 set_ddgrid_parameters(mdlog, dd, options.dlbScaling, mtop, ir, &ddbox);
4404 setup_neighbor_relations(dd);
4407 /* Set overallocation to avoid frequent reallocation of arrays */
4408 set_over_alloc_dd(TRUE);
4410 clear_dd_cycle_counts(dd);
4412 dd->atomSets = atomSets;
4417 static gmx_bool test_dd_cutoff(t_commrec *cr,
4418 t_state *state, const t_inputrec *ir,
4429 set_ddbox(dd, false, ir, state->box, true, state->x, &ddbox);
4433 for (d = 0; d < dd->ndim; d++)
4437 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
4438 if (dynamic_dd_box(&ddbox, ir))
4440 inv_cell_size *= DD_PRES_SCALE_MARGIN;
4443 np = 1 + static_cast<int>(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
4445 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
4447 if (np > dd->comm->cd[d].np_dlb)
4452 /* If a current local cell size is smaller than the requested
4453 * cut-off, we could still fix it, but this gets very complicated.
4454 * Without fixing here, we might actually need more checks.
4456 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
4463 if (!isDlbDisabled(dd->comm))
4465 /* If DLB is not active yet, we don't need to check the grid jumps.
4466 * Actually we shouldn't, because then the grid jump data is not set.
4468 if (isDlbOn(dd->comm) &&
4469 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
4474 gmx_sumi(1, &LocallyLimited, cr);
4476 if (LocallyLimited > 0)
4485 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, const t_inputrec *ir,
4488 gmx_bool bCutoffAllowed;
4490 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
4494 cr->dd->comm->cutoff = cutoff_req;
4497 return bCutoffAllowed;
4500 void set_dd_dlb_max_cutoff(t_commrec *cr, real cutoff)
4502 gmx_domdec_comm_t *comm;
4504 comm = cr->dd->comm;
4506 /* Turn on the DLB limiting (might have been on already) */
4507 comm->bPMELoadBalDLBLimits = TRUE;
4509 /* Change the cut-off limit */
4510 comm->PMELoadBal_max_cutoff = cutoff;
4514 fprintf(debug, "PME load balancing set a limit to the DLB staggering such that a %f cut-off will continue to fit\n",
4515 comm->PMELoadBal_max_cutoff);
4519 /* Sets whether we should later check the load imbalance data, so that
4520 * we can trigger dynamic load balancing if enough imbalance has
4523 * Used after PME load balancing unlocks DLB, so that the check
4524 * whether DLB will be useful can happen immediately.
4526 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue)
4528 if (dd->comm->dlbState == DlbState::offCanTurnOn)
4530 dd->comm->bCheckWhetherToTurnDlbOn = bValue;
4534 /* Store the DD partitioning count, so we can ignore cycle counts
4535 * over the next nstlist steps, which are often slower.
4537 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
4542 /* Returns if we should check whether there has been enough load
4543 * imbalance to trigger dynamic load balancing.
4545 static gmx_bool dd_dlb_get_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd)
4547 if (dd->comm->dlbState != DlbState::offCanTurnOn)
4552 if (dd->ddp_count <= dd->comm->ddPartioningCountFirstDlbOff)
4554 /* We ignore the first nstlist steps at the start of the run
4555 * or after PME load balancing or after turning DLB off, since
4556 * these often have extra allocation or cache miss overhead.
4561 if (dd->comm->cycl_n[ddCyclStep] == 0)
4563 /* We can have zero timed steps when dd_partition_system is called
4564 * more than once at the same step, e.g. with replica exchange.
4565 * Turning on DLB would trigger an assertion failure later, but is
4566 * also useless right after exchanging replicas.
4571 /* We should check whether we should use DLB directly after
4573 if (dd->comm->bCheckWhetherToTurnDlbOn)
4575 /* This flag was set when the PME load-balancing routines
4576 unlocked DLB, and should now be cleared. */
4577 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, FALSE);
4580 /* We check whether we should use DLB every c_checkTurnDlbOnInterval
4581 * partitionings (we do not do this every partioning, so that we
4582 * avoid excessive communication). */
4583 return dd->comm->n_load_have % c_checkTurnDlbOnInterval == c_checkTurnDlbOnInterval - 1;
4586 gmx_bool dd_dlb_is_on(const gmx_domdec_t *dd)
4588 return isDlbOn(dd->comm);
4591 gmx_bool dd_dlb_is_locked(const gmx_domdec_t *dd)
4593 return (dd->comm->dlbState == DlbState::offTemporarilyLocked);
4596 void dd_dlb_lock(gmx_domdec_t *dd)
4598 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
4599 if (dd->comm->dlbState == DlbState::offCanTurnOn)
4601 dd->comm->dlbState = DlbState::offTemporarilyLocked;
4605 void dd_dlb_unlock(gmx_domdec_t *dd)
4607 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
4608 if (dd->comm->dlbState == DlbState::offTemporarilyLocked)
4610 dd->comm->dlbState = DlbState::offCanTurnOn;
4611 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
4615 static void merge_cg_buffers(int ncell,
4616 gmx_domdec_comm_dim_t *cd, int pulse,
4618 gmx::ArrayRef<int> index_gl,
4620 rvec *cg_cm, rvec *recv_vr,
4621 gmx::ArrayRef<int> cgindex,
4622 cginfo_mb_t *cginfo_mb, int *cginfo)
4624 gmx_domdec_ind_t *ind, *ind_p;
4625 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
4626 int shift, shift_at;
4628 ind = &cd->ind[pulse];
4630 /* First correct the already stored data */
4631 shift = ind->nrecv[ncell];
4632 for (cell = ncell-1; cell >= 0; cell--)
4634 shift -= ind->nrecv[cell];
4637 /* Move the cg's present from previous grid pulses */
4638 cg0 = ncg_cell[ncell+cell];
4639 cg1 = ncg_cell[ncell+cell+1];
4640 cgindex[cg1+shift] = cgindex[cg1];
4641 for (cg = cg1-1; cg >= cg0; cg--)
4643 index_gl[cg+shift] = index_gl[cg];
4644 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
4645 cgindex[cg+shift] = cgindex[cg];
4646 cginfo[cg+shift] = cginfo[cg];
4648 /* Correct the already stored send indices for the shift */
4649 for (p = 1; p <= pulse; p++)
4651 ind_p = &cd->ind[p];
4653 for (c = 0; c < cell; c++)
4655 cg0 += ind_p->nsend[c];
4657 cg1 = cg0 + ind_p->nsend[cell];
4658 for (cg = cg0; cg < cg1; cg++)
4660 ind_p->index[cg] += shift;
4666 /* Merge in the communicated buffers */
4670 for (cell = 0; cell < ncell; cell++)
4672 cg1 = ncg_cell[ncell+cell+1] + shift;
4675 /* Correct the old cg indices */
4676 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
4678 cgindex[cg+1] += shift_at;
4681 for (cg = 0; cg < ind->nrecv[cell]; cg++)
4683 /* Copy this charge group from the buffer */
4684 index_gl[cg1] = recv_i[cg0];
4685 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
4686 /* Add it to the cgindex */
4687 cg_gl = index_gl[cg1];
4688 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
4689 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
4690 cgindex[cg1+1] = cgindex[cg1] + nat;
4695 shift += ind->nrecv[cell];
4696 ncg_cell[ncell+cell+1] = cg1;
4700 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
4703 const gmx::RangePartitioning &atomGroups)
4705 /* Store the atom block boundaries for easy copying of communication buffers
4707 int g = atomGroupStart;
4708 for (int zone = 0; zone < nzone; zone++)
4710 for (gmx_domdec_ind_t &ind : cd->ind)
4712 const auto range = atomGroups.subRange(g, g + ind.nrecv[zone]);
4713 ind.cell2at0[zone] = range.begin();
4714 ind.cell2at1[zone] = range.end();
4715 g += ind.nrecv[zone];
4720 static gmx_bool missing_link(t_blocka *link, int cg_gl, const char *bLocalCG)
4726 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
4728 if (!bLocalCG[link->a[i]])
4737 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
4739 real c[DIM][4]; /* the corners for the non-bonded communication */
4740 real cr0; /* corner for rounding */
4741 real cr1[4]; /* corners for rounding */
4742 real bc[DIM]; /* corners for bounded communication */
4743 real bcr1; /* corner for rounding for bonded communication */
4746 /* Determine the corners of the domain(s) we are communicating with */
4748 set_dd_corners(const gmx_domdec_t *dd,
4749 int dim0, int dim1, int dim2,
4753 const gmx_domdec_comm_t *comm;
4754 const gmx_domdec_zones_t *zones;
4759 zones = &comm->zones;
4761 /* Keep the compiler happy */
4765 /* The first dimension is equal for all cells */
4766 c->c[0][0] = comm->cell_x0[dim0];
4769 c->bc[0] = c->c[0][0];
4774 /* This cell row is only seen from the first row */
4775 c->c[1][0] = comm->cell_x0[dim1];
4776 /* All rows can see this row */
4777 c->c[1][1] = comm->cell_x0[dim1];
4778 if (isDlbOn(dd->comm))
4780 c->c[1][1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
4783 /* For the multi-body distance we need the maximum */
4784 c->bc[1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
4787 /* Set the upper-right corner for rounding */
4788 c->cr0 = comm->cell_x1[dim0];
4793 for (j = 0; j < 4; j++)
4795 c->c[2][j] = comm->cell_x0[dim2];
4797 if (isDlbOn(dd->comm))
4799 /* Use the maximum of the i-cells that see a j-cell */
4800 for (i = 0; i < zones->nizone; i++)
4802 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
4807 std::max(c->c[2][j-4],
4808 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
4814 /* For the multi-body distance we need the maximum */
4815 c->bc[2] = comm->cell_x0[dim2];
4816 for (i = 0; i < 2; i++)
4818 for (j = 0; j < 2; j++)
4820 c->bc[2] = std::max(c->bc[2], comm->zone_d2[i][j].p1_0);
4826 /* Set the upper-right corner for rounding */
4827 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
4828 * Only cell (0,0,0) can see cell 7 (1,1,1)
4830 c->cr1[0] = comm->cell_x1[dim1];
4831 c->cr1[3] = comm->cell_x1[dim1];
4832 if (isDlbOn(dd->comm))
4834 c->cr1[0] = std::max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
4837 /* For the multi-body distance we need the maximum */
4838 c->bcr1 = std::max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
4845 /* Add the atom groups we need to send in this pulse from this zone to
4846 * \p localAtomGroups and \p work
4849 get_zone_pulse_cgs(gmx_domdec_t *dd,
4850 int zonei, int zone,
4852 gmx::ArrayRef<const int> globalAtomGroupIndices,
4853 const gmx::RangePartitioning &atomGroups,
4854 int dim, int dim_ind,
4855 int dim0, int dim1, int dim2,
4856 real r_comm2, real r_bcomm2,
4858 bool distanceIsTriclinic,
4860 real skew_fac2_d, real skew_fac_01,
4861 rvec *v_d, rvec *v_0, rvec *v_1,
4862 const dd_corners_t *c,
4863 const rvec sf2_round,
4864 gmx_bool bDistBonded,
4870 std::vector<int> *localAtomGroups,
4871 dd_comm_setup_work_t *work)
4873 gmx_domdec_comm_t *comm;
4875 gmx_bool bDistMB_pulse;
4877 real r2, rb2, r, tric_sh;
4884 bScrew = (dd->bScrewPBC && dim == XX);
4886 bDistMB_pulse = (bDistMB && bDistBonded);
4888 /* Unpack the work data */
4889 std::vector<int> &ibuf = work->atomGroupBuffer;
4890 std::vector<gmx::RVec> &vbuf = work->positionBuffer;
4894 for (cg = cg0; cg < cg1; cg++)
4898 if (!distanceIsTriclinic)
4900 /* Rectangular direction, easy */
4901 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
4908 r = cg_cm[cg][dim] - c->bc[dim_ind];
4914 /* Rounding gives at most a 16% reduction
4915 * in communicated atoms
4917 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
4919 r = cg_cm[cg][dim0] - c->cr0;
4920 /* This is the first dimension, so always r >= 0 */
4927 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
4929 r = cg_cm[cg][dim1] - c->cr1[zone];
4936 r = cg_cm[cg][dim1] - c->bcr1;
4946 /* Triclinic direction, more complicated */
4949 /* Rounding, conservative as the skew_fac multiplication
4950 * will slightly underestimate the distance.
4952 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
4954 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
4955 for (i = dim0+1; i < DIM; i++)
4957 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
4959 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
4962 rb[dim0] = rn[dim0];
4965 /* Take care that the cell planes along dim0 might not
4966 * be orthogonal to those along dim1 and dim2.
4968 for (i = 1; i <= dim_ind; i++)
4971 if (normal[dim0][dimd] > 0)
4973 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
4976 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
4981 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
4983 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
4985 for (i = dim1+1; i < DIM; i++)
4987 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
4989 rn[dim1] += tric_sh;
4992 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
4993 /* Take care of coupling of the distances
4994 * to the planes along dim0 and dim1 through dim2.
4996 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
4997 /* Take care that the cell planes along dim1
4998 * might not be orthogonal to that along dim2.
5000 if (normal[dim1][dim2] > 0)
5002 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
5008 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
5011 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
5012 /* Take care of coupling of the distances
5013 * to the planes along dim0 and dim1 through dim2.
5015 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
5016 /* Take care that the cell planes along dim1
5017 * might not be orthogonal to that along dim2.
5019 if (normal[dim1][dim2] > 0)
5021 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
5026 /* The distance along the communication direction */
5027 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
5029 for (i = dim+1; i < DIM; i++)
5031 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
5036 r2 += rn[dim]*rn[dim]*skew_fac2_d;
5037 /* Take care of coupling of the distances
5038 * to the planes along dim0 and dim1 through dim2.
5040 if (dim_ind == 1 && zonei == 1)
5042 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
5048 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
5051 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
5052 /* Take care of coupling of the distances
5053 * to the planes along dim0 and dim1 through dim2.
5055 if (dim_ind == 1 && zonei == 1)
5057 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
5065 ((bDistMB && rb2 < r_bcomm2) ||
5066 (bDist2B && r2 < r_bcomm2)) &&
5068 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
5069 missing_link(comm->cglink, globalAtomGroupIndices[cg],
5072 /* Store the local and global atom group indices and position */
5073 localAtomGroups->push_back(cg);
5074 ibuf.push_back(globalAtomGroupIndices[cg]);
5078 if (dd->ci[dim] == 0)
5080 /* Correct cg_cm for pbc */
5081 rvec_add(cg_cm[cg], box[dim], posPbc);
5084 posPbc[YY] = box[YY][YY] - posPbc[YY];
5085 posPbc[ZZ] = box[ZZ][ZZ] - posPbc[ZZ];
5090 copy_rvec(cg_cm[cg], posPbc);
5092 vbuf.emplace_back(posPbc[XX], posPbc[YY], posPbc[ZZ]);
5094 nat += atomGroups.block(cg).size();
5099 work->nsend_zone = nsend_z;
5102 static void clearCommSetupData(dd_comm_setup_work_t *work)
5104 work->localAtomGroupBuffer.clear();
5105 work->atomGroupBuffer.clear();
5106 work->positionBuffer.clear();
5108 work->nsend_zone = 0;
5111 static void setup_dd_communication(gmx_domdec_t *dd,
5112 matrix box, gmx_ddbox_t *ddbox,
5114 t_state *state, PaddedRVecVector *f)
5116 int dim_ind, dim, dim0, dim1, dim2, dimd, nat_tot;
5117 int nzone, nzone_send, zone, zonei, cg0, cg1;
5118 int c, i, cg, cg_gl, nrcg;
5119 int *zone_cg_range, pos_cg;
5120 gmx_domdec_comm_t *comm;
5121 gmx_domdec_zones_t *zones;
5122 gmx_domdec_comm_dim_t *cd;
5123 cginfo_mb_t *cginfo_mb;
5124 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
5125 real r_comm2, r_bcomm2;
5126 dd_corners_t corners;
5127 rvec *cg_cm, *normal, *v_d, *v_0 = nullptr, *v_1 = nullptr;
5128 real skew_fac2_d, skew_fac_01;
5133 fprintf(debug, "Setting up DD communication\n");
5138 if (comm->dth.empty())
5140 /* Initialize the thread data.
5141 * This can not be done in init_domain_decomposition,
5142 * as the numbers of threads is determined later.
5144 int numThreads = gmx_omp_nthreads_get(emntDomdec);
5145 comm->dth.resize(numThreads);
5148 switch (fr->cutoff_scheme)
5154 cg_cm = as_rvec_array(state->x.data());
5157 gmx_incons("unimplemented");
5160 bBondComm = comm->bBondComm;
5162 /* Do we need to determine extra distances for multi-body bondeds? */
5163 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
5165 /* Do we need to determine extra distances for only two-body bondeds? */
5166 bDist2B = (bBondComm && !bDistMB);
5168 r_comm2 = gmx::square(comm->cutoff);
5169 r_bcomm2 = gmx::square(comm->cutoff_mbody);
5173 fprintf(debug, "bBondComm %s, r_bc %f\n", gmx::boolToString(bBondComm), std::sqrt(r_bcomm2));
5176 zones = &comm->zones;
5179 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
5180 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
5182 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
5184 /* Triclinic stuff */
5185 normal = ddbox->normal;
5189 v_0 = ddbox->v[dim0];
5190 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
5192 /* Determine the coupling coefficient for the distances
5193 * to the cell planes along dim0 and dim1 through dim2.
5194 * This is required for correct rounding.
5197 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
5200 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
5206 v_1 = ddbox->v[dim1];
5209 zone_cg_range = zones->cg_range;
5210 cginfo_mb = fr->cginfo_mb;
5212 zone_cg_range[0] = 0;
5213 zone_cg_range[1] = dd->ncg_home;
5214 comm->zone_ncg1[0] = dd->ncg_home;
5215 pos_cg = dd->ncg_home;
5217 nat_tot = comm->atomRanges.numHomeAtoms();
5219 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
5221 dim = dd->dim[dim_ind];
5222 cd = &comm->cd[dim_ind];
5224 /* Check if we need to compute triclinic distances along this dim */
5225 bool distanceIsTriclinic = false;
5226 for (i = 0; i <= dim_ind; i++)
5228 if (ddbox->tric_dir[dd->dim[i]])
5230 distanceIsTriclinic = true;
5234 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
5236 /* No pbc in this dimension, the first node should not comm. */
5244 v_d = ddbox->v[dim];
5245 skew_fac2_d = gmx::square(ddbox->skew_fac[dim]);
5247 cd->receiveInPlace = true;
5248 for (int p = 0; p < cd->numPulses(); p++)
5250 /* Only atoms communicated in the first pulse are used
5251 * for multi-body bonded interactions or for bBondComm.
5253 bDistBonded = ((bDistMB || bDist2B) && p == 0);
5255 gmx_domdec_ind_t *ind = &cd->ind[p];
5257 /* Thread 0 writes in the global index array */
5259 clearCommSetupData(&comm->dth[0]);
5261 for (zone = 0; zone < nzone_send; zone++)
5263 if (dim_ind > 0 && distanceIsTriclinic)
5265 /* Determine slightly more optimized skew_fac's
5267 * This reduces the number of communicated atoms
5268 * by about 10% for 3D DD of rhombic dodecahedra.
5270 for (dimd = 0; dimd < dim; dimd++)
5272 sf2_round[dimd] = 1;
5273 if (ddbox->tric_dir[dimd])
5275 for (i = dd->dim[dimd]+1; i < DIM; i++)
5277 /* If we are shifted in dimension i
5278 * and the cell plane is tilted forward
5279 * in dimension i, skip this coupling.
5281 if (!(zones->shift[nzone+zone][i] &&
5282 ddbox->v[dimd][i][dimd] >= 0))
5285 gmx::square(ddbox->v[dimd][i][dimd]);
5288 sf2_round[dimd] = 1/sf2_round[dimd];
5293 zonei = zone_perm[dim_ind][zone];
5296 /* Here we permutate the zones to obtain a convenient order
5297 * for neighbor searching
5299 cg0 = zone_cg_range[zonei];
5300 cg1 = zone_cg_range[zonei+1];
5304 /* Look only at the cg's received in the previous grid pulse
5306 cg1 = zone_cg_range[nzone+zone+1];
5307 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
5310 const int numThreads = static_cast<int>(comm->dth.size());
5311 #pragma omp parallel for num_threads(numThreads) schedule(static)
5312 for (int th = 0; th < numThreads; th++)
5316 dd_comm_setup_work_t &work = comm->dth[th];
5318 /* Retain data accumulated into buffers of thread 0 */
5321 clearCommSetupData(&work);
5324 int cg0_th = cg0 + ((cg1 - cg0)* th )/numThreads;
5325 int cg1_th = cg0 + ((cg1 - cg0)*(th+1))/numThreads;
5327 /* Get the cg's for this pulse in this zone */
5328 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
5329 dd->globalAtomGroupIndices,
5331 dim, dim_ind, dim0, dim1, dim2,
5333 box, distanceIsTriclinic,
5334 normal, skew_fac2_d, skew_fac_01,
5335 v_d, v_0, v_1, &corners, sf2_round,
5336 bDistBonded, bBondComm,
5339 th == 0 ? &ind->index : &work.localAtomGroupBuffer,
5342 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
5345 std::vector<int> &atomGroups = comm->dth[0].atomGroupBuffer;
5346 std::vector<gmx::RVec> &positions = comm->dth[0].positionBuffer;
5347 ind->nsend[zone] = comm->dth[0].nsend_zone;
5348 /* Append data of threads>=1 to the communication buffers */
5349 for (int th = 1; th < numThreads; th++)
5351 const dd_comm_setup_work_t &dth = comm->dth[th];
5353 ind->index.insert(ind->index.end(), dth.localAtomGroupBuffer.begin(), dth.localAtomGroupBuffer.end());
5354 atomGroups.insert(atomGroups.end(), dth.atomGroupBuffer.begin(), dth.atomGroupBuffer.end());
5355 positions.insert(positions.end(), dth.positionBuffer.begin(), dth.positionBuffer.end());
5356 comm->dth[0].nat += dth.nat;
5357 ind->nsend[zone] += dth.nsend_zone;
5360 /* Clear the counts in case we do not have pbc */
5361 for (zone = nzone_send; zone < nzone; zone++)
5363 ind->nsend[zone] = 0;
5365 ind->nsend[nzone] = ind->index.size();
5366 ind->nsend[nzone + 1] = comm->dth[0].nat;
5367 /* Communicate the number of cg's and atoms to receive */
5368 ddSendrecv(dd, dim_ind, dddirBackward,
5369 ind->nsend, nzone+2,
5370 ind->nrecv, nzone+2);
5374 /* We can receive in place if only the last zone is not empty */
5375 for (zone = 0; zone < nzone-1; zone++)
5377 if (ind->nrecv[zone] > 0)
5379 cd->receiveInPlace = false;
5384 int receiveBufferSize = 0;
5385 if (!cd->receiveInPlace)
5387 receiveBufferSize = ind->nrecv[nzone];
5389 /* These buffer are actually only needed with in-place */
5390 DDBufferAccess<int> globalAtomGroupBuffer(comm->intBuffer, receiveBufferSize);
5391 DDBufferAccess<gmx::RVec> rvecBuffer(comm->rvecBuffer, receiveBufferSize);
5393 dd_comm_setup_work_t &work = comm->dth[0];
5395 /* Make space for the global cg indices */
5396 int numAtomGroupsNew = pos_cg + ind->nrecv[nzone];
5397 dd->globalAtomGroupIndices.resize(numAtomGroupsNew);
5398 /* Communicate the global cg indices */
5399 gmx::ArrayRef<int> integerBufferRef;
5400 if (cd->receiveInPlace)
5402 integerBufferRef = gmx::arrayRefFromArray(dd->globalAtomGroupIndices.data() + pos_cg, ind->nrecv[nzone]);
5406 integerBufferRef = globalAtomGroupBuffer.buffer;
5408 ddSendrecv<int>(dd, dim_ind, dddirBackward,
5409 work.atomGroupBuffer, integerBufferRef);
5411 /* Make space for cg_cm */
5412 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
5413 if (fr->cutoff_scheme == ecutsGROUP)
5419 cg_cm = as_rvec_array(state->x.data());
5421 /* Communicate cg_cm */
5422 gmx::ArrayRef<gmx::RVec> rvecBufferRef;
5423 if (cd->receiveInPlace)
5425 rvecBufferRef = gmx::arrayRefFromArray(reinterpret_cast<gmx::RVec *>(cg_cm + pos_cg), ind->nrecv[nzone]);
5429 rvecBufferRef = rvecBuffer.buffer;
5431 ddSendrecv<gmx::RVec>(dd, dim_ind, dddirBackward,
5432 work.positionBuffer, rvecBufferRef);
5434 /* Make the charge group index */
5435 if (cd->receiveInPlace)
5437 zone = (p == 0 ? 0 : nzone - 1);
5438 while (zone < nzone)
5440 for (cg = 0; cg < ind->nrecv[zone]; cg++)
5442 cg_gl = dd->globalAtomGroupIndices[pos_cg];
5443 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
5444 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
5445 dd->atomGrouping_.appendBlock(nrcg);
5448 /* Update the charge group presence,
5449 * so we can use it in the next pass of the loop.
5451 comm->bLocalCG[cg_gl] = TRUE;
5457 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
5460 zone_cg_range[nzone+zone] = pos_cg;
5465 /* This part of the code is never executed with bBondComm. */
5466 std::vector<int> &atomGroupsIndex = dd->atomGrouping_.rawIndex();
5467 atomGroupsIndex.resize(numAtomGroupsNew + 1);
5469 merge_cg_buffers(nzone, cd, p, zone_cg_range,
5470 dd->globalAtomGroupIndices, integerBufferRef.data(),
5471 cg_cm, as_rvec_array(rvecBufferRef.data()),
5473 fr->cginfo_mb, fr->cginfo);
5474 pos_cg += ind->nrecv[nzone];
5476 nat_tot += ind->nrecv[nzone+1];
5478 if (!cd->receiveInPlace)
5480 /* Store the atom block for easy copying of communication buffers */
5481 make_cell2at_index(cd, nzone, zone_cg_range[nzone], dd->atomGrouping());
5486 comm->atomRanges.setEnd(DDAtomRanges::Type::Zones, nat_tot);
5490 /* We don't need to update cginfo, since that was alrady done above.
5491 * So we pass NULL for the forcerec.
5493 dd_set_cginfo(dd->globalAtomGroupIndices,
5494 dd->ncg_home, dd->globalAtomGroupIndices.size(),
5495 nullptr, comm->bLocalCG);
5500 fprintf(debug, "Finished setting up DD communication, zones:");
5501 for (c = 0; c < zones->n; c++)
5503 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
5505 fprintf(debug, "\n");
5509 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
5513 for (c = 0; c < zones->nizone; c++)
5515 zones->izone[c].cg1 = zones->cg_range[c+1];
5516 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
5517 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
5521 /* \brief Set zone dimensions for zones \p zone_start to \p zone_end-1
5523 * Also sets the atom density for the home zone when \p zone_start=0.
5524 * For this \p numMovedChargeGroupsInHomeZone needs to be passed to tell
5525 * how many charge groups will move but are still part of the current range.
5526 * \todo When converting domdec to use proper classes, all these variables
5527 * should be private and a method should return the correct count
5528 * depending on an internal state.
5530 * \param[in,out] dd The domain decomposition struct
5531 * \param[in] box The box
5532 * \param[in] ddbox The domain decomposition box struct
5533 * \param[in] zone_start The start of the zone range to set sizes for
5534 * \param[in] zone_end The end of the zone range to set sizes for
5535 * \param[in] numMovedChargeGroupsInHomeZone The number of charge groups in the home zone that should moved but are still present in dd->comm->zones.cg_range
5537 static void set_zones_size(gmx_domdec_t *dd,
5538 matrix box, const gmx_ddbox_t *ddbox,
5539 int zone_start, int zone_end,
5540 int numMovedChargeGroupsInHomeZone)
5542 gmx_domdec_comm_t *comm;
5543 gmx_domdec_zones_t *zones;
5552 zones = &comm->zones;
5554 /* Do we need to determine extra distances for multi-body bondeds? */
5555 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
5557 for (z = zone_start; z < zone_end; z++)
5559 /* Copy cell limits to zone limits.
5560 * Valid for non-DD dims and non-shifted dims.
5562 copy_rvec(comm->cell_x0, zones->size[z].x0);
5563 copy_rvec(comm->cell_x1, zones->size[z].x1);
5566 for (d = 0; d < dd->ndim; d++)
5570 for (z = 0; z < zones->n; z++)
5572 /* With a staggered grid we have different sizes
5573 * for non-shifted dimensions.
5575 if (isDlbOn(dd->comm) && zones->shift[z][dim] == 0)
5579 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
5580 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
5584 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
5585 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
5591 rcmbs = comm->cutoff_mbody;
5592 if (ddbox->tric_dir[dim])
5594 rcs /= ddbox->skew_fac[dim];
5595 rcmbs /= ddbox->skew_fac[dim];
5598 /* Set the lower limit for the shifted zone dimensions */
5599 for (z = zone_start; z < zone_end; z++)
5601 if (zones->shift[z][dim] > 0)
5604 if (!isDlbOn(dd->comm) || d == 0)
5606 zones->size[z].x0[dim] = comm->cell_x1[dim];
5607 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
5611 /* Here we take the lower limit of the zone from
5612 * the lowest domain of the zone below.
5616 zones->size[z].x0[dim] =
5617 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
5623 zones->size[z].x0[dim] =
5624 zones->size[zone_perm[2][z-4]].x0[dim];
5628 zones->size[z].x0[dim] =
5629 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
5632 /* A temporary limit, is updated below */
5633 zones->size[z].x1[dim] = zones->size[z].x0[dim];
5637 for (zi = 0; zi < zones->nizone; zi++)
5639 if (zones->shift[zi][dim] == 0)
5641 /* This takes the whole zone into account.
5642 * With multiple pulses this will lead
5643 * to a larger zone then strictly necessary.
5645 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
5646 zones->size[zi].x1[dim]+rcmbs);
5654 /* Loop over the i-zones to set the upper limit of each
5657 for (zi = 0; zi < zones->nizone; zi++)
5659 if (zones->shift[zi][dim] == 0)
5661 /* We should only use zones up to zone_end */
5662 int jZoneEnd = std::min(zones->izone[zi].j1, zone_end);
5663 for (z = zones->izone[zi].j0; z < jZoneEnd; z++)
5665 if (zones->shift[z][dim] > 0)
5667 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
5668 zones->size[zi].x1[dim]+rcs);
5675 for (z = zone_start; z < zone_end; z++)
5677 /* Initialization only required to keep the compiler happy */
5678 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
5681 /* To determine the bounding box for a zone we need to find
5682 * the extreme corners of 4, 2 or 1 corners.
5684 nc = 1 << (ddbox->nboundeddim - 1);
5686 for (c = 0; c < nc; c++)
5688 /* Set up a zone corner at x=0, ignoring trilinic couplings */
5692 corner[YY] = zones->size[z].x0[YY];
5696 corner[YY] = zones->size[z].x1[YY];
5700 corner[ZZ] = zones->size[z].x0[ZZ];
5704 corner[ZZ] = zones->size[z].x1[ZZ];
5706 if (dd->ndim == 1 && dd->dim[0] < ZZ && ZZ < dd->npbcdim &&
5707 box[ZZ][1 - dd->dim[0]] != 0)
5709 /* With 1D domain decomposition the cg's are not in
5710 * the triclinic box, but triclinic x-y and rectangular y/x-z.
5711 * Shift the corner of the z-vector back to along the box
5712 * vector of dimension d, so it will later end up at 0 along d.
5713 * This can affect the location of this corner along dd->dim[0]
5714 * through the matrix operation below if box[d][dd->dim[0]]!=0.
5716 int d = 1 - dd->dim[0];
5718 corner[d] -= corner[ZZ]*box[ZZ][d]/box[ZZ][ZZ];
5720 /* Apply the triclinic couplings */
5721 assert(ddbox->npbcdim <= DIM);
5722 for (i = YY; i < ddbox->npbcdim; i++)
5724 for (j = XX; j < i; j++)
5726 corner[j] += corner[i]*box[i][j]/box[i][i];
5731 copy_rvec(corner, corner_min);
5732 copy_rvec(corner, corner_max);
5736 for (i = 0; i < DIM; i++)
5738 corner_min[i] = std::min(corner_min[i], corner[i]);
5739 corner_max[i] = std::max(corner_max[i], corner[i]);
5743 /* Copy the extreme cornes without offset along x */
5744 for (i = 0; i < DIM; i++)
5746 zones->size[z].bb_x0[i] = corner_min[i];
5747 zones->size[z].bb_x1[i] = corner_max[i];
5749 /* Add the offset along x */
5750 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
5751 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
5754 if (zone_start == 0)
5757 for (dim = 0; dim < DIM; dim++)
5759 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
5761 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0] - numMovedChargeGroupsInHomeZone)/vol;
5766 for (z = zone_start; z < zone_end; z++)
5768 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
5770 zones->size[z].x0[XX], zones->size[z].x1[XX],
5771 zones->size[z].x0[YY], zones->size[z].x1[YY],
5772 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
5773 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
5775 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
5776 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
5777 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
5782 static bool comp_cgsort(const gmx_cgsort_t &a, const gmx_cgsort_t &b)
5787 return a.ind_gl < b.ind_gl;
5789 return a.nsc < b.nsc;
5792 /* Order data in \p dataToSort according to \p sort
5794 * Note: both buffers should have at least \p sort.size() elements.
5796 template <typename T>
5798 orderVector(gmx::ArrayRef<const gmx_cgsort_t> sort,
5799 gmx::ArrayRef<T> dataToSort,
5800 gmx::ArrayRef<T> sortBuffer)
5802 GMX_ASSERT(dataToSort.size() >= sort.size(), "The vector needs to be sufficiently large");
5803 GMX_ASSERT(sortBuffer.size() >= sort.size(), "The sorting buffer needs to be sufficiently large");
5805 /* Order the data into the temporary buffer */
5807 for (const gmx_cgsort_t &entry : sort)
5809 sortBuffer[i++] = dataToSort[entry.ind];
5812 /* Copy back to the original array */
5813 std::copy(sortBuffer.begin(), sortBuffer.begin() + sort.size(),
5814 dataToSort.begin());
5817 /* Order data in \p dataToSort according to \p sort
5819 * Note: \p vectorToSort should have at least \p sort.size() elements,
5820 * \p workVector is resized when it is too small.
5822 template <typename T>
5824 orderVector(gmx::ArrayRef<const gmx_cgsort_t> sort,
5825 gmx::ArrayRef<T> vectorToSort,
5826 std::vector<T> *workVector)
5828 if (gmx::index(workVector->size()) < sort.size())
5830 workVector->resize(sort.size());
5832 orderVector<T>(sort, vectorToSort, *workVector);
5835 static void order_vec_atom(const gmx::RangePartitioning *atomGroups,
5836 gmx::ArrayRef<const gmx_cgsort_t> sort,
5837 gmx::ArrayRef<gmx::RVec> v,
5838 gmx::ArrayRef<gmx::RVec> buf)
5840 if (atomGroups == nullptr)
5842 /* Avoid the useless loop of the atoms within a cg */
5843 orderVector(sort, v, buf);
5848 /* Order the data */
5850 for (const gmx_cgsort_t &entry : sort)
5852 for (int i : atomGroups->block(entry.ind))
5854 copy_rvec(v[i], buf[a]);
5860 /* Copy back to the original array */
5861 for (int a = 0; a < atot; a++)
5863 copy_rvec(buf[a], v[a]);
5867 /* Returns whether a < b */
5868 static bool compareCgsort(const gmx_cgsort_t &a,
5869 const gmx_cgsort_t &b)
5871 return (a.nsc < b.nsc ||
5872 (a.nsc == b.nsc && a.ind_gl < b.ind_gl));
5875 static void orderedSort(gmx::ArrayRef<const gmx_cgsort_t> stationary,
5876 gmx::ArrayRef<gmx_cgsort_t> moved,
5877 std::vector<gmx_cgsort_t> *sort1)
5879 /* The new indices are not very ordered, so we qsort them */
5880 std::sort(moved.begin(), moved.end(), comp_cgsort);
5882 /* stationary is already ordered, so now we can merge the two arrays */
5883 sort1->resize(stationary.size() + moved.size());
5884 std::merge(stationary.begin(), stationary.end(),
5885 moved.begin(), moved.end(),
5890 /* Set the sorting order for systems with charge groups, returned in sort->sort.
5891 * The order is according to the global charge group index.
5892 * This adds and removes charge groups that moved between domains.
5894 static void dd_sort_order(const gmx_domdec_t *dd,
5895 const t_forcerec *fr,
5897 gmx_domdec_sort_t *sort)
5899 const int *a = fr->ns->grid->cell_index;
5901 const int movedValue = NSGRID_SIGNAL_MOVED_FAC*fr->ns->grid->ncells;
5903 if (ncg_home_old >= 0)
5905 std::vector<gmx_cgsort_t> &stationary = sort->stationary;
5906 std::vector<gmx_cgsort_t> &moved = sort->moved;
5908 /* The charge groups that remained in the same ns grid cell
5909 * are completely ordered. So we can sort efficiently by sorting
5910 * the charge groups that did move into the stationary list.
5911 * Note: push_back() seems to be slightly slower than direct access.
5915 for (int i = 0; i < dd->ncg_home; i++)
5917 /* Check if this cg did not move to another node */
5918 if (a[i] < movedValue)
5922 entry.ind_gl = dd->globalAtomGroupIndices[i];
5925 if (i >= ncg_home_old || a[i] != sort->sorted[i].nsc)
5927 /* This cg is new on this node or moved ns grid cell */
5928 moved.push_back(entry);
5932 /* This cg did not move */
5933 stationary.push_back(entry);
5940 fprintf(debug, "ordered sort cgs: stationary %zu moved %zu\n",
5941 stationary.size(), moved.size());
5943 /* Sort efficiently */
5944 orderedSort(stationary, moved, &sort->sorted);
5948 std::vector<gmx_cgsort_t> &cgsort = sort->sorted;
5950 cgsort.reserve(dd->ncg_home);
5952 for (int i = 0; i < dd->ncg_home; i++)
5954 /* Sort on the ns grid cell indices
5955 * and the global topology index
5959 entry.ind_gl = dd->globalAtomGroupIndices[i];
5961 cgsort.push_back(entry);
5962 if (cgsort[i].nsc < movedValue)
5969 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, numCGNew);
5971 /* Determine the order of the charge groups using qsort */
5972 std::sort(cgsort.begin(), cgsort.end(), comp_cgsort);
5974 /* Remove the charge groups which are no longer at home here */
5975 cgsort.resize(numCGNew);
5979 /* Returns the sorting order for atoms based on the nbnxn grid order in sort */
5980 static void dd_sort_order_nbnxn(const t_forcerec *fr,
5981 std::vector<gmx_cgsort_t> *sort)
5983 gmx::ArrayRef<const int> atomOrder = nbnxn_get_atomorder(fr->nbv->nbs.get());
5985 /* Using push_back() instead of this resize results in much slower code */
5986 sort->resize(atomOrder.size());
5987 gmx::ArrayRef<gmx_cgsort_t> buffer = *sort;
5988 size_t numSorted = 0;
5989 for (int i : atomOrder)
5993 /* The values of nsc and ind_gl are not used in this case */
5994 buffer[numSorted++].ind = i;
5997 sort->resize(numSorted);
6000 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
6003 gmx_domdec_sort_t *sort = dd->comm->sort.get();
6005 switch (fr->cutoff_scheme)
6008 dd_sort_order(dd, fr, ncg_home_old, sort);
6011 dd_sort_order_nbnxn(fr, &sort->sorted);
6014 gmx_incons("unimplemented");
6017 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
6019 /* We alloc with the old size, since cgindex is still old */
6020 GMX_ASSERT(atomGrouping.numBlocks() == dd->ncg_home, "atomGroups and dd should be consistent");
6021 DDBufferAccess<gmx::RVec> rvecBuffer(dd->comm->rvecBuffer, atomGrouping.fullRange().end());
6023 const gmx::RangePartitioning *atomGroupsPtr = (dd->comm->bCGs ? &atomGrouping : nullptr);
6025 /* Set the new home atom/charge group count */
6026 dd->ncg_home = sort->sorted.size();
6029 fprintf(debug, "Set the new home charge group count to %d\n",
6033 /* Reorder the state */
6034 gmx::ArrayRef<const gmx_cgsort_t> cgsort = sort->sorted;
6035 GMX_RELEASE_ASSERT(cgsort.size() == dd->ncg_home, "We should sort all the home atom groups");
6037 if (state->flags & (1 << estX))
6039 order_vec_atom(atomGroupsPtr, cgsort, state->x, rvecBuffer.buffer);
6041 if (state->flags & (1 << estV))
6043 order_vec_atom(atomGroupsPtr, cgsort, state->v, rvecBuffer.buffer);
6045 if (state->flags & (1 << estCGP))
6047 order_vec_atom(atomGroupsPtr, cgsort, state->cg_p, rvecBuffer.buffer);
6050 if (fr->cutoff_scheme == ecutsGROUP)
6053 gmx::ArrayRef<gmx::RVec> cgcmRef = gmx::arrayRefFromArray(reinterpret_cast<gmx::RVec *>(cgcm[0]), cgsort.size());
6054 orderVector(cgsort, cgcmRef, rvecBuffer.buffer);
6057 /* Reorder the global cg index */
6058 orderVector<int>(cgsort, dd->globalAtomGroupIndices, &sort->intBuffer);
6059 /* Reorder the cginfo */
6060 orderVector<int>(cgsort, gmx::arrayRefFromArray(fr->cginfo, cgsort.size()), &sort->intBuffer);
6061 /* Rebuild the local cg index */
6064 /* We make a new, ordered atomGroups object and assign it to
6065 * the old one. This causes some allocation overhead, but saves
6066 * a copy back of the whole index.
6068 gmx::RangePartitioning ordered;
6069 for (const gmx_cgsort_t &entry : cgsort)
6071 ordered.appendBlock(atomGrouping.block(entry.ind).size());
6073 dd->atomGrouping_ = ordered;
6077 dd->atomGrouping_.setAllBlocksSizeOne(dd->ncg_home);
6079 /* Set the home atom number */
6080 dd->comm->atomRanges.setEnd(DDAtomRanges::Type::Home, dd->atomGrouping().fullRange().end());
6082 if (fr->cutoff_scheme == ecutsVERLET)
6084 /* The atoms are now exactly in grid order, update the grid order */
6085 nbnxn_set_atomorder(fr->nbv->nbs.get());
6089 /* Copy the sorted ns cell indices back to the ns grid struct */
6090 for (gmx::index i = 0; i < cgsort.size(); i++)
6092 fr->ns->grid->cell_index[i] = cgsort[i].nsc;
6094 fr->ns->grid->nr = cgsort.size();
6098 static void add_dd_statistics(gmx_domdec_t *dd)
6100 gmx_domdec_comm_t *comm = dd->comm;
6102 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6104 auto range = static_cast<DDAtomRanges::Type>(i);
6106 comm->atomRanges.end(range) - comm->atomRanges.start(range);
6111 void reset_dd_statistics_counters(gmx_domdec_t *dd)
6113 gmx_domdec_comm_t *comm = dd->comm;
6115 /* Reset all the statistics and counters for total run counting */
6116 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6118 comm->sum_nat[i] = 0;
6122 comm->load_step = 0;
6125 clear_ivec(comm->load_lim);
6130 void print_dd_statistics(const t_commrec *cr, const t_inputrec *ir, FILE *fplog)
6132 gmx_domdec_comm_t *comm = cr->dd->comm;
6134 const int numRanges = static_cast<int>(DDAtomRanges::Type::Number);
6135 gmx_sumd(numRanges, comm->sum_nat, cr);
6137 if (fplog == nullptr)
6142 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
6144 for (int i = static_cast<int>(DDAtomRanges::Type::Zones); i < numRanges; i++)
6146 auto range = static_cast<DDAtomRanges::Type>(i);
6147 double av = comm->sum_nat[i]/comm->ndecomp;
6150 case DDAtomRanges::Type::Zones:
6152 " av. #atoms communicated per step for force: %d x %.1f\n",
6155 case DDAtomRanges::Type::Vsites:
6156 if (cr->dd->vsite_comm)
6159 " av. #atoms communicated per step for vsites: %d x %.1f\n",
6160 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
6164 case DDAtomRanges::Type::Constraints:
6165 if (cr->dd->constraint_comm)
6168 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
6169 1 + ir->nLincsIter, av);
6173 gmx_incons(" Unknown type for DD statistics");
6176 fprintf(fplog, "\n");
6178 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
6180 print_dd_load_av(fplog, cr->dd);
6184 // TODO Remove fplog when group scheme and charge groups are gone
6185 void dd_partition_system(FILE *fplog,
6186 const gmx::MDLogger &mdlog,
6188 const t_commrec *cr,
6189 gmx_bool bMasterState,
6191 t_state *state_global,
6192 const gmx_mtop_t *top_global,
6193 const t_inputrec *ir,
6194 t_state *state_local,
6195 PaddedRVecVector *f,
6196 gmx::MDAtoms *mdAtoms,
6197 gmx_localtop_t *top_local,
6200 gmx::Constraints *constr,
6202 gmx_wallcycle *wcycle,
6206 gmx_domdec_comm_t *comm;
6207 gmx_ddbox_t ddbox = {0};
6209 int64_t step_pcoupl;
6210 rvec cell_ns_x0, cell_ns_x1;
6211 int ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
6212 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckWhetherToTurnDlbOn, bLogLoad;
6213 gmx_bool bRedist, bResortAll;
6214 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
6218 wallcycle_start(wcycle, ewcDOMDEC);
6223 // TODO if the update code becomes accessible here, use
6224 // upd->deform for this logic.
6225 bBoxChanged = (bMasterState || inputrecDeform(ir));
6226 if (ir->epc != epcNO)
6228 /* With nstpcouple > 1 pressure coupling happens.
6229 * one step after calculating the pressure.
6230 * Box scaling happens at the end of the MD step,
6231 * after the DD partitioning.
6232 * We therefore have to do DLB in the first partitioning
6233 * after an MD step where P-coupling occurred.
6234 * We need to determine the last step in which p-coupling occurred.
6235 * MRS -- need to validate this for vv?
6237 int n = ir->nstpcouple;
6240 step_pcoupl = step - 1;
6244 step_pcoupl = ((step - 1)/n)*n + 1;
6246 if (step_pcoupl >= comm->partition_step)
6252 bNStGlobalComm = (step % nstglobalcomm == 0);
6260 /* Should we do dynamic load balacing this step?
6261 * Since it requires (possibly expensive) global communication,
6262 * we might want to do DLB less frequently.
6264 if (bBoxChanged || ir->epc != epcNO)
6266 bDoDLB = bBoxChanged;
6270 bDoDLB = bNStGlobalComm;
6274 /* Check if we have recorded loads on the nodes */
6275 if (comm->bRecordLoad && dd_load_count(comm) > 0)
6277 bCheckWhetherToTurnDlbOn = dd_dlb_get_should_check_whether_to_turn_dlb_on(dd);
6279 /* Print load every nstlog, first and last step to the log file */
6280 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
6281 comm->n_load_collect == 0 ||
6283 (step + ir->nstlist > ir->init_step + ir->nsteps)));
6285 /* Avoid extra communication due to verbose screen output
6286 * when nstglobalcomm is set.
6288 if (bDoDLB || bLogLoad || bCheckWhetherToTurnDlbOn ||
6289 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
6291 get_load_distribution(dd, wcycle);
6296 GMX_LOG(mdlog.info).asParagraph().appendText(dd_print_load(dd, step-1));
6300 dd_print_load_verbose(dd);
6303 comm->n_load_collect++;
6309 /* Add the measured cycles to the running average */
6310 const float averageFactor = 0.1f;
6311 comm->cyclesPerStepDlbExpAverage =
6312 (1 - averageFactor)*comm->cyclesPerStepDlbExpAverage +
6313 averageFactor*comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
6315 if (comm->dlbState == DlbState::onCanTurnOff &&
6316 dd->comm->n_load_have % c_checkTurnDlbOffInterval == c_checkTurnDlbOffInterval - 1)
6318 gmx_bool turnOffDlb;
6321 /* If the running averaged cycles with DLB are more
6322 * than before we turned on DLB, turn off DLB.
6323 * We will again run and check the cycles without DLB
6324 * and we can then decide if to turn off DLB forever.
6326 turnOffDlb = (comm->cyclesPerStepDlbExpAverage >
6327 comm->cyclesPerStepBeforeDLB);
6329 dd_bcast(dd, sizeof(turnOffDlb), &turnOffDlb);
6332 /* To turn off DLB, we need to redistribute the atoms */
6333 dd_collect_state(dd, state_local, state_global);
6334 bMasterState = TRUE;
6335 turn_off_dlb(mdlog, dd, step);
6339 else if (bCheckWhetherToTurnDlbOn)
6341 gmx_bool turnOffDlbForever = FALSE;
6342 gmx_bool turnOnDlb = FALSE;
6344 /* Since the timings are node dependent, the master decides */
6347 /* If we recently turned off DLB, we want to check if
6348 * performance is better without DLB. We want to do this
6349 * ASAP to minimize the chance that external factors
6350 * slowed down the DLB step are gone here and we
6351 * incorrectly conclude that DLB was causing the slowdown.
6352 * So we measure one nstlist block, no running average.
6354 if (comm->haveTurnedOffDlb &&
6355 comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep] <
6356 comm->cyclesPerStepDlbExpAverage)
6358 /* After turning off DLB we ran nstlist steps in fewer
6359 * cycles than with DLB. This likely means that DLB
6360 * in not benefical, but this could be due to a one
6361 * time unlucky fluctuation, so we require two such
6362 * observations in close succession to turn off DLB
6365 if (comm->dlbSlowerPartitioningCount > 0 &&
6366 dd->ddp_count < comm->dlbSlowerPartitioningCount + 10*c_checkTurnDlbOnInterval)
6368 turnOffDlbForever = TRUE;
6370 comm->haveTurnedOffDlb = false;
6371 /* Register when we last measured DLB slowdown */
6372 comm->dlbSlowerPartitioningCount = dd->ddp_count;
6376 /* Here we check if the max PME rank load is more than 0.98
6377 * the max PP force load. If so, PP DLB will not help,
6378 * since we are (almost) limited by PME. Furthermore,
6379 * DLB will cause a significant extra x/f redistribution
6380 * cost on the PME ranks, which will then surely result
6381 * in lower total performance.
6383 if (cr->npmenodes > 0 &&
6384 dd_pme_f_ratio(dd) > 1 - DD_PERF_LOSS_DLB_ON)
6390 turnOnDlb = (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
6396 gmx_bool turnOffDlbForever;
6400 turnOffDlbForever, turnOnDlb
6402 dd_bcast(dd, sizeof(bools), &bools);
6403 if (bools.turnOffDlbForever)
6405 turn_off_dlb_forever(mdlog, dd, step);
6407 else if (bools.turnOnDlb)
6409 turn_on_dlb(mdlog, dd, step);
6414 comm->n_load_have++;
6417 cgs_gl = &comm->cgs_gl;
6422 /* Clear the old state */
6423 clearDDStateIndices(dd, 0, 0);
6426 auto xGlobal = positionsFromStatePointer(state_global);
6428 set_ddbox(dd, true, ir,
6429 DDMASTER(dd) ? state_global->box : nullptr,
6433 distributeState(mdlog, dd, state_global, ddbox, state_local, f);
6435 dd_make_local_cgs(dd, &top_local->cgs);
6437 /* Ensure that we have space for the new distribution */
6438 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
6440 if (fr->cutoff_scheme == ecutsGROUP)
6442 calc_cgcm(fplog, 0, dd->ncg_home,
6443 &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
6446 inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());
6448 dd_set_cginfo(dd->globalAtomGroupIndices, 0, dd->ncg_home, fr, comm->bLocalCG);
6450 else if (state_local->ddp_count != dd->ddp_count)
6452 if (state_local->ddp_count > dd->ddp_count)
6454 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%ld)", state_local->ddp_count, dd->ddp_count);
6457 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
6459 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
6462 /* Clear the old state */
6463 clearDDStateIndices(dd, 0, 0);
6465 /* Restore the atom group indices from state_local */
6466 restoreAtomGroups(dd, cgs_gl->index, state_local);
6467 make_dd_indices(dd, cgs_gl->index, 0);
6468 ncgindex_set = dd->ncg_home;
6470 if (fr->cutoff_scheme == ecutsGROUP)
6472 /* Redetermine the cg COMs */
6473 calc_cgcm(fplog, 0, dd->ncg_home,
6474 &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
6477 inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());
6479 dd_set_cginfo(dd->globalAtomGroupIndices, 0, dd->ncg_home, fr, comm->bLocalCG);
6481 set_ddbox(dd, bMasterState, ir, state_local->box,
6482 true, state_local->x, &ddbox);
6484 bRedist = isDlbOn(comm);
6488 /* We have the full state, only redistribute the cgs */
6490 /* Clear the non-home indices */
6491 clearDDStateIndices(dd, dd->ncg_home, comm->atomRanges.numHomeAtoms());
6494 /* Avoid global communication for dim's without pbc and -gcom */
6495 if (!bNStGlobalComm)
6497 copy_rvec(comm->box0, ddbox.box0 );
6498 copy_rvec(comm->box_size, ddbox.box_size);
6500 set_ddbox(dd, bMasterState, ir, state_local->box,
6501 bNStGlobalComm, state_local->x, &ddbox);
6506 /* For dim's without pbc and -gcom */
6507 copy_rvec(ddbox.box0, comm->box0 );
6508 copy_rvec(ddbox.box_size, comm->box_size);
6510 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
6513 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
6515 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
6518 /* Check if we should sort the charge groups */
6519 const bool bSortCG = (bMasterState || bRedist);
6521 ncg_home_old = dd->ncg_home;
6523 /* When repartitioning we mark charge groups that will move to neighboring
6524 * DD cells, but we do not move them right away for performance reasons.
6525 * Thus we need to keep track of how many charge groups will move for
6526 * obtaining correct local charge group / atom counts.
6531 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
6533 ncgindex_set = dd->ncg_home;
6534 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
6538 GMX_RELEASE_ASSERT(bSortCG, "Sorting is required after redistribution");
6540 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
6543 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
6545 &comm->cell_x0, &comm->cell_x1,
6546 dd->ncg_home, fr->cg_cm,
6547 cell_ns_x0, cell_ns_x1, &grid_density);
6551 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
6554 switch (fr->cutoff_scheme)
6557 copy_ivec(fr->ns->grid->n, ncells_old);
6558 grid_first(fplog, fr->ns->grid, dd, &ddbox,
6559 state_local->box, cell_ns_x0, cell_ns_x1,
6560 fr->rlist, grid_density);
6563 nbnxn_get_ncells(fr->nbv->nbs.get(), &ncells_old[XX], &ncells_old[YY]);
6566 gmx_incons("unimplemented");
6568 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
6569 copy_ivec(ddbox.tric_dir, comm->tric_dir);
6573 wallcycle_sub_start(wcycle, ewcsDD_GRID);
6575 /* Sort the state on charge group position.
6576 * This enables exact restarts from this step.
6577 * It also improves performance by about 15% with larger numbers
6578 * of atoms per node.
6581 /* Fill the ns grid with the home cell,
6582 * so we can sort with the indices.
6584 set_zones_ncg_home(dd);
6586 switch (fr->cutoff_scheme)
6589 set_zones_size(dd, state_local->box, &ddbox, 0, 1, ncg_moved);
6591 nbnxn_put_on_grid(fr->nbv->nbs.get(), fr->ePBC, state_local->box,
6593 comm->zones.size[0].bb_x0,
6594 comm->zones.size[0].bb_x1,
6596 comm->zones.dens_zone0,
6598 as_rvec_array(state_local->x.data()),
6599 ncg_moved, bRedist ? comm->movedBuffer.data() : nullptr,
6600 fr->nbv->grp[eintLocal].kernel_type,
6603 nbnxn_get_ncells(fr->nbv->nbs.get(), &ncells_new[XX], &ncells_new[YY]);
6606 fill_grid(&comm->zones, fr->ns->grid, dd->ncg_home,
6607 0, dd->ncg_home, fr->cg_cm);
6609 copy_ivec(fr->ns->grid->n, ncells_new);
6612 gmx_incons("unimplemented");
6615 bResortAll = bMasterState;
6617 /* Check if we can user the old order and ns grid cell indices
6618 * of the charge groups to sort the charge groups efficiently.
6620 if (ncells_new[XX] != ncells_old[XX] ||
6621 ncells_new[YY] != ncells_old[YY] ||
6622 ncells_new[ZZ] != ncells_old[ZZ])
6629 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
6630 gmx_step_str(step, sbuf), dd->ncg_home);
6632 dd_sort_state(dd, fr->cg_cm, fr, state_local,
6633 bResortAll ? -1 : ncg_home_old);
6635 /* After sorting and compacting we set the correct size */
6636 dd_resize_state(state_local, f, comm->atomRanges.numHomeAtoms());
6638 /* Rebuild all the indices */
6642 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
6645 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
6647 /* Setup up the communication and communicate the coordinates */
6648 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
6650 /* Set the indices */
6651 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
6653 /* Set the charge group boundaries for neighbor searching */
6654 set_cg_boundaries(&comm->zones);
6656 if (fr->cutoff_scheme == ecutsVERLET)
6658 /* When bSortCG=true, we have already set the size for zone 0 */
6659 set_zones_size(dd, state_local->box, &ddbox,
6660 bSortCG ? 1 : 0, comm->zones.n,
6664 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
6667 write_dd_pdb("dd_home",step,"dump",top_global,cr,
6668 -1,as_rvec_array(state_local->x.data()),state_local->box);
6671 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
6673 /* Extract a local topology from the global topology */
6674 for (int i = 0; i < dd->ndim; i++)
6676 np[dd->dim[i]] = comm->cd[i].numPulses();
6678 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
6679 comm->cellsize_min, np,
6681 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : as_rvec_array(state_local->x.data()),
6682 vsite, top_global, top_local);
6684 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
6686 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
6688 /* Set up the special atom communication */
6689 int n = comm->atomRanges.end(DDAtomRanges::Type::Zones);
6690 for (int i = static_cast<int>(DDAtomRanges::Type::Zones) + 1; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6692 auto range = static_cast<DDAtomRanges::Type>(i);
6695 case DDAtomRanges::Type::Vsites:
6696 if (vsite && vsite->n_intercg_vsite)
6698 n = dd_make_local_vsites(dd, n, top_local->idef.il);
6701 case DDAtomRanges::Type::Constraints:
6702 if (dd->bInterCGcons || dd->bInterCGsettles)
6704 /* Only for inter-cg constraints we need special code */
6705 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
6706 constr, ir->nProjOrder,
6707 top_local->idef.il);
6711 gmx_incons("Unknown special atom type setup");
6713 comm->atomRanges.setEnd(range, n);
6716 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
6718 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
6720 /* Make space for the extra coordinates for virtual site
6721 * or constraint communication.
6723 state_local->natoms = comm->atomRanges.numAtomsTotal();
6725 dd_resize_state(state_local, f, state_local->natoms);
6727 if (fr->haveDirectVirialContributions)
6729 if (vsite && vsite->n_intercg_vsite)
6731 nat_f_novirsum = comm->atomRanges.end(DDAtomRanges::Type::Vsites);
6735 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
6737 nat_f_novirsum = comm->atomRanges.end(DDAtomRanges::Type::Zones);
6741 nat_f_novirsum = comm->atomRanges.numHomeAtoms();
6750 /* Set the number of atoms required for the force calculation.
6751 * Forces need to be constrained when doing energy
6752 * minimization. For simple simulations we could avoid some
6753 * allocation, zeroing and copying, but this is probably not worth
6754 * the complications and checking.
6756 forcerec_set_ranges(fr, dd->ncg_home, dd->globalAtomGroupIndices.size(),
6757 comm->atomRanges.end(DDAtomRanges::Type::Zones),
6758 comm->atomRanges.end(DDAtomRanges::Type::Constraints),
6761 /* Update atom data for mdatoms and several algorithms */
6762 mdAlgorithmsSetupAtomData(cr, ir, top_global, top_local, fr,
6763 nullptr, mdAtoms, constr, vsite, nullptr);
6765 auto mdatoms = mdAtoms->mdatoms();
6766 if (!thisRankHasDuty(cr, DUTY_PME))
6768 /* Send the charges and/or c6/sigmas to our PME only node */
6769 gmx_pme_send_parameters(cr,
6771 mdatoms->nChargePerturbed != 0, mdatoms->nTypePerturbed != 0,
6772 mdatoms->chargeA, mdatoms->chargeB,
6773 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
6774 mdatoms->sigmaA, mdatoms->sigmaB,
6775 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
6780 /* Update the local pull groups */
6781 dd_make_local_pull_groups(cr, ir->pull_work);
6784 if (dd->atomSets != nullptr)
6786 /* Update the local atom sets */
6787 dd->atomSets->setIndicesInDomainDecomposition(*(dd->ga2la));
6790 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
6791 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
6793 add_dd_statistics(dd);
6795 /* Make sure we only count the cycles for this DD partitioning */
6796 clear_dd_cycle_counts(dd);
6798 /* Because the order of the atoms might have changed since
6799 * the last vsite construction, we need to communicate the constructing
6800 * atom coordinates again (for spreading the forces this MD step).
6802 dd_move_x_vsites(dd, state_local->box, as_rvec_array(state_local->x.data()));
6804 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
6806 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
6808 dd_move_x(dd, state_local->box, state_local->x, nullWallcycle);
6809 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
6810 -1, as_rvec_array(state_local->x.data()), state_local->box);
6813 /* Store the partitioning step */
6814 comm->partition_step = step;
6816 /* Increase the DD partitioning counter */
6818 /* The state currently matches this DD partitioning count, store it */
6819 state_local->ddp_count = dd->ddp_count;
6822 /* The DD master node knows the complete cg distribution,
6823 * store the count so we can possibly skip the cg info communication.
6825 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
6828 if (comm->DD_debug > 0)
6830 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
6831 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
6832 "after partitioning");
6835 wallcycle_stop(wcycle, ewcDOMDEC);
6838 /*! \brief Check whether bonded interactions are missing, if appropriate */
6839 void checkNumberOfBondedInteractions(const gmx::MDLogger &mdlog,
6841 int totalNumberOfBondedInteractions,
6842 const gmx_mtop_t *top_global,
6843 const gmx_localtop_t *top_local,
6844 const t_state *state,
6845 bool *shouldCheckNumberOfBondedInteractions)
6847 if (*shouldCheckNumberOfBondedInteractions)
6849 if (totalNumberOfBondedInteractions != cr->dd->nbonded_global)
6851 dd_print_missing_interactions(mdlog, cr, totalNumberOfBondedInteractions, top_global, top_local, state); // Does not return
6853 *shouldCheckNumberOfBondedInteractions = false;