2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
52 #include "gromacs/compat/make_unique.h"
53 #include "gromacs/domdec/collect.h"
54 #include "gromacs/domdec/dlbtiming.h"
55 #include "gromacs/domdec/domdec_network.h"
56 #include "gromacs/domdec/ga2la.h"
57 #include "gromacs/domdec/localatomsetmanager.h"
58 #include "gromacs/ewald/pme.h"
59 #include "gromacs/fileio/gmxfio.h"
60 #include "gromacs/fileio/pdbio.h"
61 #include "gromacs/gmxlib/chargegroup.h"
62 #include "gromacs/gmxlib/network.h"
63 #include "gromacs/gmxlib/nrnb.h"
64 #include "gromacs/gpu_utils/gpu_utils.h"
65 #include "gromacs/hardware/hw_info.h"
66 #include "gromacs/imd/imd.h"
67 #include "gromacs/listed-forces/manage-threading.h"
68 #include "gromacs/math/functions.h"
69 #include "gromacs/math/vec.h"
70 #include "gromacs/math/vectypes.h"
71 #include "gromacs/mdlib/calc_verletbuf.h"
72 #include "gromacs/mdlib/constr.h"
73 #include "gromacs/mdlib/constraintrange.h"
74 #include "gromacs/mdlib/forcerec.h"
75 #include "gromacs/mdlib/gmx_omp_nthreads.h"
76 #include "gromacs/mdlib/lincs.h"
77 #include "gromacs/mdlib/mdatoms.h"
78 #include "gromacs/mdlib/mdrun.h"
79 #include "gromacs/mdlib/mdsetup.h"
80 #include "gromacs/mdlib/nb_verlet.h"
81 #include "gromacs/mdlib/nbnxn_grid.h"
82 #include "gromacs/mdlib/nsgrid.h"
83 #include "gromacs/mdlib/vsite.h"
84 #include "gromacs/mdtypes/commrec.h"
85 #include "gromacs/mdtypes/df_history.h"
86 #include "gromacs/mdtypes/forcerec.h"
87 #include "gromacs/mdtypes/inputrec.h"
88 #include "gromacs/mdtypes/md_enums.h"
89 #include "gromacs/mdtypes/mdatom.h"
90 #include "gromacs/mdtypes/nblist.h"
91 #include "gromacs/mdtypes/state.h"
92 #include "gromacs/pbcutil/ishift.h"
93 #include "gromacs/pbcutil/pbc.h"
94 #include "gromacs/pulling/pull.h"
95 #include "gromacs/timing/wallcycle.h"
96 #include "gromacs/topology/block.h"
97 #include "gromacs/topology/idef.h"
98 #include "gromacs/topology/ifunc.h"
99 #include "gromacs/topology/mtop_lookup.h"
100 #include "gromacs/topology/mtop_util.h"
101 #include "gromacs/topology/topology.h"
102 #include "gromacs/utility/basedefinitions.h"
103 #include "gromacs/utility/basenetwork.h"
104 #include "gromacs/utility/cstringutil.h"
105 #include "gromacs/utility/exceptions.h"
106 #include "gromacs/utility/fatalerror.h"
107 #include "gromacs/utility/gmxmpi.h"
108 #include "gromacs/utility/logger.h"
109 #include "gromacs/utility/real.h"
110 #include "gromacs/utility/smalloc.h"
111 #include "gromacs/utility/strconvert.h"
112 #include "gromacs/utility/stringstream.h"
113 #include "gromacs/utility/stringutil.h"
114 #include "gromacs/utility/textwriter.h"
116 #include "atomdistribution.h"
117 #include "cellsizes.h"
118 #include "distribute.h"
119 #include "domdec_constraints.h"
120 #include "domdec_internal.h"
121 #include "domdec_vsite.h"
122 #include "redistribute.h"
125 #define DD_NLOAD_MAX 9
127 static const char *edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
129 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
132 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
133 #define DD_FLAG_NRCG 65535
134 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
135 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
137 /* The DD zone order */
138 static const ivec dd_zo[DD_MAXZONE] =
139 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
141 /* The non-bonded zone-pair setup for domain decomposition
142 * The first number is the i-zone, the second number the first j-zone seen by
143 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
144 * As is, this is for 3D decomposition, where there are 4 i-zones.
145 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
146 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
149 ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
154 /* Turn on DLB when the load imbalance causes this amount of total loss.
155 * There is a bit of overhead with DLB and it's difficult to achieve
156 * a load imbalance of less than 2% with DLB.
158 #define DD_PERF_LOSS_DLB_ON 0.02
160 /* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
161 #define DD_PERF_LOSS_WARN 0.05
164 /* We check if to turn on DLB at the first and every 100 DD partitionings.
165 * With large imbalance DLB will turn on at the first step, so we can
166 * make the interval so large that the MPI overhead of the check is negligible.
168 static const int c_checkTurnDlbOnInterval = 100;
169 /* We need to check if DLB results in worse performance and then turn it off.
170 * We check this more often then for turning DLB on, because the DLB can scale
171 * the domains very rapidly, so if unlucky the load imbalance can go up quickly
172 * and furthermore, we are already synchronizing often with DLB, so
173 * the overhead of the MPI Bcast is not that high.
175 static const int c_checkTurnDlbOffInterval = 20;
177 /* Forward declaration */
178 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue);
182 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
184 static void index2xyz(ivec nc,int ind,ivec xyz)
186 xyz[XX] = ind % nc[XX];
187 xyz[YY] = (ind / nc[XX]) % nc[YY];
188 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
192 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
194 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
195 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
196 xyz[ZZ] = ind % nc[ZZ];
199 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
204 ddindex = dd_index(dd->nc, c);
205 if (dd->comm->bCartesianPP_PME)
207 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
209 else if (dd->comm->bCartesianPP)
212 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
223 static gmx_bool dynamic_dd_box(const gmx_ddbox_t *ddbox, const t_inputrec *ir)
225 return (ddbox->nboundeddim < DIM || inputrecDynamicBox(ir));
228 int ddglatnr(const gmx_domdec_t *dd, int i)
238 if (i >= dd->comm->atomRanges.numAtomsTotal())
240 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
242 atnr = dd->globalAtomIndices[i] + 1;
248 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
250 return &dd->comm->cgs_gl;
253 void dd_store_state(gmx_domdec_t *dd, t_state *state)
257 if (state->ddp_count != dd->ddp_count)
259 gmx_incons("The MD state does not match the domain decomposition state");
262 state->cg_gl.resize(dd->ncg_home);
263 for (i = 0; i < dd->ncg_home; i++)
265 state->cg_gl[i] = dd->globalAtomGroupIndices[i];
268 state->ddp_count_cg_gl = dd->ddp_count;
271 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
273 return &dd->comm->zones;
276 void dd_get_ns_ranges(const gmx_domdec_t *dd, int icg,
277 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
279 gmx_domdec_zones_t *zones;
282 zones = &dd->comm->zones;
285 while (icg >= zones->izone[izone].cg1)
294 else if (izone < zones->nizone)
296 *jcg0 = zones->izone[izone].jcg0;
300 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
301 icg, izone, zones->nizone);
304 *jcg1 = zones->izone[izone].jcg1;
306 for (d = 0; d < dd->ndim; d++)
309 shift0[dim] = zones->izone[izone].shift0[dim];
310 shift1[dim] = zones->izone[izone].shift1[dim];
311 if (dd->comm->tric_dir[dim] || (isDlbOn(dd->comm) && d > 0))
313 /* A conservative approach, this can be optimized */
320 int dd_numHomeAtoms(const gmx_domdec_t &dd)
322 return dd.comm->atomRanges.numHomeAtoms();
325 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
327 /* We currently set mdatoms entries for all atoms:
328 * local + non-local + communicated for vsite + constraints
331 return dd->comm->atomRanges.numAtomsTotal();
334 int dd_natoms_vsite(const gmx_domdec_t *dd)
336 return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
339 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
341 *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
342 *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
345 void dd_move_x(gmx_domdec_t *dd,
347 gmx::ArrayRef<gmx::RVec> x,
348 gmx_wallcycle *wcycle)
350 wallcycle_start(wcycle, ewcMOVEX);
353 gmx_domdec_comm_t *comm;
354 gmx_domdec_comm_dim_t *cd;
355 rvec shift = {0, 0, 0};
356 gmx_bool bPBC, bScrew;
360 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
363 nat_tot = comm->atomRanges.numHomeAtoms();
364 for (int d = 0; d < dd->ndim; d++)
366 bPBC = (dd->ci[dd->dim[d]] == 0);
367 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
370 copy_rvec(box[dd->dim[d]], shift);
373 for (const gmx_domdec_ind_t &ind : cd->ind)
375 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
376 gmx::ArrayRef<gmx::RVec> &sendBuffer = sendBufferAccess.buffer;
380 for (int g : ind.index)
382 for (int j : atomGrouping.block(g))
384 sendBuffer[n] = x[j];
391 for (int g : ind.index)
393 for (int j : atomGrouping.block(g))
395 /* We need to shift the coordinates */
396 for (int d = 0; d < DIM; d++)
398 sendBuffer[n][d] = x[j][d] + shift[d];
406 for (int g : ind.index)
408 for (int j : atomGrouping.block(g))
411 sendBuffer[n][XX] = x[j][XX] + shift[XX];
413 * This operation requires a special shift force
414 * treatment, which is performed in calc_vir.
416 sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
417 sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
423 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
425 gmx::ArrayRef<gmx::RVec> receiveBuffer;
426 if (cd->receiveInPlace)
428 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
432 receiveBuffer = receiveBufferAccess.buffer;
434 /* Send and receive the coordinates */
435 ddSendrecv(dd, d, dddirBackward,
436 sendBuffer, receiveBuffer);
438 if (!cd->receiveInPlace)
441 for (int zone = 0; zone < nzone; zone++)
443 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
445 x[i] = receiveBuffer[j++];
449 nat_tot += ind.nrecv[nzone+1];
454 wallcycle_stop(wcycle, ewcMOVEX);
457 void dd_move_f(gmx_domdec_t *dd,
458 gmx::ArrayRef<gmx::RVec> f,
460 gmx_wallcycle *wcycle)
462 wallcycle_start(wcycle, ewcMOVEF);
465 gmx_domdec_comm_t *comm;
466 gmx_domdec_comm_dim_t *cd;
469 gmx_bool bShiftForcesNeedPbc, bScrew;
473 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
475 nzone = comm->zones.n/2;
476 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
477 for (int d = dd->ndim-1; d >= 0; d--)
479 /* Only forces in domains near the PBC boundaries need to
480 consider PBC in the treatment of fshift */
481 bShiftForcesNeedPbc = (dd->ci[dd->dim[d]] == 0);
482 bScrew = (bShiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX);
483 if (fshift == nullptr && !bScrew)
485 bShiftForcesNeedPbc = FALSE;
487 /* Determine which shift vector we need */
493 for (int p = cd->numPulses() - 1; p >= 0; p--)
495 const gmx_domdec_ind_t &ind = cd->ind[p];
496 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
497 gmx::ArrayRef<gmx::RVec> &receiveBuffer = receiveBufferAccess.buffer;
499 nat_tot -= ind.nrecv[nzone+1];
501 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
503 gmx::ArrayRef<gmx::RVec> sendBuffer;
504 if (cd->receiveInPlace)
506 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
510 sendBuffer = sendBufferAccess.buffer;
512 for (int zone = 0; zone < nzone; zone++)
514 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
516 sendBuffer[j++] = f[i];
520 /* Communicate the forces */
521 ddSendrecv(dd, d, dddirForward,
522 sendBuffer, receiveBuffer);
523 /* Add the received forces */
525 if (!bShiftForcesNeedPbc)
527 for (int g : ind.index)
529 for (int j : atomGrouping.block(g))
531 for (int d = 0; d < DIM; d++)
533 f[j][d] += receiveBuffer[n][d];
541 /* fshift should always be defined if this function is
542 * called when bShiftForcesNeedPbc is true */
543 assert(nullptr != fshift);
544 for (int g : ind.index)
546 for (int j : atomGrouping.block(g))
548 for (int d = 0; d < DIM; d++)
550 f[j][d] += receiveBuffer[n][d];
552 /* Add this force to the shift force */
553 for (int d = 0; d < DIM; d++)
555 fshift[is][d] += receiveBuffer[n][d];
563 for (int g : ind.index)
565 for (int j : atomGrouping.block(g))
567 /* Rotate the force */
568 f[j][XX] += receiveBuffer[n][XX];
569 f[j][YY] -= receiveBuffer[n][YY];
570 f[j][ZZ] -= receiveBuffer[n][ZZ];
573 /* Add this force to the shift force */
574 for (int d = 0; d < DIM; d++)
576 fshift[is][d] += receiveBuffer[n][d];
586 wallcycle_stop(wcycle, ewcMOVEF);
589 /* Convenience function for extracting a real buffer from an rvec buffer
591 * To reduce the number of temporary communication buffers and avoid
592 * cache polution, we reuse gmx::RVec buffers for storing reals.
593 * This functions return a real buffer reference with the same number
594 * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
596 static gmx::ArrayRef<real>
597 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
599 return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
603 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
606 gmx_domdec_comm_t *comm;
607 gmx_domdec_comm_dim_t *cd;
611 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
614 nat_tot = comm->atomRanges.numHomeAtoms();
615 for (int d = 0; d < dd->ndim; d++)
618 for (const gmx_domdec_ind_t &ind : cd->ind)
620 /* Note: We provision for RVec instead of real, so a factor of 3
621 * more than needed. The buffer actually already has this size
622 * and we pass a plain pointer below, so this does not matter.
624 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
625 gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
627 for (int g : ind.index)
629 for (int j : atomGrouping.block(g))
631 sendBuffer[n++] = v[j];
635 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
637 gmx::ArrayRef<real> receiveBuffer;
638 if (cd->receiveInPlace)
640 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
644 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
646 /* Send and receive the data */
647 ddSendrecv(dd, d, dddirBackward,
648 sendBuffer, receiveBuffer);
649 if (!cd->receiveInPlace)
652 for (int zone = 0; zone < nzone; zone++)
654 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
656 v[i] = receiveBuffer[j++];
660 nat_tot += ind.nrecv[nzone+1];
666 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
669 gmx_domdec_comm_t *comm;
670 gmx_domdec_comm_dim_t *cd;
674 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
676 nzone = comm->zones.n/2;
677 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
678 for (int d = dd->ndim-1; d >= 0; d--)
681 for (int p = cd->numPulses() - 1; p >= 0; p--)
683 const gmx_domdec_ind_t &ind = cd->ind[p];
685 /* Note: We provision for RVec instead of real, so a factor of 3
686 * more than needed. The buffer actually already has this size
687 * and we typecast, so this works as intended.
689 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
690 gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
691 nat_tot -= ind.nrecv[nzone + 1];
693 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
695 gmx::ArrayRef<real> sendBuffer;
696 if (cd->receiveInPlace)
698 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
702 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
704 for (int zone = 0; zone < nzone; zone++)
706 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
708 sendBuffer[j++] = v[i];
712 /* Communicate the forces */
713 ddSendrecv(dd, d, dddirForward,
714 sendBuffer, receiveBuffer);
715 /* Add the received forces */
717 for (int g : ind.index)
719 for (int j : atomGrouping.block(g))
721 v[j] += receiveBuffer[n];
730 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
732 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
734 zone->min0, zone->max1,
735 zone->mch0, zone->mch0,
736 zone->p1_0, zone->p1_1);
739 /* Using the home grid size as input in cell_ns_x0 and cell_ns_x1
740 * takes the extremes over all home and remote zones in the halo
741 * and returns the results in cell_ns_x0 and cell_ns_x1.
742 * Note: only used with the group cut-off scheme.
744 static void dd_move_cellx(gmx_domdec_t *dd,
745 const gmx_ddbox_t *ddbox,
749 constexpr int c_ddZoneCommMaxNumZones = 5;
750 gmx_ddzone_t buf_s[c_ddZoneCommMaxNumZones];
751 gmx_ddzone_t buf_r[c_ddZoneCommMaxNumZones];
752 gmx_ddzone_t buf_e[c_ddZoneCommMaxNumZones];
753 gmx_domdec_comm_t *comm = dd->comm;
757 for (int d = 1; d < dd->ndim; d++)
759 int dim = dd->dim[d];
760 gmx_ddzone_t &zp = (d == 1) ? comm->zone_d1[0] : comm->zone_d2[0][0];
762 /* Copy the base sizes of the home zone */
763 zp.min0 = cell_ns_x0[dim];
764 zp.max1 = cell_ns_x1[dim];
765 zp.min1 = cell_ns_x1[dim];
766 zp.mch0 = cell_ns_x0[dim];
767 zp.mch1 = cell_ns_x1[dim];
768 zp.p1_0 = cell_ns_x0[dim];
769 zp.p1_1 = cell_ns_x1[dim];
772 gmx::ArrayRef<DDCellsizesWithDlb> cellsizes = comm->cellsizesWithDlb;
774 /* Loop backward over the dimensions and aggregate the extremes
777 for (int d = dd->ndim - 2; d >= 0; d--)
779 const int dim = dd->dim[d];
780 const bool applyPbc = (dim < ddbox->npbcdim);
782 /* Use an rvec to store two reals */
783 extr_s[d][0] = cellsizes[d + 1].fracLower;
784 extr_s[d][1] = cellsizes[d + 1].fracUpper;
785 extr_s[d][2] = cellsizes[d + 1].fracUpper;
788 GMX_ASSERT(pos < c_ddZoneCommMaxNumZones, "The buffers should be sufficiently large");
789 /* Store the extremes in the backward sending buffer,
790 * so they get updated separately from the forward communication.
792 for (int d1 = d; d1 < dd->ndim-1; d1++)
794 /* We invert the order to be able to use the same loop for buf_e */
795 buf_s[pos].min0 = extr_s[d1][1];
796 buf_s[pos].max1 = extr_s[d1][0];
797 buf_s[pos].min1 = extr_s[d1][2];
800 /* Store the cell corner of the dimension we communicate along */
801 buf_s[pos].p1_0 = comm->cell_x0[dim];
806 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
809 if (dd->ndim == 3 && d == 0)
811 buf_s[pos] = comm->zone_d2[0][1];
813 buf_s[pos] = comm->zone_d1[0];
817 /* We only need to communicate the extremes
818 * in the forward direction
820 int numPulses = comm->cd[d].numPulses();
824 /* Take the minimum to avoid double communication */
825 numPulsesMin = std::min(numPulses, dd->nc[dim] - 1 - numPulses);
829 /* Without PBC we should really not communicate over
830 * the boundaries, but implementing that complicates
831 * the communication setup and therefore we simply
832 * do all communication, but ignore some data.
834 numPulsesMin = numPulses;
836 for (int pulse = 0; pulse < numPulsesMin; pulse++)
838 /* Communicate the extremes forward */
839 bool receiveValidData = (applyPbc || dd->ci[dim] > 0);
841 int numElements = dd->ndim - d - 1;
842 ddSendrecv(dd, d, dddirForward,
843 extr_s + d, numElements,
844 extr_r + d, numElements);
846 if (receiveValidData)
848 for (int d1 = d; d1 < dd->ndim - 1; d1++)
850 extr_s[d1][0] = std::max(extr_s[d1][0], extr_r[d1][0]);
851 extr_s[d1][1] = std::min(extr_s[d1][1], extr_r[d1][1]);
852 extr_s[d1][2] = std::min(extr_s[d1][2], extr_r[d1][2]);
857 const int numElementsInBuffer = pos;
858 for (int pulse = 0; pulse < numPulses; pulse++)
860 /* Communicate all the zone information backward */
861 bool receiveValidData = (applyPbc || dd->ci[dim] < dd->nc[dim] - 1);
863 static_assert(sizeof(gmx_ddzone_t) == c_ddzoneNumReals*sizeof(real), "Here we expect gmx_ddzone_t to consist of c_ddzoneNumReals reals (only)");
865 int numReals = numElementsInBuffer*c_ddzoneNumReals;
866 ddSendrecv(dd, d, dddirBackward,
867 gmx::arrayRefFromArray(&buf_s[0].min0, numReals),
868 gmx::arrayRefFromArray(&buf_r[0].min0, numReals));
873 for (int d1 = d + 1; d1 < dd->ndim; d1++)
875 /* Determine the decrease of maximum required
876 * communication height along d1 due to the distance along d,
877 * this avoids a lot of useless atom communication.
879 real dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
882 if (ddbox->tric_dir[dim])
884 /* c is the off-diagonal coupling between the cell planes
885 * along directions d and d1.
887 c = ddbox->v[dim][dd->dim[d1]][dim];
893 real det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
896 dh[d1] = comm->cutoff - (c*dist_d + std::sqrt(det))/(1 + c*c);
900 /* A negative value signals out of range */
906 /* Accumulate the extremes over all pulses */
907 for (int i = 0; i < numElementsInBuffer; i++)
915 if (receiveValidData)
917 buf_e[i].min0 = std::min(buf_e[i].min0, buf_r[i].min0);
918 buf_e[i].max1 = std::max(buf_e[i].max1, buf_r[i].max1);
919 buf_e[i].min1 = std::min(buf_e[i].min1, buf_r[i].min1);
923 if (dd->ndim == 3 && d == 0 && i == numElementsInBuffer - 1)
931 if (receiveValidData && dh[d1] >= 0)
933 buf_e[i].mch0 = std::max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
934 buf_e[i].mch1 = std::max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
937 /* Copy the received buffer to the send buffer,
938 * to pass the data through with the next pulse.
942 if (((applyPbc || dd->ci[dim] + numPulses < dd->nc[dim]) && pulse == numPulses - 1) ||
943 (!applyPbc && dd->ci[dim] + 1 + pulse == dd->nc[dim] - 1))
945 /* Store the extremes */
948 for (int d1 = d; d1 < dd->ndim-1; d1++)
950 extr_s[d1][1] = std::min(extr_s[d1][1], buf_e[pos].min0);
951 extr_s[d1][0] = std::max(extr_s[d1][0], buf_e[pos].max1);
952 extr_s[d1][2] = std::min(extr_s[d1][2], buf_e[pos].min1);
956 if (d == 1 || (d == 0 && dd->ndim == 3))
958 for (int i = d; i < 2; i++)
960 comm->zone_d2[1-d][i] = buf_e[pos];
966 comm->zone_d1[1] = buf_e[pos];
975 int dim = dd->dim[1];
976 for (int i = 0; i < 2; i++)
980 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
982 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d1[i].min0);
983 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d1[i].max1);
988 int dim = dd->dim[2];
989 for (int i = 0; i < 2; i++)
991 for (int j = 0; j < 2; j++)
995 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
997 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
998 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1002 for (int d = 1; d < dd->ndim; d++)
1004 cellsizes[d].fracLowerMax = extr_s[d-1][0];
1005 cellsizes[d].fracUpperMin = extr_s[d-1][1];
1008 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1009 d, cellsizes[d].fracLowerMax, cellsizes[d].fracUpperMin);
1014 static void write_dd_grid_pdb(const char *fn, int64_t step,
1015 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1017 rvec grid_s[2], *grid_r = nullptr, cx, r;
1018 char fname[STRLEN], buf[22];
1020 int a, i, d, z, y, x;
1024 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1025 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1029 snew(grid_r, 2*dd->nnodes);
1032 dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : nullptr);
1036 for (d = 0; d < DIM; d++)
1038 for (i = 0; i < DIM; i++)
1046 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1048 tric[d][i] = box[i][d]/box[i][i];
1057 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1058 out = gmx_fio_fopen(fname, "w");
1059 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1061 for (i = 0; i < dd->nnodes; i++)
1063 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1064 for (d = 0; d < DIM; d++)
1066 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
1068 for (z = 0; z < 2; z++)
1070 for (y = 0; y < 2; y++)
1072 for (x = 0; x < 2; x++)
1074 cx[XX] = grid_r[i*2+x][XX];
1075 cx[YY] = grid_r[i*2+y][YY];
1076 cx[ZZ] = grid_r[i*2+z][ZZ];
1078 gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
1079 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
1083 for (d = 0; d < DIM; d++)
1085 for (x = 0; x < 4; x++)
1089 case 0: y = 1 + i*8 + 2*x; break;
1090 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
1091 case 2: y = 1 + i*8 + x; break;
1093 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
1097 gmx_fio_fclose(out);
1102 void write_dd_pdb(const char *fn, int64_t step, const char *title,
1103 const gmx_mtop_t *mtop, const t_commrec *cr,
1104 int natoms, const rvec x[], const matrix box)
1106 char fname[STRLEN], buf[22];
1109 const char *atomname, *resname;
1115 natoms = dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
1118 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
1120 out = gmx_fio_fopen(fname, "w");
1122 fprintf(out, "TITLE %s\n", title);
1123 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1125 for (int i = 0; i < natoms; i++)
1127 int ii = dd->globalAtomIndices[i];
1128 mtopGetAtomAndResidueName(mtop, ii, &molb, &atomname, &resnr, &resname, nullptr);
1131 if (i < dd->comm->atomRanges.end(DDAtomRanges::Type::Zones))
1134 while (i >= dd->atomGrouping().subRange(0, dd->comm->zones.cg_range[c + 1]).end())
1140 else if (i < dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites))
1142 b = dd->comm->zones.n;
1146 b = dd->comm->zones.n + 1;
1148 gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
1149 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
1151 fprintf(out, "TER\n");
1153 gmx_fio_fclose(out);
1156 real dd_cutoff_multibody(const gmx_domdec_t *dd)
1158 gmx_domdec_comm_t *comm;
1165 if (comm->bInterCGBondeds)
1167 if (comm->cutoff_mbody > 0)
1169 r = comm->cutoff_mbody;
1173 /* cutoff_mbody=0 means we do not have DLB */
1174 r = comm->cellsize_min[dd->dim[0]];
1175 for (di = 1; di < dd->ndim; di++)
1177 r = std::min(r, comm->cellsize_min[dd->dim[di]]);
1179 if (comm->bBondComm)
1181 r = std::max(r, comm->cutoff_mbody);
1185 r = std::min(r, comm->cutoff);
1193 real dd_cutoff_twobody(const gmx_domdec_t *dd)
1197 r_mb = dd_cutoff_multibody(dd);
1199 return std::max(dd->comm->cutoff, r_mb);
1203 static void dd_cart_coord2pmecoord(const gmx_domdec_t *dd, const ivec coord,
1208 nc = dd->nc[dd->comm->cartpmedim];
1209 ntot = dd->comm->ntot[dd->comm->cartpmedim];
1210 copy_ivec(coord, coord_pme);
1211 coord_pme[dd->comm->cartpmedim] =
1212 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
1215 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
1220 npme = dd->comm->npmenodes;
1222 /* Here we assign a PME node to communicate with this DD node
1223 * by assuming that the major index of both is x.
1224 * We add cr->npmenodes/2 to obtain an even distribution.
1226 return (ddindex*npme + npme/2)/npp;
1229 static int *dd_interleaved_pme_ranks(const gmx_domdec_t *dd)
1234 snew(pme_rank, dd->comm->npmenodes);
1236 for (i = 0; i < dd->nnodes; i++)
1238 p0 = ddindex2pmeindex(dd, i);
1239 p1 = ddindex2pmeindex(dd, i+1);
1240 if (i+1 == dd->nnodes || p1 > p0)
1244 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
1246 pme_rank[n] = i + 1 + n;
1254 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
1262 if (dd->comm->bCartesian) {
1263 gmx_ddindex2xyz(dd->nc,ddindex,coords);
1264 dd_coords2pmecoords(dd,coords,coords_pme);
1265 copy_ivec(dd->ntot,nc);
1266 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1267 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1269 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
1271 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
1277 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
1282 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
1284 gmx_domdec_comm_t *comm;
1286 int ddindex, nodeid = -1;
1288 comm = cr->dd->comm;
1293 if (comm->bCartesianPP_PME)
1296 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
1301 ddindex = dd_index(cr->dd->nc, coords);
1302 if (comm->bCartesianPP)
1304 nodeid = comm->ddindex2simnodeid[ddindex];
1310 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
1322 static int dd_simnode2pmenode(const gmx_domdec_t *dd,
1323 const t_commrec gmx_unused *cr,
1328 const gmx_domdec_comm_t *comm = dd->comm;
1330 /* This assumes a uniform x domain decomposition grid cell size */
1331 if (comm->bCartesianPP_PME)
1334 ivec coord, coord_pme;
1335 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
1336 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1338 /* This is a PP node */
1339 dd_cart_coord2pmecoord(dd, coord, coord_pme);
1340 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
1344 else if (comm->bCartesianPP)
1346 if (sim_nodeid < dd->nnodes)
1348 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
1353 /* This assumes DD cells with identical x coordinates
1354 * are numbered sequentially.
1356 if (dd->comm->pmenodes == nullptr)
1358 if (sim_nodeid < dd->nnodes)
1360 /* The DD index equals the nodeid */
1361 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
1367 while (sim_nodeid > dd->comm->pmenodes[i])
1371 if (sim_nodeid < dd->comm->pmenodes[i])
1373 pmenode = dd->comm->pmenodes[i];
1381 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
1386 dd->comm->npmenodes_x, dd->comm->npmenodes_y
1397 std::vector<int> get_pme_ddranks(const t_commrec *cr, int pmenodeid)
1401 ivec coord, coord_pme;
1405 std::vector<int> ddranks;
1406 ddranks.reserve((dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
1408 for (x = 0; x < dd->nc[XX]; x++)
1410 for (y = 0; y < dd->nc[YY]; y++)
1412 for (z = 0; z < dd->nc[ZZ]; z++)
1414 if (dd->comm->bCartesianPP_PME)
1419 dd_cart_coord2pmecoord(dd, coord, coord_pme);
1420 if (dd->ci[XX] == coord_pme[XX] &&
1421 dd->ci[YY] == coord_pme[YY] &&
1422 dd->ci[ZZ] == coord_pme[ZZ])
1424 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
1429 /* The slab corresponds to the nodeid in the PME group */
1430 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
1432 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
1441 static gmx_bool receive_vir_ener(const gmx_domdec_t *dd, const t_commrec *cr)
1443 gmx_bool bReceive = TRUE;
1445 if (cr->npmenodes < dd->nnodes)
1447 gmx_domdec_comm_t *comm = dd->comm;
1448 if (comm->bCartesianPP_PME)
1451 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
1453 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
1454 coords[comm->cartpmedim]++;
1455 if (coords[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1458 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
1459 if (dd_simnode2pmenode(dd, cr, rank) == pmenode)
1461 /* This is not the last PP node for pmenode */
1466 GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
1471 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
1472 if (cr->sim_nodeid+1 < cr->nnodes &&
1473 dd_simnode2pmenode(dd, cr, cr->sim_nodeid+1) == pmenode)
1475 /* This is not the last PP node for pmenode */
1484 static void set_zones_ncg_home(gmx_domdec_t *dd)
1486 gmx_domdec_zones_t *zones;
1489 zones = &dd->comm->zones;
1491 zones->cg_range[0] = 0;
1492 for (i = 1; i < zones->n+1; i++)
1494 zones->cg_range[i] = dd->ncg_home;
1496 /* zone_ncg1[0] should always be equal to ncg_home */
1497 dd->comm->zone_ncg1[0] = dd->ncg_home;
1500 static void restoreAtomGroups(gmx_domdec_t *dd,
1501 const int *gcgs_index, const t_state *state)
1503 gmx::ArrayRef<const int> atomGroupsState = state->cg_gl;
1505 std::vector<int> &globalAtomGroupIndices = dd->globalAtomGroupIndices;
1506 gmx::RangePartitioning &atomGrouping = dd->atomGrouping_;
1508 globalAtomGroupIndices.resize(atomGroupsState.size());
1509 atomGrouping.clear();
1511 /* Copy back the global charge group indices from state
1512 * and rebuild the local charge group to atom index.
1514 for (gmx::index i = 0; i < atomGroupsState.size(); i++)
1516 const int atomGroupGlobal = atomGroupsState[i];
1517 const int groupSize = gcgs_index[atomGroupGlobal + 1] - gcgs_index[atomGroupGlobal];
1518 globalAtomGroupIndices[i] = atomGroupGlobal;
1519 atomGrouping.appendBlock(groupSize);
1522 dd->ncg_home = atomGroupsState.size();
1523 dd->comm->atomRanges.setEnd(DDAtomRanges::Type::Home, atomGrouping.fullRange().end());
1525 set_zones_ncg_home(dd);
1528 static void dd_set_cginfo(gmx::ArrayRef<const int> index_gl, int cg0, int cg1,
1529 t_forcerec *fr, char *bLocalCG)
1531 cginfo_mb_t *cginfo_mb;
1537 cginfo_mb = fr->cginfo_mb;
1538 cginfo = fr->cginfo;
1540 for (cg = cg0; cg < cg1; cg++)
1542 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
1546 if (bLocalCG != nullptr)
1548 for (cg = cg0; cg < cg1; cg++)
1550 bLocalCG[index_gl[cg]] = TRUE;
1555 static void make_dd_indices(gmx_domdec_t *dd,
1556 const int *gcgs_index, int cg_start)
1558 const int numZones = dd->comm->zones.n;
1559 const int *zone2cg = dd->comm->zones.cg_range;
1560 const int *zone_ncg1 = dd->comm->zone_ncg1;
1561 gmx::ArrayRef<const int> globalAtomGroupIndices = dd->globalAtomGroupIndices;
1562 const gmx_bool bCGs = dd->comm->bCGs;
1564 std::vector<int> &globalAtomIndices = dd->globalAtomIndices;
1565 gmx_ga2la_t &ga2la = *dd->ga2la;
1567 if (zone2cg[1] != dd->ncg_home)
1569 gmx_incons("dd->ncg_zone is not up to date");
1572 /* Make the local to global and global to local atom index */
1573 int a = dd->atomGrouping().subRange(cg_start, cg_start).begin();
1574 globalAtomIndices.resize(a);
1575 for (int zone = 0; zone < numZones; zone++)
1584 cg0 = zone2cg[zone];
1586 int cg1 = zone2cg[zone+1];
1587 int cg1_p1 = cg0 + zone_ncg1[zone];
1589 for (int cg = cg0; cg < cg1; cg++)
1594 /* Signal that this cg is from more than one pulse away */
1597 int cg_gl = globalAtomGroupIndices[cg];
1600 for (int a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
1602 globalAtomIndices.push_back(a_gl);
1603 ga2la.insert(a_gl, {a, zone1});
1609 globalAtomIndices.push_back(cg_gl);
1610 ga2la.insert(cg_gl, {a, zone1});
1617 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
1621 if (bLocalCG == nullptr)
1625 for (size_t i = 0; i < dd->globalAtomGroupIndices.size(); i++)
1627 if (!bLocalCG[dd->globalAtomGroupIndices[i]])
1630 "DD rank %d, %s: atom group %zu, global atom group %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i + 1, dd->globalAtomGroupIndices[i] + 1, dd->ncg_home);
1635 for (int i = 0; i < ncg_sys; i++)
1642 if (ngl != dd->globalAtomGroupIndices.size())
1644 fprintf(stderr, "DD rank %d, %s: In bLocalCG %zu atom groups are marked as local, whereas there are %zu\n", dd->rank, where, ngl, dd->globalAtomGroupIndices.size());
1651 static void check_index_consistency(gmx_domdec_t *dd,
1652 int natoms_sys, int ncg_sys,
1657 const int numAtomsInZones = dd->comm->atomRanges.end(DDAtomRanges::Type::Zones);
1659 if (dd->comm->DD_debug > 1)
1661 std::vector<int> have(natoms_sys);
1662 for (int a = 0; a < numAtomsInZones; a++)
1664 int globalAtomIndex = dd->globalAtomIndices[a];
1665 if (have[globalAtomIndex] > 0)
1667 fprintf(stderr, "DD rank %d: global atom %d occurs twice: index %d and %d\n", dd->rank, globalAtomIndex + 1, have[globalAtomIndex], a+1);
1671 have[globalAtomIndex] = a + 1;
1676 std::vector<int> have(numAtomsInZones);
1679 for (int i = 0; i < natoms_sys; i++)
1681 if (const auto entry = dd->ga2la->find(i))
1683 const int a = entry->la;
1684 if (a >= numAtomsInZones)
1686 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, numAtomsInZones);
1692 if (dd->globalAtomIndices[a] != i)
1694 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->globalAtomIndices[a]+1);
1701 if (ngl != numAtomsInZones)
1704 "DD rank %d, %s: %d global atom indices, %d local atoms\n",
1705 dd->rank, where, ngl, numAtomsInZones);
1707 for (int a = 0; a < numAtomsInZones; a++)
1712 "DD rank %d, %s: local atom %d, global %d has no global index\n",
1713 dd->rank, where, a + 1, dd->globalAtomIndices[a] + 1);
1717 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
1721 gmx_fatal(FARGS, "DD rank %d, %s: %d atom(group) index inconsistencies",
1722 dd->rank, where, nerr);
1726 /* Clear all DD global state indices, starting from \p atomGroupStart and \p atomStart */
1727 static void clearDDStateIndices(gmx_domdec_t *dd,
1731 gmx_ga2la_t &ga2la = *dd->ga2la;
1735 /* Clear the whole list without the overhead of searching */
1740 const int numAtomsInZones = dd->comm->atomRanges.end(DDAtomRanges::Type::Zones);
1741 for (int i = 0; i < numAtomsInZones; i++)
1743 ga2la.erase(dd->globalAtomIndices[i]);
1747 char *bLocalCG = dd->comm->bLocalCG;
1750 for (size_t atomGroup = atomGroupStart; atomGroup < dd->globalAtomGroupIndices.size(); atomGroup++)
1752 bLocalCG[dd->globalAtomGroupIndices[atomGroup]] = FALSE;
1756 dd_clear_local_vsite_indices(dd);
1758 if (dd->constraints)
1760 dd_clear_local_constraint_indices(dd);
1764 static bool check_grid_jump(int64_t step,
1765 const gmx_domdec_t *dd,
1767 const gmx_ddbox_t *ddbox,
1770 gmx_domdec_comm_t *comm = dd->comm;
1771 bool invalid = false;
1773 for (int d = 1; d < dd->ndim; d++)
1775 const DDCellsizesWithDlb &cellsizes = comm->cellsizesWithDlb[d];
1776 const int dim = dd->dim[d];
1777 const real limit = grid_jump_limit(comm, cutoff, d);
1778 real bfac = ddbox->box_size[dim];
1779 if (ddbox->tric_dir[dim])
1781 bfac *= ddbox->skew_fac[dim];
1783 if ((cellsizes.fracUpper - cellsizes.fracLowerMax)*bfac < limit ||
1784 (cellsizes.fracLower - cellsizes.fracUpperMin)*bfac > -limit)
1792 /* This error should never be triggered under normal
1793 * circumstances, but you never know ...
1795 gmx_fatal(FARGS, "step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
1796 gmx_step_str(step, buf),
1797 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1805 static float dd_force_load(gmx_domdec_comm_t *comm)
1812 if (comm->eFlop > 1)
1814 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
1819 load = comm->cycl[ddCyclF];
1820 if (comm->cycl_n[ddCyclF] > 1)
1822 /* Subtract the maximum of the last n cycle counts
1823 * to get rid of possible high counts due to other sources,
1824 * for instance system activity, that would otherwise
1825 * affect the dynamic load balancing.
1827 load -= comm->cycl_max[ddCyclF];
1831 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
1833 float gpu_wait, gpu_wait_sum;
1835 gpu_wait = comm->cycl[ddCyclWaitGPU];
1836 if (comm->cycl_n[ddCyclF] > 1)
1838 /* We should remove the WaitGPU time of the same MD step
1839 * as the one with the maximum F time, since the F time
1840 * and the wait time are not independent.
1841 * Furthermore, the step for the max F time should be chosen
1842 * the same on all ranks that share the same GPU.
1843 * But to keep the code simple, we remove the average instead.
1844 * The main reason for artificially long times at some steps
1845 * is spurious CPU activity or MPI time, so we don't expect
1846 * that changes in the GPU wait time matter a lot here.
1848 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/static_cast<float>(comm->cycl_n[ddCyclF]);
1850 /* Sum the wait times over the ranks that share the same GPU */
1851 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
1852 comm->mpi_comm_gpu_shared);
1853 /* Replace the wait time by the average over the ranks */
1854 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
1862 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
1864 gmx_domdec_comm_t *comm;
1869 snew(*dim_f, dd->nc[dim]+1);
1871 for (i = 1; i < dd->nc[dim]; i++)
1873 if (comm->slb_frac[dim])
1875 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
1879 (*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
1882 (*dim_f)[dd->nc[dim]] = 1;
1885 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
1887 int pmeindex, slab, nso, i;
1890 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
1896 ddpme->dim = dimind;
1898 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
1900 ddpme->nslab = (ddpme->dim == 0 ?
1901 dd->comm->npmenodes_x :
1902 dd->comm->npmenodes_y);
1904 if (ddpme->nslab <= 1)
1909 nso = dd->comm->npmenodes/ddpme->nslab;
1910 /* Determine for each PME slab the PP location range for dimension dim */
1911 snew(ddpme->pp_min, ddpme->nslab);
1912 snew(ddpme->pp_max, ddpme->nslab);
1913 for (slab = 0; slab < ddpme->nslab; slab++)
1915 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
1916 ddpme->pp_max[slab] = 0;
1918 for (i = 0; i < dd->nnodes; i++)
1920 ddindex2xyz(dd->nc, i, xyz);
1921 /* For y only use our y/z slab.
1922 * This assumes that the PME x grid size matches the DD grid size.
1924 if (dimind == 0 || xyz[XX] == dd->ci[XX])
1926 pmeindex = ddindex2pmeindex(dd, i);
1929 slab = pmeindex/nso;
1933 slab = pmeindex % ddpme->nslab;
1935 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
1936 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1940 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1943 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1945 if (dd->comm->ddpme[0].dim == XX)
1947 return dd->comm->ddpme[0].maxshift;
1955 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1957 if (dd->comm->ddpme[0].dim == YY)
1959 return dd->comm->ddpme[0].maxshift;
1961 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
1963 return dd->comm->ddpme[1].maxshift;
1971 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
1973 rvec cell_ns_x0, rvec cell_ns_x1,
1976 gmx_domdec_comm_t *comm;
1981 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
1983 dim = dd->dim[dim_ind];
1985 /* Without PBC we don't have restrictions on the outer cells */
1986 if (!(dim >= ddbox->npbcdim &&
1987 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
1989 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
1990 comm->cellsize_min[dim])
1993 gmx_fatal(FARGS, "step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
1994 gmx_step_str(step, buf), dim2char(dim),
1995 comm->cell_x1[dim] - comm->cell_x0[dim],
1996 ddbox->skew_fac[dim],
1997 dd->comm->cellsize_min[dim],
1998 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2002 if ((isDlbOn(dd->comm) && dd->ndim > 1) || ddbox->nboundeddim < DIM)
2004 /* Communicate the boundaries and update cell_ns_x0/1 */
2005 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
2006 if (isDlbOn(dd->comm) && dd->ndim > 1)
2008 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
2013 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
2015 /* Note that the cycles value can be incorrect, either 0 or some
2016 * extremely large value, when our thread migrated to another core
2017 * with an unsynchronized cycle counter. If this happens less often
2018 * that once per nstlist steps, this will not cause issues, since
2019 * we later subtract the maximum value from the sum over nstlist steps.
2020 * A zero count will slightly lower the total, but that's a small effect.
2021 * Note that the main purpose of the subtraction of the maximum value
2022 * is to avoid throwing off the load balancing when stalls occur due
2023 * e.g. system activity or network congestion.
2025 dd->comm->cycl[ddCycl] += cycles;
2026 dd->comm->cycl_n[ddCycl]++;
2027 if (cycles > dd->comm->cycl_max[ddCycl])
2029 dd->comm->cycl_max[ddCycl] = cycles;
2033 static double force_flop_count(t_nrnb *nrnb)
2040 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
2042 /* To get closer to the real timings, we half the count
2043 * for the normal loops and again half it for water loops.
2046 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
2048 sum += nrnb->n[i]*0.25*cost_nrnb(i);
2052 sum += nrnb->n[i]*0.50*cost_nrnb(i);
2055 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
2058 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
2060 sum += nrnb->n[i]*cost_nrnb(i);
2063 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
2065 sum += nrnb->n[i]*cost_nrnb(i);
2071 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
2073 if (dd->comm->eFlop)
2075 dd->comm->flop -= force_flop_count(nrnb);
2078 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
2080 if (dd->comm->eFlop)
2082 dd->comm->flop += force_flop_count(nrnb);
2087 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
2091 for (i = 0; i < ddCyclNr; i++)
2093 dd->comm->cycl[i] = 0;
2094 dd->comm->cycl_n[i] = 0;
2095 dd->comm->cycl_max[i] = 0;
2098 dd->comm->flop_n = 0;
2101 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
2103 gmx_domdec_comm_t *comm;
2104 domdec_load_t *load;
2105 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
2110 fprintf(debug, "get_load_distribution start\n");
2113 wallcycle_start(wcycle, ewcDDCOMMLOAD);
2117 bSepPME = (dd->pme_nodeid >= 0);
2119 if (dd->ndim == 0 && bSepPME)
2121 /* Without decomposition, but with PME nodes, we need the load */
2122 comm->load[0].mdf = comm->cycl[ddCyclPPduringPME];
2123 comm->load[0].pme = comm->cycl[ddCyclPME];
2126 for (int d = dd->ndim - 1; d >= 0; d--)
2128 const DDCellsizesWithDlb &cellsizes = comm->cellsizesWithDlb[d];
2129 const int dim = dd->dim[d];
2130 /* Check if we participate in the communication in this dimension */
2131 if (d == dd->ndim-1 ||
2132 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
2134 load = &comm->load[d];
2135 if (isDlbOn(dd->comm))
2137 cell_frac = cellsizes.fracUpper - cellsizes.fracLower;
2140 if (d == dd->ndim-1)
2142 sbuf[pos++] = dd_force_load(comm);
2143 sbuf[pos++] = sbuf[0];
2144 if (isDlbOn(dd->comm))
2146 sbuf[pos++] = sbuf[0];
2147 sbuf[pos++] = cell_frac;
2150 sbuf[pos++] = cellsizes.fracLowerMax;
2151 sbuf[pos++] = cellsizes.fracUpperMin;
2156 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
2157 sbuf[pos++] = comm->cycl[ddCyclPME];
2162 sbuf[pos++] = comm->load[d+1].sum;
2163 sbuf[pos++] = comm->load[d+1].max;
2164 if (isDlbOn(dd->comm))
2166 sbuf[pos++] = comm->load[d+1].sum_m;
2167 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
2168 sbuf[pos++] = comm->load[d+1].flags;
2171 sbuf[pos++] = cellsizes.fracLowerMax;
2172 sbuf[pos++] = cellsizes.fracUpperMin;
2177 sbuf[pos++] = comm->load[d+1].mdf;
2178 sbuf[pos++] = comm->load[d+1].pme;
2182 /* Communicate a row in DD direction d.
2183 * The communicators are setup such that the root always has rank 0.
2186 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
2187 load->load, load->nload*sizeof(float), MPI_BYTE,
2188 0, comm->mpi_comm_load[d]);
2190 if (dd->ci[dim] == dd->master_ci[dim])
2192 /* We are the master along this row, process this row */
2193 RowMaster *rowMaster = nullptr;
2197 rowMaster = cellsizes.rowMaster.get();
2207 for (int i = 0; i < dd->nc[dim]; i++)
2209 load->sum += load->load[pos++];
2210 load->max = std::max(load->max, load->load[pos]);
2212 if (isDlbOn(dd->comm))
2214 if (rowMaster->dlbIsLimited)
2216 /* This direction could not be load balanced properly,
2217 * therefore we need to use the maximum iso the average load.
2219 load->sum_m = std::max(load->sum_m, load->load[pos]);
2223 load->sum_m += load->load[pos];
2226 load->cvol_min = std::min(load->cvol_min, load->load[pos]);
2230 load->flags = gmx::roundToInt(load->load[pos++]);
2234 rowMaster->bounds[i].cellFracLowerMax = load->load[pos++];
2235 rowMaster->bounds[i].cellFracUpperMin = load->load[pos++];
2240 load->mdf = std::max(load->mdf, load->load[pos]);
2242 load->pme = std::max(load->pme, load->load[pos]);
2246 if (isDlbOn(comm) && rowMaster->dlbIsLimited)
2248 load->sum_m *= dd->nc[dim];
2249 load->flags |= (1<<d);
2257 comm->nload += dd_load_count(comm);
2258 comm->load_step += comm->cycl[ddCyclStep];
2259 comm->load_sum += comm->load[0].sum;
2260 comm->load_max += comm->load[0].max;
2263 for (int d = 0; d < dd->ndim; d++)
2265 if (comm->load[0].flags & (1<<d))
2267 comm->load_lim[d]++;
2273 comm->load_mdf += comm->load[0].mdf;
2274 comm->load_pme += comm->load[0].pme;
2278 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
2282 fprintf(debug, "get_load_distribution finished\n");
2286 static float dd_force_load_fraction(gmx_domdec_t *dd)
2288 /* Return the relative performance loss on the total run time
2289 * due to the force calculation load imbalance.
2291 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
2293 return dd->comm->load_sum/(dd->comm->load_step*dd->nnodes);
2301 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
2303 /* Return the relative performance loss on the total run time
2304 * due to the force calculation load imbalance.
2306 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
2309 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
2310 (dd->comm->load_step*dd->nnodes);
2318 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
2320 gmx_domdec_comm_t *comm = dd->comm;
2322 /* Only the master rank prints loads and only if we measured loads */
2323 if (!DDMASTER(dd) || comm->nload == 0)
2329 int numPpRanks = dd->nnodes;
2330 int numPmeRanks = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
2331 int numRanks = numPpRanks + numPmeRanks;
2332 float lossFraction = 0;
2334 /* Print the average load imbalance and performance loss */
2335 if (dd->nnodes > 1 && comm->load_sum > 0)
2337 float imbalance = comm->load_max*numPpRanks/comm->load_sum - 1;
2338 lossFraction = dd_force_imb_perf_loss(dd);
2340 std::string msg = "\n Dynamic load balancing report:\n";
2341 std::string dlbStateStr;
2343 switch (dd->comm->dlbState)
2345 case DlbState::offUser:
2346 dlbStateStr = "DLB was off during the run per user request.";
2348 case DlbState::offForever:
2349 /* Currectly this can happen due to performance loss observed, cell size
2350 * limitations or incompatibility with other settings observed during
2351 * determineInitialDlbState(). */
2352 dlbStateStr = "DLB got disabled because it was unsuitable to use.";
2354 case DlbState::offCanTurnOn:
2355 dlbStateStr = "DLB was off during the run due to low measured imbalance.";
2357 case DlbState::offTemporarilyLocked:
2358 dlbStateStr = "DLB was locked at the end of the run due to unfinished PP-PME balancing.";
2360 case DlbState::onCanTurnOff:
2361 dlbStateStr = "DLB was turned on during the run due to measured imbalance.";
2363 case DlbState::onUser:
2364 dlbStateStr = "DLB was permanently on during the run per user request.";
2367 GMX_ASSERT(false, "Undocumented DLB state");
2370 msg += " " + dlbStateStr + "\n";
2371 msg += gmx::formatString(" Average load imbalance: %.1f%%.\n", imbalance*100);
2372 msg += gmx::formatString(" The balanceable part of the MD step is %d%%, load imbalance is computed from this.\n",
2373 gmx::roundToInt(dd_force_load_fraction(dd)*100));
2374 msg += gmx::formatString(" Part of the total run time spent waiting due to load imbalance: %.1f%%.\n",
2376 fprintf(fplog, "%s", msg.c_str());
2377 fprintf(stderr, "%s", msg.c_str());
2380 /* Print during what percentage of steps the load balancing was limited */
2381 bool dlbWasLimited = false;
2384 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
2385 for (int d = 0; d < dd->ndim; d++)
2387 int limitPercentage = (200*comm->load_lim[d] + 1)/(2*comm->nload);
2388 sprintf(buf+strlen(buf), " %c %d %%",
2389 dim2char(dd->dim[d]), limitPercentage);
2390 if (limitPercentage >= 50)
2392 dlbWasLimited = true;
2395 sprintf(buf + strlen(buf), "\n");
2396 fprintf(fplog, "%s", buf);
2397 fprintf(stderr, "%s", buf);
2400 /* Print the performance loss due to separate PME - PP rank imbalance */
2401 float lossFractionPme = 0;
2402 if (numPmeRanks > 0 && comm->load_mdf > 0 && comm->load_step > 0)
2404 float pmeForceRatio = comm->load_pme/comm->load_mdf;
2405 lossFractionPme = (comm->load_pme - comm->load_mdf)/comm->load_step;
2406 if (lossFractionPme <= 0)
2408 lossFractionPme *= numPmeRanks/static_cast<float>(numRanks);
2412 lossFractionPme *= numPpRanks/static_cast<float>(numRanks);
2414 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pmeForceRatio);
2415 fprintf(fplog, "%s", buf);
2416 fprintf(stderr, "%s", buf);
2417 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", std::fabs(lossFractionPme)*100);
2418 fprintf(fplog, "%s", buf);
2419 fprintf(stderr, "%s", buf);
2421 fprintf(fplog, "\n");
2422 fprintf(stderr, "\n");
2424 if (lossFraction >= DD_PERF_LOSS_WARN)
2427 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
2428 " in the domain decomposition.\n", lossFraction*100);
2431 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
2433 else if (dlbWasLimited)
2435 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
2437 fprintf(fplog, "%s\n", buf);
2438 fprintf(stderr, "%s\n", buf);
2440 if (numPmeRanks > 0 && std::fabs(lossFractionPme) >= DD_PERF_LOSS_WARN)
2443 "NOTE: %.1f %% performance was lost because the PME ranks\n"
2444 " had %s work to do than the PP ranks.\n"
2445 " You might want to %s the number of PME ranks\n"
2446 " or %s the cut-off and the grid spacing.\n",
2447 std::fabs(lossFractionPme*100),
2448 (lossFractionPme < 0) ? "less" : "more",
2449 (lossFractionPme < 0) ? "decrease" : "increase",
2450 (lossFractionPme < 0) ? "decrease" : "increase");
2451 fprintf(fplog, "%s\n", buf);
2452 fprintf(stderr, "%s\n", buf);
2456 static float dd_vol_min(gmx_domdec_t *dd)
2458 return dd->comm->load[0].cvol_min*dd->nnodes;
2461 static int dd_load_flags(gmx_domdec_t *dd)
2463 return dd->comm->load[0].flags;
2466 static float dd_f_imbal(gmx_domdec_t *dd)
2468 if (dd->comm->load[0].sum > 0)
2470 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1.0f;
2474 /* Something is wrong in the cycle counting, report no load imbalance */
2479 float dd_pme_f_ratio(gmx_domdec_t *dd)
2481 /* Should only be called on the DD master rank */
2482 assert(DDMASTER(dd));
2484 if (dd->comm->load[0].mdf > 0 && dd->comm->cycl_n[ddCyclPME] > 0)
2486 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
2495 dd_print_load(gmx_domdec_t *dd,
2498 gmx::StringOutputStream stream;
2499 gmx::TextWriter log(&stream);
2501 int flags = dd_load_flags(dd);
2504 log.writeString("DD load balancing is limited by minimum cell size in dimension");
2505 for (int d = 0; d < dd->ndim; d++)
2509 log.writeStringFormatted(" %c", dim2char(dd->dim[d]));
2512 log.ensureLineBreak();
2514 log.writeString("DD step %s" + gmx::toString(step));
2515 if (isDlbOn(dd->comm))
2517 log.writeStringFormatted(" vol min/aver %5.3f%c",
2518 dd_vol_min(dd), flags ? '!' : ' ');
2522 log.writeStringFormatted(" load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
2524 if (dd->comm->cycl_n[ddCyclPME])
2526 log.writeStringFormatted(" pme mesh/force %5.3f", dd_pme_f_ratio(dd));
2528 log.ensureLineBreak();
2529 return stream.toString();
2532 static void dd_print_load_verbose(gmx_domdec_t *dd)
2534 if (isDlbOn(dd->comm))
2536 fprintf(stderr, "vol %4.2f%c ",
2537 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
2541 fprintf(stderr, "imb F %2d%% ", gmx::roundToInt(dd_f_imbal(dd)*100));
2543 if (dd->comm->cycl_n[ddCyclPME])
2545 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
2550 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
2555 gmx_bool bPartOfGroup = FALSE;
2557 dim = dd->dim[dim_ind];
2558 copy_ivec(loc, loc_c);
2559 for (i = 0; i < dd->nc[dim]; i++)
2562 rank = dd_index(dd->nc, loc_c);
2563 if (rank == dd->rank)
2565 /* This process is part of the group */
2566 bPartOfGroup = TRUE;
2569 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
2573 dd->comm->mpi_comm_load[dim_ind] = c_row;
2574 if (!isDlbDisabled(dd->comm))
2576 DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
2578 if (dd->ci[dim] == dd->master_ci[dim])
2580 /* This is the root process of this row */
2581 cellsizes.rowMaster = gmx::compat::make_unique<RowMaster>();
2583 RowMaster &rowMaster = *cellsizes.rowMaster;
2584 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
2585 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
2586 rowMaster.isCellMin.resize(dd->nc[dim]);
2589 rowMaster.bounds.resize(dd->nc[dim]);
2591 rowMaster.buf_ncd.resize(dd->nc[dim]);
2595 /* This is not a root process, we only need to receive cell_f */
2596 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
2599 if (dd->ci[dim] == dd->master_ci[dim])
2601 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
2607 void dd_setup_dlb_resource_sharing(t_commrec *cr,
2611 int physicalnode_id_hash;
2613 MPI_Comm mpi_comm_pp_physicalnode;
2615 if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
2617 /* Only ranks with short-ranged tasks (currently) use GPUs.
2618 * If we don't have GPUs assigned, there are no resources to share.
2623 physicalnode_id_hash = gmx_physicalnode_id_hash();
2629 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
2630 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
2631 dd->rank, physicalnode_id_hash, gpu_id);
2633 /* Split the PP communicator over the physical nodes */
2634 /* TODO: See if we should store this (before), as it's also used for
2635 * for the nodecomm summation.
2637 // TODO PhysicalNodeCommunicator could be extended/used to handle
2638 // the need for per-node per-group communicators.
2639 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
2640 &mpi_comm_pp_physicalnode);
2641 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
2642 &dd->comm->mpi_comm_gpu_shared);
2643 MPI_Comm_free(&mpi_comm_pp_physicalnode);
2644 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
2648 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
2651 /* Note that some ranks could share a GPU, while others don't */
2653 if (dd->comm->nrank_gpu_shared == 1)
2655 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
2658 GMX_UNUSED_VALUE(cr);
2659 GMX_UNUSED_VALUE(gpu_id);
2663 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
2666 int dim0, dim1, i, j;
2671 fprintf(debug, "Making load communicators\n");
2674 snew(dd->comm->load, std::max(dd->ndim, 1));
2675 snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
2683 make_load_communicator(dd, 0, loc);
2687 for (i = 0; i < dd->nc[dim0]; i++)
2690 make_load_communicator(dd, 1, loc);
2696 for (i = 0; i < dd->nc[dim0]; i++)
2700 for (j = 0; j < dd->nc[dim1]; j++)
2703 make_load_communicator(dd, 2, loc);
2710 fprintf(debug, "Finished making load communicators\n");
2715 /*! \brief Sets up the relation between neighboring domains and zones */
2716 static void setup_neighbor_relations(gmx_domdec_t *dd)
2718 int d, dim, i, j, m;
2720 gmx_domdec_zones_t *zones;
2721 gmx_domdec_ns_ranges_t *izone;
2723 for (d = 0; d < dd->ndim; d++)
2726 copy_ivec(dd->ci, tmp);
2727 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
2728 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
2729 copy_ivec(dd->ci, tmp);
2730 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
2731 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
2734 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
2737 dd->neighbor[d][1]);
2741 int nzone = (1 << dd->ndim);
2742 int nizone = (1 << std::max(dd->ndim - 1, 0));
2743 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
2745 zones = &dd->comm->zones;
2747 for (i = 0; i < nzone; i++)
2750 clear_ivec(zones->shift[i]);
2751 for (d = 0; d < dd->ndim; d++)
2753 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
2758 for (i = 0; i < nzone; i++)
2760 for (d = 0; d < DIM; d++)
2762 s[d] = dd->ci[d] - zones->shift[i][d];
2767 else if (s[d] >= dd->nc[d])
2773 zones->nizone = nizone;
2774 for (i = 0; i < zones->nizone; i++)
2776 assert(ddNonbondedZonePairRanges[i][0] == i);
2778 izone = &zones->izone[i];
2779 /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
2780 * j-zones up to nzone.
2782 izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
2783 izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
2784 for (dim = 0; dim < DIM; dim++)
2786 if (dd->nc[dim] == 1)
2788 /* All shifts should be allowed */
2789 izone->shift0[dim] = -1;
2790 izone->shift1[dim] = 1;
2794 /* Determine the min/max j-zone shift wrt the i-zone */
2795 izone->shift0[dim] = 1;
2796 izone->shift1[dim] = -1;
2797 for (j = izone->j0; j < izone->j1; j++)
2799 int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
2800 if (shift_diff < izone->shift0[dim])
2802 izone->shift0[dim] = shift_diff;
2804 if (shift_diff > izone->shift1[dim])
2806 izone->shift1[dim] = shift_diff;
2813 if (!isDlbDisabled(dd->comm))
2815 dd->comm->cellsizesWithDlb.resize(dd->ndim);
2818 if (dd->comm->bRecordLoad)
2820 make_load_communicators(dd);
2824 static void make_pp_communicator(const gmx::MDLogger &mdlog,
2826 t_commrec gmx_unused *cr,
2827 bool gmx_unused reorder)
2830 gmx_domdec_comm_t *comm;
2837 if (comm->bCartesianPP)
2839 /* Set up cartesian communication for the particle-particle part */
2840 GMX_LOG(mdlog.info).appendTextFormatted(
2841 "Will use a Cartesian communicator: %d x %d x %d",
2842 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
2844 for (int i = 0; i < DIM; i++)
2848 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
2850 /* We overwrite the old communicator with the new cartesian one */
2851 cr->mpi_comm_mygroup = comm_cart;
2854 dd->mpi_comm_all = cr->mpi_comm_mygroup;
2855 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
2857 if (comm->bCartesianPP_PME)
2859 /* Since we want to use the original cartesian setup for sim,
2860 * and not the one after split, we need to make an index.
2862 snew(comm->ddindex2ddnodeid, dd->nnodes);
2863 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
2864 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
2865 /* Get the rank of the DD master,
2866 * above we made sure that the master node is a PP node.
2876 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
2878 else if (comm->bCartesianPP)
2880 if (cr->npmenodes == 0)
2882 /* The PP communicator is also
2883 * the communicator for this simulation
2885 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
2887 cr->nodeid = dd->rank;
2889 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
2891 /* We need to make an index to go from the coordinates
2892 * to the nodeid of this simulation.
2894 snew(comm->ddindex2simnodeid, dd->nnodes);
2895 snew(buf, dd->nnodes);
2896 if (thisRankHasDuty(cr, DUTY_PP))
2898 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
2900 /* Communicate the ddindex to simulation nodeid index */
2901 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
2902 cr->mpi_comm_mysim);
2905 /* Determine the master coordinates and rank.
2906 * The DD master should be the same node as the master of this sim.
2908 for (int i = 0; i < dd->nnodes; i++)
2910 if (comm->ddindex2simnodeid[i] == 0)
2912 ddindex2xyz(dd->nc, i, dd->master_ci);
2913 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
2918 fprintf(debug, "The master rank is %d\n", dd->masterrank);
2923 /* No Cartesian communicators */
2924 /* We use the rank in dd->comm->all as DD index */
2925 ddindex2xyz(dd->nc, dd->rank, dd->ci);
2926 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
2928 clear_ivec(dd->master_ci);
2932 GMX_LOG(mdlog.info).appendTextFormatted(
2933 "Domain decomposition rank %d, coordinates %d %d %d\n",
2934 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2938 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
2939 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2943 static void receive_ddindex2simnodeid(gmx_domdec_t *dd,
2947 gmx_domdec_comm_t *comm = dd->comm;
2949 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
2952 snew(comm->ddindex2simnodeid, dd->nnodes);
2953 snew(buf, dd->nnodes);
2954 if (thisRankHasDuty(cr, DUTY_PP))
2956 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
2958 /* Communicate the ddindex to simulation nodeid index */
2959 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
2960 cr->mpi_comm_mysim);
2964 GMX_UNUSED_VALUE(dd);
2965 GMX_UNUSED_VALUE(cr);
2969 static void split_communicator(const gmx::MDLogger &mdlog,
2970 t_commrec *cr, gmx_domdec_t *dd,
2971 DdRankOrder gmx_unused rankOrder,
2972 bool gmx_unused reorder)
2974 gmx_domdec_comm_t *comm;
2983 if (comm->bCartesianPP)
2985 for (i = 1; i < DIM; i++)
2987 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
2989 if (bDiv[YY] || bDiv[ZZ])
2991 comm->bCartesianPP_PME = TRUE;
2992 /* If we have 2D PME decomposition, which is always in x+y,
2993 * we stack the PME only nodes in z.
2994 * Otherwise we choose the direction that provides the thinnest slab
2995 * of PME only nodes as this will have the least effect
2996 * on the PP communication.
2997 * But for the PME communication the opposite might be better.
2999 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
3001 dd->nc[YY] > dd->nc[ZZ]))
3003 comm->cartpmedim = ZZ;
3007 comm->cartpmedim = YY;
3009 comm->ntot[comm->cartpmedim]
3010 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
3014 GMX_LOG(mdlog.info).appendTextFormatted(
3015 "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)",
3016 cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
3017 GMX_LOG(mdlog.info).appendText("Will not use a Cartesian communicator for PP <-> PME\n");
3021 if (comm->bCartesianPP_PME)
3027 GMX_LOG(mdlog.info).appendTextFormatted(
3028 "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
3029 comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
3031 for (i = 0; i < DIM; i++)
3035 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, static_cast<int>(reorder),
3037 MPI_Comm_rank(comm_cart, &rank);
3038 if (MASTER(cr) && rank != 0)
3040 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
3043 /* With this assigment we loose the link to the original communicator
3044 * which will usually be MPI_COMM_WORLD, unless have multisim.
3046 cr->mpi_comm_mysim = comm_cart;
3047 cr->sim_nodeid = rank;
3049 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
3051 GMX_LOG(mdlog.info).appendTextFormatted(
3052 "Cartesian rank %d, coordinates %d %d %d\n",
3053 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3055 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
3059 if (cr->npmenodes == 0 ||
3060 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
3062 cr->duty = DUTY_PME;
3065 /* Split the sim communicator into PP and PME only nodes */
3066 MPI_Comm_split(cr->mpi_comm_mysim,
3067 getThisRankDuties(cr),
3068 dd_index(comm->ntot, dd->ci),
3069 &cr->mpi_comm_mygroup);
3076 case DdRankOrder::pp_pme:
3077 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
3079 case DdRankOrder::interleave:
3080 /* Interleave the PP-only and PME-only ranks */
3081 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
3082 comm->pmenodes = dd_interleaved_pme_ranks(dd);
3084 case DdRankOrder::cartesian:
3087 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(rankOrder));
3090 if (dd_simnode2pmenode(dd, cr, cr->sim_nodeid) == -1)
3092 cr->duty = DUTY_PME;
3099 /* Split the sim communicator into PP and PME only nodes */
3100 MPI_Comm_split(cr->mpi_comm_mysim,
3101 getThisRankDuties(cr),
3103 &cr->mpi_comm_mygroup);
3104 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
3108 GMX_LOG(mdlog.info).appendTextFormatted(
3109 "This rank does only %s work.\n",
3110 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
3113 /*! \brief Generates the MPI communicators for domain decomposition */
3114 static void make_dd_communicators(const gmx::MDLogger &mdlog,
3116 gmx_domdec_t *dd, DdRankOrder ddRankOrder)
3118 gmx_domdec_comm_t *comm;
3123 copy_ivec(dd->nc, comm->ntot);
3125 comm->bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
3126 comm->bCartesianPP_PME = FALSE;
3128 /* Reorder the nodes by default. This might change the MPI ranks.
3129 * Real reordering is only supported on very few architectures,
3130 * Blue Gene is one of them.
3132 CartReorder = getenv("GMX_NO_CART_REORDER") == nullptr;
3134 if (cr->npmenodes > 0)
3136 /* Split the communicator into a PP and PME part */
3137 split_communicator(mdlog, cr, dd, ddRankOrder, CartReorder);
3138 if (comm->bCartesianPP_PME)
3140 /* We (possibly) reordered the nodes in split_communicator,
3141 * so it is no longer required in make_pp_communicator.
3143 CartReorder = FALSE;
3148 /* All nodes do PP and PME */
3150 /* We do not require separate communicators */
3151 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
3155 if (thisRankHasDuty(cr, DUTY_PP))
3157 /* Copy or make a new PP communicator */
3158 make_pp_communicator(mdlog, dd, cr, CartReorder);
3162 receive_ddindex2simnodeid(dd, cr);
3165 if (!thisRankHasDuty(cr, DUTY_PME))
3167 /* Set up the commnuication to our PME node */
3168 dd->pme_nodeid = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
3169 dd->pme_receive_vir_ener = receive_vir_ener(dd, cr);
3172 fprintf(debug, "My pme_nodeid %d receive ener %s\n",
3173 dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
3178 dd->pme_nodeid = -1;
3183 dd->ma = gmx::compat::make_unique<AtomDistribution>(dd->nc,
3185 comm->cgs_gl.index[comm->cgs_gl.nr]);
3189 static real *get_slb_frac(const gmx::MDLogger &mdlog,
3190 const char *dir, int nc, const char *size_string)
3192 real *slb_frac, tot;
3197 if (nc > 1 && size_string != nullptr)
3199 GMX_LOG(mdlog.info).appendTextFormatted(
3200 "Using static load balancing for the %s direction", dir);
3203 for (i = 0; i < nc; i++)
3206 sscanf(size_string, "%20lf%n", &dbl, &n);
3209 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
3216 std::string relativeCellSizes = "Relative cell sizes:";
3217 for (i = 0; i < nc; i++)
3220 relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
3222 GMX_LOG(mdlog.info).appendText(relativeCellSizes);
3228 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
3231 gmx_mtop_ilistloop_t iloop;
3232 const InteractionList *il;
3235 iloop = gmx_mtop_ilistloop_init(mtop);
3236 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
3238 for (ftype = 0; ftype < F_NRE; ftype++)
3240 if ((interaction_function[ftype].flags & IF_BOND) &&
3243 n += nmol*il[ftype].size()/(1 + NRAL(ftype));
3251 static int dd_getenv(const gmx::MDLogger &mdlog,
3252 const char *env_var, int def)
3258 val = getenv(env_var);
3261 if (sscanf(val, "%20d", &nst) <= 0)
3265 GMX_LOG(mdlog.info).appendTextFormatted(
3266 "Found env.var. %s = %s, using value %d",
3273 static void check_dd_restrictions(const gmx_domdec_t *dd,
3274 const t_inputrec *ir,
3275 const gmx::MDLogger &mdlog)
3277 if (ir->ePBC == epbcSCREW &&
3278 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
3280 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
3283 if (ir->ns_type == ensSIMPLE)
3285 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
3288 if (ir->nstlist == 0)
3290 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
3293 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
3295 GMX_LOG(mdlog.warning).appendText("comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
3299 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3304 r = ddbox->box_size[XX];
3305 for (di = 0; di < dd->ndim; di++)
3308 /* Check using the initial average cell size */
3309 r = std::min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
3315 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
3317 static DlbState forceDlbOffOrBail(DlbState cmdlineDlbState,
3318 const std::string &reasonStr,
3319 const gmx::MDLogger &mdlog)
3321 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
3322 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
3324 if (cmdlineDlbState == DlbState::onUser)
3326 gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
3328 else if (cmdlineDlbState == DlbState::offCanTurnOn)
3330 GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
3332 return DlbState::offForever;
3335 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
3337 * This function parses the parameters of "-dlb" command line option setting
3338 * corresponding state values. Then it checks the consistency of the determined
3339 * state with other run parameters and settings. As a result, the initial state
3340 * may be altered or an error may be thrown if incompatibility of options is detected.
3342 * \param [in] mdlog Logger.
3343 * \param [in] dlbOption Enum value for the DLB option.
3344 * \param [in] bRecordLoad True if the load balancer is recording load information.
3345 * \param [in] mdrunOptions Options for mdrun.
3346 * \param [in] ir Pointer mdrun to input parameters.
3347 * \returns DLB initial/startup state.
3349 static DlbState determineInitialDlbState(const gmx::MDLogger &mdlog,
3350 DlbOption dlbOption, gmx_bool bRecordLoad,
3351 const MdrunOptions &mdrunOptions,
3352 const t_inputrec *ir)
3354 DlbState dlbState = DlbState::offCanTurnOn;
3358 case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
3359 case DlbOption::no: dlbState = DlbState::offUser; break;
3360 case DlbOption::yes: dlbState = DlbState::onUser; break;
3361 default: gmx_incons("Invalid dlbOption enum value");
3364 /* Reruns don't support DLB: bail or override auto mode */
3365 if (mdrunOptions.rerun)
3367 std::string reasonStr = "it is not supported in reruns.";
3368 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
3371 /* Unsupported integrators */
3372 if (!EI_DYNAMICS(ir->eI))
3374 auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
3375 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
3378 /* Without cycle counters we can't time work to balance on */
3381 std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
3382 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
3385 if (mdrunOptions.reproducible)
3387 std::string reasonStr = "you started a reproducible run.";
3390 case DlbState::offUser:
3392 case DlbState::offForever:
3393 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
3395 case DlbState::offCanTurnOn:
3396 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
3397 case DlbState::onCanTurnOff:
3398 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
3400 case DlbState::onUser:
3401 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", mdlog);
3403 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", static_cast<int>(dlbState));
3410 static void set_dd_dim(const gmx::MDLogger &mdlog, gmx_domdec_t *dd)
3413 if (getenv("GMX_DD_ORDER_ZYX") != nullptr)
3415 /* Decomposition order z,y,x */
3416 GMX_LOG(mdlog.info).appendText("Using domain decomposition order z, y, x");
3417 for (int dim = DIM-1; dim >= 0; dim--)
3419 if (dd->nc[dim] > 1)
3421 dd->dim[dd->ndim++] = dim;
3427 /* Decomposition order x,y,z */
3428 for (int dim = 0; dim < DIM; dim++)
3430 if (dd->nc[dim] > 1)
3432 dd->dim[dd->ndim++] = dim;
3439 /* Set dim[0] to avoid extra checks on ndim in several places */
3444 static gmx_domdec_comm_t *init_dd_comm()
3446 gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
3448 comm->n_load_have = 0;
3449 comm->n_load_collect = 0;
3451 comm->haveTurnedOffDlb = false;
3453 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
3455 comm->sum_nat[i] = 0;
3459 comm->load_step = 0;
3462 clear_ivec(comm->load_lim);
3466 /* This should be replaced by a unique pointer */
3467 comm->balanceRegion = ddBalanceRegionAllocate();
3472 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
3473 static void set_dd_limits_and_grid(const gmx::MDLogger &mdlog,
3474 t_commrec *cr, gmx_domdec_t *dd,
3475 const DomdecOptions &options,
3476 const MdrunOptions &mdrunOptions,
3477 const gmx_mtop_t *mtop,
3478 const t_inputrec *ir,
3480 gmx::ArrayRef<const gmx::RVec> xGlobal,
3484 real r_bonded_limit = -1;
3485 const real tenPercentMargin = 1.1;
3486 gmx_domdec_comm_t *comm = dd->comm;
3488 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
3489 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
3491 dd->pme_recv_f_alloc = 0;
3492 dd->pme_recv_f_buf = nullptr;
3494 /* Initialize to GPU share count to 0, might change later */
3495 comm->nrank_gpu_shared = 0;
3497 comm->dlbState = determineInitialDlbState(mdlog, options.dlbOption, comm->bRecordLoad, mdrunOptions, ir);
3498 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
3499 /* To consider turning DLB on after 2*nstlist steps we need to check
3500 * at partitioning count 3. Thus we need to increase the first count by 2.
3502 comm->ddPartioningCountFirstDlbOff += 2;
3504 GMX_LOG(mdlog.info).appendTextFormatted(
3505 "Dynamic load balancing: %s", edlbs_names[int(comm->dlbState)]);
3507 comm->bPMELoadBalDLBLimits = FALSE;
3509 /* Allocate the charge group/atom sorting struct */
3510 comm->sort = gmx::compat::make_unique<gmx_domdec_sort_t>();
3512 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
3514 comm->bInterCGBondeds = ((ncg_mtop(mtop) > gmx_mtop_num_molecules(*mtop)) ||
3515 mtop->bIntermolecularInteractions);
3516 if (comm->bInterCGBondeds)
3518 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
3522 comm->bInterCGMultiBody = FALSE;
3525 dd->bInterCGcons = gmx::inter_charge_group_constraints(*mtop);
3526 dd->bInterCGsettles = gmx::inter_charge_group_settles(*mtop);
3530 /* Set the cut-off to some very large value,
3531 * so we don't need if statements everywhere in the code.
3532 * We use sqrt, since the cut-off is squared in some places.
3534 comm->cutoff = GMX_CUTOFF_INF;
3538 comm->cutoff = ir->rlist;
3540 comm->cutoff_mbody = 0;
3542 /* Determine the minimum cell size limit, affected by many factors */
3543 comm->cellsize_limit = 0;
3544 comm->bBondComm = FALSE;
3546 /* We do not allow home atoms to move beyond the neighboring domain
3547 * between domain decomposition steps, which limits the cell size.
3548 * Get an estimate of cell size limit due to atom displacement.
3549 * In most cases this is a large overestimate, because it assumes
3550 * non-interaction atoms.
3551 * We set the chance to 1 in a trillion steps.
3553 constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
3554 const real limitForAtomDisplacement =
3555 minCellSizeForAtomDisplacement(*mtop, *ir, c_chanceThatAtomMovesBeyondDomain);
3556 GMX_LOG(mdlog.info).appendTextFormatted(
3557 "Minimum cell size due to atom displacement: %.3f nm",
3558 limitForAtomDisplacement);
3560 comm->cellsize_limit = std::max(comm->cellsize_limit,
3561 limitForAtomDisplacement);
3563 /* TODO: PME decomposition currently requires atoms not to be more than
3564 * 2/3 of comm->cutoff, which is >=rlist, outside of their domain.
3565 * In nearly all cases, limitForAtomDisplacement will be smaller
3566 * than 2/3*rlist, so the PME requirement is satisfied.
3567 * But it would be better for both correctness and performance
3568 * to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
3569 * Note that we would need to improve the pairlist buffer case.
3572 if (comm->bInterCGBondeds)
3574 if (options.minimumCommunicationRange > 0)
3576 comm->cutoff_mbody = options.minimumCommunicationRange;
3577 if (options.useBondedCommunication)
3579 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
3583 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
3585 r_bonded_limit = comm->cutoff_mbody;
3587 else if (ir->bPeriodicMols)
3589 /* Can not easily determine the required cut-off */
3590 GMX_LOG(mdlog.warning).appendText("NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.");
3591 comm->cutoff_mbody = comm->cutoff/2;
3592 r_bonded_limit = comm->cutoff_mbody;
3600 dd_bonded_cg_distance(mdlog, mtop, ir, as_rvec_array(xGlobal.data()), box,
3601 options.checkBondedInteractions,
3604 gmx_bcast(sizeof(r_2b), &r_2b, cr);
3605 gmx_bcast(sizeof(r_mb), &r_mb, cr);
3607 /* We use an initial margin of 10% for the minimum cell size,
3608 * except when we are just below the non-bonded cut-off.
3610 if (options.useBondedCommunication)
3612 if (std::max(r_2b, r_mb) > comm->cutoff)
3614 r_bonded = std::max(r_2b, r_mb);
3615 r_bonded_limit = tenPercentMargin*r_bonded;
3616 comm->bBondComm = TRUE;
3621 r_bonded_limit = std::min(tenPercentMargin*r_bonded, comm->cutoff);
3623 /* We determine cutoff_mbody later */
3627 /* No special bonded communication,
3628 * simply increase the DD cut-off.
3630 r_bonded_limit = tenPercentMargin*std::max(r_2b, r_mb);
3631 comm->cutoff_mbody = r_bonded_limit;
3632 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
3635 GMX_LOG(mdlog.info).appendTextFormatted(
3636 "Minimum cell size due to bonded interactions: %.3f nm",
3639 comm->cellsize_limit = std::max(comm->cellsize_limit, r_bonded_limit);
3643 if (dd->bInterCGcons && options.constraintCommunicationRange <= 0)
3645 /* There is a cell size limit due to the constraints (P-LINCS) */
3646 rconstr = gmx::constr_r_max(mdlog, mtop, ir);
3647 GMX_LOG(mdlog.info).appendTextFormatted(
3648 "Estimated maximum distance required for P-LINCS: %.3f nm",
3650 if (rconstr > comm->cellsize_limit)
3652 GMX_LOG(mdlog.info).appendText("This distance will limit the DD cell size, you can override this with -rcon");
3655 else if (options.constraintCommunicationRange > 0)
3657 /* Here we do not check for dd->bInterCGcons,
3658 * because one can also set a cell size limit for virtual sites only
3659 * and at this point we don't know yet if there are intercg v-sites.
3661 GMX_LOG(mdlog.info).appendTextFormatted(
3662 "User supplied maximum distance required for P-LINCS: %.3f nm",
3663 options.constraintCommunicationRange);
3664 rconstr = options.constraintCommunicationRange;
3666 comm->cellsize_limit = std::max(comm->cellsize_limit, rconstr);
3668 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
3670 if (options.numCells[XX] > 0)
3672 copy_ivec(options.numCells, dd->nc);
3673 set_dd_dim(mdlog, dd);
3674 set_ddbox_cr(cr, &dd->nc, ir, box, xGlobal, ddbox);
3676 if (options.numPmeRanks >= 0)
3678 cr->npmenodes = options.numPmeRanks;
3682 /* When the DD grid is set explicitly and -npme is set to auto,
3683 * don't use PME ranks. We check later if the DD grid is
3684 * compatible with the total number of ranks.
3689 real acs = average_cellsize_min(dd, ddbox);
3690 if (acs < comm->cellsize_limit)
3692 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3693 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
3694 acs, comm->cellsize_limit);
3699 set_ddbox_cr(cr, nullptr, ir, box, xGlobal, ddbox);
3701 /* We need to choose the optimal DD grid and possibly PME nodes */
3703 dd_choose_grid(mdlog, cr, dd, ir, mtop, box, ddbox,
3704 options.numPmeRanks,
3705 !isDlbDisabled(comm),
3707 comm->cellsize_limit, comm->cutoff,
3708 comm->bInterCGBondeds);
3710 if (dd->nc[XX] == 0)
3713 gmx_bool bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
3714 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
3715 !bC ? "-rdd" : "-rcon",
3716 comm->dlbState != DlbState::offUser ? " or -dds" : "",
3717 bC ? " or your LINCS settings" : "");
3719 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3720 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
3722 "Look in the log file for details on the domain decomposition",
3723 cr->nnodes-cr->npmenodes, limit, buf);
3725 set_dd_dim(mdlog, dd);
3728 GMX_LOG(mdlog.info).appendTextFormatted(
3729 "Domain decomposition grid %d x %d x %d, separate PME ranks %d",
3730 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
3732 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
3733 if (cr->nnodes - dd->nnodes != cr->npmenodes)
3735 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3736 "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
3737 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
3739 if (cr->npmenodes > dd->nnodes)
3741 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3742 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
3744 if (cr->npmenodes > 0)
3746 comm->npmenodes = cr->npmenodes;
3750 comm->npmenodes = dd->nnodes;
3753 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
3755 /* The following choices should match those
3756 * in comm_cost_est in domdec_setup.c.
3757 * Note that here the checks have to take into account
3758 * that the decomposition might occur in a different order than xyz
3759 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
3760 * in which case they will not match those in comm_cost_est,
3761 * but since that is mainly for testing purposes that's fine.
3763 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
3764 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
3765 getenv("GMX_PMEONEDD") == nullptr)
3767 comm->npmedecompdim = 2;
3768 comm->npmenodes_x = dd->nc[XX];
3769 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
3773 /* In case nc is 1 in both x and y we could still choose to
3774 * decompose pme in y instead of x, but we use x for simplicity.
3776 comm->npmedecompdim = 1;
3777 if (dd->dim[0] == YY)
3779 comm->npmenodes_x = 1;
3780 comm->npmenodes_y = comm->npmenodes;
3784 comm->npmenodes_x = comm->npmenodes;
3785 comm->npmenodes_y = 1;
3788 GMX_LOG(mdlog.info).appendTextFormatted(
3789 "PME domain decomposition: %d x %d x %d",
3790 comm->npmenodes_x, comm->npmenodes_y, 1);
3794 comm->npmedecompdim = 0;
3795 comm->npmenodes_x = 0;
3796 comm->npmenodes_y = 0;
3799 snew(comm->slb_frac, DIM);
3800 if (isDlbDisabled(comm))
3802 comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
3803 comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
3804 comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
3807 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
3809 if (comm->bBondComm || !isDlbDisabled(comm))
3811 /* Set the bonded communication distance to halfway
3812 * the minimum and the maximum,
3813 * since the extra communication cost is nearly zero.
3815 real acs = average_cellsize_min(dd, ddbox);
3816 comm->cutoff_mbody = 0.5*(r_bonded + acs);
3817 if (!isDlbDisabled(comm))
3819 /* Check if this does not limit the scaling */
3820 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
3821 options.dlbScaling*acs);
3823 if (!comm->bBondComm)
3825 /* Without bBondComm do not go beyond the n.b. cut-off */
3826 comm->cutoff_mbody = std::min(comm->cutoff_mbody, comm->cutoff);
3827 if (comm->cellsize_limit >= comm->cutoff)
3829 /* We don't loose a lot of efficieny
3830 * when increasing it to the n.b. cut-off.
3831 * It can even be slightly faster, because we need
3832 * less checks for the communication setup.
3834 comm->cutoff_mbody = comm->cutoff;
3837 /* Check if we did not end up below our original limit */
3838 comm->cutoff_mbody = std::max(comm->cutoff_mbody, r_bonded_limit);
3840 if (comm->cutoff_mbody > comm->cellsize_limit)
3842 comm->cellsize_limit = comm->cutoff_mbody;
3845 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
3850 fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
3851 "cellsize limit %f\n",
3852 gmx::boolToString(comm->bBondComm), comm->cellsize_limit);
3855 check_dd_restrictions(dd, ir, mdlog);
3858 static void set_dlb_limits(gmx_domdec_t *dd)
3861 for (int d = 0; d < dd->ndim; d++)
3863 /* Set the number of pulses to the value for DLB */
3864 dd->comm->cd[d].ind.resize(dd->comm->cd[d].np_dlb);
3866 dd->comm->cellsize_min[dd->dim[d]] =
3867 dd->comm->cellsize_min_dlb[dd->dim[d]];
3872 static void turn_on_dlb(const gmx::MDLogger &mdlog,
3876 gmx_domdec_comm_t *comm = dd->comm;
3878 real cellsize_min = comm->cellsize_min[dd->dim[0]];
3879 for (int d = 1; d < dd->ndim; d++)
3881 cellsize_min = std::min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
3884 /* Turn off DLB if we're too close to the cell size limit. */
3885 if (cellsize_min < comm->cellsize_limit*1.05)
3887 GMX_LOG(mdlog.info).appendTextFormatted(
3888 "step %s Measured %.1f %% performance loss due to load imbalance, "
3889 "but the minimum cell size is smaller than 1.05 times the cell size limit. "
3890 "Will no longer try dynamic load balancing.",
3891 gmx::toString(step).c_str(), dd_force_imb_perf_loss(dd)*100);
3893 comm->dlbState = DlbState::offForever;
3897 GMX_LOG(mdlog.info).appendTextFormatted(
3898 "step %s Turning on dynamic load balancing, because the performance loss due to load imbalance is %.1f %%.",
3899 gmx::toString(step).c_str(), dd_force_imb_perf_loss(dd)*100);
3900 comm->dlbState = DlbState::onCanTurnOff;
3902 /* Store the non-DLB performance, so we can check if DLB actually
3903 * improves performance.
3905 GMX_RELEASE_ASSERT(comm->cycl_n[ddCyclStep] > 0, "When we turned on DLB, we should have measured cycles");
3906 comm->cyclesPerStepBeforeDLB = comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
3910 /* We can set the required cell size info here,
3911 * so we do not need to communicate this.
3912 * The grid is completely uniform.
3914 for (int d = 0; d < dd->ndim; d++)
3916 RowMaster *rowMaster = comm->cellsizesWithDlb[d].rowMaster.get();
3920 comm->load[d].sum_m = comm->load[d].sum;
3922 int nc = dd->nc[dd->dim[d]];
3923 for (int i = 0; i < nc; i++)
3925 rowMaster->cellFrac[i] = i/static_cast<real>(nc);
3928 rowMaster->bounds[i].cellFracLowerMax = i /static_cast<real>(nc);
3929 rowMaster->bounds[i].cellFracUpperMin = (i + 1)/static_cast<real>(nc);
3932 rowMaster->cellFrac[nc] = 1.0;
3937 static void turn_off_dlb(const gmx::MDLogger &mdlog,
3941 GMX_LOG(mdlog.info).appendText(
3942 "step " + gmx::toString(step) + " Turning off dynamic load balancing, because it is degrading performance.");
3943 dd->comm->dlbState = DlbState::offCanTurnOn;
3944 dd->comm->haveTurnedOffDlb = true;
3945 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
3948 static void turn_off_dlb_forever(const gmx::MDLogger &mdlog,
3952 GMX_RELEASE_ASSERT(dd->comm->dlbState == DlbState::offCanTurnOn, "Can only turn off DLB forever when it was in the can-turn-on state");
3953 GMX_LOG(mdlog.info).appendText(
3954 "step " + gmx::toString(step) + " Will no longer try dynamic load balancing, as it degraded performance.");
3955 dd->comm->dlbState = DlbState::offForever;
3958 static char *init_bLocalCG(const gmx_mtop_t *mtop)
3963 ncg = ncg_mtop(mtop);
3964 snew(bLocalCG, ncg);
3965 for (cg = 0; cg < ncg; cg++)
3967 bLocalCG[cg] = FALSE;
3973 void dd_init_bondeds(FILE *fplog,
3975 const gmx_mtop_t *mtop,
3976 const gmx_vsite_t *vsite,
3977 const t_inputrec *ir,
3978 gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
3980 gmx_domdec_comm_t *comm;
3982 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
3986 if (comm->bBondComm)
3988 /* Communicate atoms beyond the cut-off for bonded interactions */
3991 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
3993 comm->bLocalCG = init_bLocalCG(mtop);
3997 /* Only communicate atoms based on cut-off */
3998 comm->cglink = nullptr;
3999 comm->bLocalCG = nullptr;
4003 static void writeSettings(gmx::TextWriter *log,
4005 const gmx_mtop_t *mtop,
4006 const t_inputrec *ir,
4007 gmx_bool bDynLoadBal,
4009 const gmx_ddbox_t *ddbox)
4011 gmx_domdec_comm_t *comm;
4020 log->writeString("The maximum number of communication pulses is:");
4021 for (d = 0; d < dd->ndim; d++)
4023 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
4025 log->ensureLineBreak();
4026 log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm", comm->cellsize_limit);
4027 log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
4028 log->writeString("The allowed shrink of domain decomposition cells is:");
4029 for (d = 0; d < DIM; d++)
4033 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
4040 comm->cellsize_min_dlb[d]/
4041 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
4043 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
4046 log->ensureLineBreak();
4050 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
4051 log->writeString("The initial number of communication pulses is:");
4052 for (d = 0; d < dd->ndim; d++)
4054 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
4056 log->ensureLineBreak();
4057 log->writeString("The initial domain decomposition cell size is:");
4058 for (d = 0; d < DIM; d++)
4062 log->writeStringFormatted(" %c %.2f nm",
4063 dim2char(d), dd->comm->cellsize_min[d]);
4066 log->ensureLineBreak();
4070 gmx_bool bInterCGVsites = count_intercg_vsites(mtop) != 0;
4072 if (comm->bInterCGBondeds ||
4074 dd->bInterCGcons || dd->bInterCGsettles)
4076 log->writeLine("The maximum allowed distance for charge groups involved in interactions is:");
4077 log->writeLineFormatted("%40s %-7s %6.3f nm", "non-bonded interactions", "", comm->cutoff);
4081 limit = dd->comm->cellsize_limit;
4085 if (dynamic_dd_box(ddbox, ir))
4087 log->writeLine("(the following are initial values, they could change due to box deformation)");
4089 limit = dd->comm->cellsize_min[XX];
4090 for (d = 1; d < DIM; d++)
4092 limit = std::min(limit, dd->comm->cellsize_min[d]);
4096 if (comm->bInterCGBondeds)
4098 log->writeLineFormatted("%40s %-7s %6.3f nm",
4099 "two-body bonded interactions", "(-rdd)",
4100 std::max(comm->cutoff, comm->cutoff_mbody));
4101 log->writeLineFormatted("%40s %-7s %6.3f nm",
4102 "multi-body bonded interactions", "(-rdd)",
4103 (comm->bBondComm || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
4107 log->writeLineFormatted("%40s %-7s %6.3f nm",
4108 "virtual site constructions", "(-rcon)", limit);
4110 if (dd->bInterCGcons || dd->bInterCGsettles)
4112 std::string separation = gmx::formatString("atoms separated by up to %d constraints",
4114 log->writeLineFormatted("%40s %-7s %6.3f nm\n",
4115 separation.c_str(), "(-rcon)", limit);
4117 log->ensureLineBreak();
4121 static void logSettings(const gmx::MDLogger &mdlog,
4123 const gmx_mtop_t *mtop,
4124 const t_inputrec *ir,
4126 const gmx_ddbox_t *ddbox)
4128 gmx::StringOutputStream stream;
4129 gmx::TextWriter log(&stream);
4130 writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
4131 if (dd->comm->dlbState == DlbState::offCanTurnOn)
4134 log.ensureEmptyLine();
4135 log.writeLine("When dynamic load balancing gets turned on, these settings will change to:");
4137 writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
4139 GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
4142 static void set_cell_limits_dlb(const gmx::MDLogger &mdlog,
4145 const t_inputrec *ir,
4146 const gmx_ddbox_t *ddbox)
4148 gmx_domdec_comm_t *comm;
4149 int d, dim, npulse, npulse_d_max, npulse_d;
4154 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
4156 /* Determine the maximum number of comm. pulses in one dimension */
4158 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
4160 /* Determine the maximum required number of grid pulses */
4161 if (comm->cellsize_limit >= comm->cutoff)
4163 /* Only a single pulse is required */
4166 else if (!bNoCutOff && comm->cellsize_limit > 0)
4168 /* We round down slightly here to avoid overhead due to the latency
4169 * of extra communication calls when the cut-off
4170 * would be only slightly longer than the cell size.
4171 * Later cellsize_limit is redetermined,
4172 * so we can not miss interactions due to this rounding.
4174 npulse = static_cast<int>(0.96 + comm->cutoff/comm->cellsize_limit);
4178 /* There is no cell size limit */
4179 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
4182 if (!bNoCutOff && npulse > 1)
4184 /* See if we can do with less pulses, based on dlb_scale */
4186 for (d = 0; d < dd->ndim; d++)
4189 npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->cutoff
4190 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
4191 npulse_d_max = std::max(npulse_d_max, npulse_d);
4193 npulse = std::min(npulse, npulse_d_max);
4196 /* This env var can override npulse */
4197 d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
4204 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
4205 for (d = 0; d < dd->ndim; d++)
4207 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
4208 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
4209 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
4211 comm->bVacDLBNoLimit = FALSE;
4215 /* cellsize_limit is set for LINCS in init_domain_decomposition */
4216 if (!comm->bVacDLBNoLimit)
4218 comm->cellsize_limit = std::max(comm->cellsize_limit,
4219 comm->cutoff/comm->maxpulse);
4221 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
4222 /* Set the minimum cell size for each DD dimension */
4223 for (d = 0; d < dd->ndim; d++)
4225 if (comm->bVacDLBNoLimit ||
4226 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
4228 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
4232 comm->cellsize_min_dlb[dd->dim[d]] =
4233 comm->cutoff/comm->cd[d].np_dlb;
4236 if (comm->cutoff_mbody <= 0)
4238 comm->cutoff_mbody = std::min(comm->cutoff, comm->cellsize_limit);
4246 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
4248 /* If each molecule is a single charge group
4249 * or we use domain decomposition for each periodic dimension,
4250 * we do not need to take pbc into account for the bonded interactions.
4252 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
4255 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
4258 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
4259 static void set_ddgrid_parameters(const gmx::MDLogger &mdlog,
4260 gmx_domdec_t *dd, real dlb_scale,
4261 const gmx_mtop_t *mtop, const t_inputrec *ir,
4262 const gmx_ddbox_t *ddbox)
4264 gmx_domdec_comm_t *comm;
4270 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
4272 init_ddpme(dd, &comm->ddpme[0], 0);
4273 if (comm->npmedecompdim >= 2)
4275 init_ddpme(dd, &comm->ddpme[1], 1);
4280 comm->npmenodes = 0;
4281 if (dd->pme_nodeid >= 0)
4283 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
4284 "Can not have separate PME ranks without PME electrostatics");
4290 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
4292 if (!isDlbDisabled(comm))
4294 set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
4297 logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
4299 if (ir->ePBC == epbcNONE)
4301 vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
4306 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/static_cast<double>(dd->nnodes);
4310 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
4312 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
4314 dd->ga2la = new gmx_ga2la_t(natoms_tot,
4315 static_cast<int>(vol_frac*natoms_tot));
4318 /*! \brief Set some important DD parameters that can be modified by env.vars */
4319 static void set_dd_envvar_options(const gmx::MDLogger &mdlog,
4320 gmx_domdec_t *dd, int rank_mysim)
4322 gmx_domdec_comm_t *comm = dd->comm;
4324 dd->bSendRecv2 = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
4325 comm->dlb_scale_lim = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
4326 comm->eFlop = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
4327 int recload = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
4328 comm->nstDDDump = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
4329 comm->nstDDDumpGrid = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
4330 comm->DD_debug = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
4334 GMX_LOG(mdlog.info).appendText("Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication");
4339 GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
4340 if (comm->eFlop > 1)
4342 srand(1 + rank_mysim);
4344 comm->bRecordLoad = TRUE;
4348 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
4352 DomdecOptions::DomdecOptions() :
4353 checkBondedInteractions(TRUE),
4354 useBondedCommunication(TRUE),
4356 rankOrder(DdRankOrder::pp_pme),
4357 minimumCommunicationRange(0),
4358 constraintCommunicationRange(0),
4359 dlbOption(DlbOption::turnOnWhenUseful),
4365 clear_ivec(numCells);
4368 gmx_domdec_t *init_domain_decomposition(const gmx::MDLogger &mdlog,
4370 const DomdecOptions &options,
4371 const MdrunOptions &mdrunOptions,
4372 const gmx_mtop_t *mtop,
4373 const t_inputrec *ir,
4375 gmx::ArrayRef<const gmx::RVec> xGlobal,
4376 gmx::LocalAtomSetManager *atomSets)
4380 GMX_LOG(mdlog.info).appendTextFormatted(
4381 "\nInitializing Domain Decomposition on %d ranks", cr->nnodes);
4383 dd = new gmx_domdec_t;
4385 dd->comm = init_dd_comm();
4387 /* Initialize DD paritioning counters */
4388 dd->comm->partition_step = INT_MIN;
4391 set_dd_envvar_options(mdlog, dd, cr->nodeid);
4393 gmx_ddbox_t ddbox = {0};
4394 set_dd_limits_and_grid(mdlog, cr, dd, options, mdrunOptions,
4399 make_dd_communicators(mdlog, cr, dd, options.rankOrder);
4401 if (thisRankHasDuty(cr, DUTY_PP))
4403 set_ddgrid_parameters(mdlog, dd, options.dlbScaling, mtop, ir, &ddbox);
4405 setup_neighbor_relations(dd);
4408 /* Set overallocation to avoid frequent reallocation of arrays */
4409 set_over_alloc_dd(TRUE);
4411 clear_dd_cycle_counts(dd);
4413 dd->atomSets = atomSets;
4418 static gmx_bool test_dd_cutoff(t_commrec *cr,
4419 t_state *state, const t_inputrec *ir,
4430 set_ddbox(dd, false, ir, state->box, true, state->x, &ddbox);
4434 for (d = 0; d < dd->ndim; d++)
4438 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
4439 if (dynamic_dd_box(&ddbox, ir))
4441 inv_cell_size *= DD_PRES_SCALE_MARGIN;
4444 np = 1 + static_cast<int>(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
4446 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
4448 if (np > dd->comm->cd[d].np_dlb)
4453 /* If a current local cell size is smaller than the requested
4454 * cut-off, we could still fix it, but this gets very complicated.
4455 * Without fixing here, we might actually need more checks.
4457 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
4464 if (!isDlbDisabled(dd->comm))
4466 /* If DLB is not active yet, we don't need to check the grid jumps.
4467 * Actually we shouldn't, because then the grid jump data is not set.
4469 if (isDlbOn(dd->comm) &&
4470 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
4475 gmx_sumi(1, &LocallyLimited, cr);
4477 if (LocallyLimited > 0)
4486 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, const t_inputrec *ir,
4489 gmx_bool bCutoffAllowed;
4491 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
4495 cr->dd->comm->cutoff = cutoff_req;
4498 return bCutoffAllowed;
4501 void set_dd_dlb_max_cutoff(t_commrec *cr, real cutoff)
4503 gmx_domdec_comm_t *comm;
4505 comm = cr->dd->comm;
4507 /* Turn on the DLB limiting (might have been on already) */
4508 comm->bPMELoadBalDLBLimits = TRUE;
4510 /* Change the cut-off limit */
4511 comm->PMELoadBal_max_cutoff = cutoff;
4515 fprintf(debug, "PME load balancing set a limit to the DLB staggering such that a %f cut-off will continue to fit\n",
4516 comm->PMELoadBal_max_cutoff);
4520 /* Sets whether we should later check the load imbalance data, so that
4521 * we can trigger dynamic load balancing if enough imbalance has
4524 * Used after PME load balancing unlocks DLB, so that the check
4525 * whether DLB will be useful can happen immediately.
4527 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue)
4529 if (dd->comm->dlbState == DlbState::offCanTurnOn)
4531 dd->comm->bCheckWhetherToTurnDlbOn = bValue;
4535 /* Store the DD partitioning count, so we can ignore cycle counts
4536 * over the next nstlist steps, which are often slower.
4538 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
4543 /* Returns if we should check whether there has been enough load
4544 * imbalance to trigger dynamic load balancing.
4546 static gmx_bool dd_dlb_get_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd)
4548 if (dd->comm->dlbState != DlbState::offCanTurnOn)
4553 if (dd->ddp_count <= dd->comm->ddPartioningCountFirstDlbOff)
4555 /* We ignore the first nstlist steps at the start of the run
4556 * or after PME load balancing or after turning DLB off, since
4557 * these often have extra allocation or cache miss overhead.
4562 if (dd->comm->cycl_n[ddCyclStep] == 0)
4564 /* We can have zero timed steps when dd_partition_system is called
4565 * more than once at the same step, e.g. with replica exchange.
4566 * Turning on DLB would trigger an assertion failure later, but is
4567 * also useless right after exchanging replicas.
4572 /* We should check whether we should use DLB directly after
4574 if (dd->comm->bCheckWhetherToTurnDlbOn)
4576 /* This flag was set when the PME load-balancing routines
4577 unlocked DLB, and should now be cleared. */
4578 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, FALSE);
4581 /* We check whether we should use DLB every c_checkTurnDlbOnInterval
4582 * partitionings (we do not do this every partioning, so that we
4583 * avoid excessive communication). */
4584 return dd->comm->n_load_have % c_checkTurnDlbOnInterval == c_checkTurnDlbOnInterval - 1;
4587 gmx_bool dd_dlb_is_on(const gmx_domdec_t *dd)
4589 return isDlbOn(dd->comm);
4592 gmx_bool dd_dlb_is_locked(const gmx_domdec_t *dd)
4594 return (dd->comm->dlbState == DlbState::offTemporarilyLocked);
4597 void dd_dlb_lock(gmx_domdec_t *dd)
4599 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
4600 if (dd->comm->dlbState == DlbState::offCanTurnOn)
4602 dd->comm->dlbState = DlbState::offTemporarilyLocked;
4606 void dd_dlb_unlock(gmx_domdec_t *dd)
4608 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
4609 if (dd->comm->dlbState == DlbState::offTemporarilyLocked)
4611 dd->comm->dlbState = DlbState::offCanTurnOn;
4612 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
4616 static void merge_cg_buffers(int ncell,
4617 gmx_domdec_comm_dim_t *cd, int pulse,
4619 gmx::ArrayRef<int> index_gl,
4621 rvec *cg_cm, rvec *recv_vr,
4622 gmx::ArrayRef<int> cgindex,
4623 cginfo_mb_t *cginfo_mb, int *cginfo)
4625 gmx_domdec_ind_t *ind, *ind_p;
4626 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
4627 int shift, shift_at;
4629 ind = &cd->ind[pulse];
4631 /* First correct the already stored data */
4632 shift = ind->nrecv[ncell];
4633 for (cell = ncell-1; cell >= 0; cell--)
4635 shift -= ind->nrecv[cell];
4638 /* Move the cg's present from previous grid pulses */
4639 cg0 = ncg_cell[ncell+cell];
4640 cg1 = ncg_cell[ncell+cell+1];
4641 cgindex[cg1+shift] = cgindex[cg1];
4642 for (cg = cg1-1; cg >= cg0; cg--)
4644 index_gl[cg+shift] = index_gl[cg];
4645 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
4646 cgindex[cg+shift] = cgindex[cg];
4647 cginfo[cg+shift] = cginfo[cg];
4649 /* Correct the already stored send indices for the shift */
4650 for (p = 1; p <= pulse; p++)
4652 ind_p = &cd->ind[p];
4654 for (c = 0; c < cell; c++)
4656 cg0 += ind_p->nsend[c];
4658 cg1 = cg0 + ind_p->nsend[cell];
4659 for (cg = cg0; cg < cg1; cg++)
4661 ind_p->index[cg] += shift;
4667 /* Merge in the communicated buffers */
4671 for (cell = 0; cell < ncell; cell++)
4673 cg1 = ncg_cell[ncell+cell+1] + shift;
4676 /* Correct the old cg indices */
4677 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
4679 cgindex[cg+1] += shift_at;
4682 for (cg = 0; cg < ind->nrecv[cell]; cg++)
4684 /* Copy this charge group from the buffer */
4685 index_gl[cg1] = recv_i[cg0];
4686 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
4687 /* Add it to the cgindex */
4688 cg_gl = index_gl[cg1];
4689 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
4690 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
4691 cgindex[cg1+1] = cgindex[cg1] + nat;
4696 shift += ind->nrecv[cell];
4697 ncg_cell[ncell+cell+1] = cg1;
4701 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
4704 const gmx::RangePartitioning &atomGroups)
4706 /* Store the atom block boundaries for easy copying of communication buffers
4708 int g = atomGroupStart;
4709 for (int zone = 0; zone < nzone; zone++)
4711 for (gmx_domdec_ind_t &ind : cd->ind)
4713 const auto range = atomGroups.subRange(g, g + ind.nrecv[zone]);
4714 ind.cell2at0[zone] = range.begin();
4715 ind.cell2at1[zone] = range.end();
4716 g += ind.nrecv[zone];
4721 static gmx_bool missing_link(t_blocka *link, int cg_gl, const char *bLocalCG)
4727 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
4729 if (!bLocalCG[link->a[i]])
4738 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
4740 real c[DIM][4]; /* the corners for the non-bonded communication */
4741 real cr0; /* corner for rounding */
4742 real cr1[4]; /* corners for rounding */
4743 real bc[DIM]; /* corners for bounded communication */
4744 real bcr1; /* corner for rounding for bonded communication */
4747 /* Determine the corners of the domain(s) we are communicating with */
4749 set_dd_corners(const gmx_domdec_t *dd,
4750 int dim0, int dim1, int dim2,
4754 const gmx_domdec_comm_t *comm;
4755 const gmx_domdec_zones_t *zones;
4760 zones = &comm->zones;
4762 /* Keep the compiler happy */
4766 /* The first dimension is equal for all cells */
4767 c->c[0][0] = comm->cell_x0[dim0];
4770 c->bc[0] = c->c[0][0];
4775 /* This cell row is only seen from the first row */
4776 c->c[1][0] = comm->cell_x0[dim1];
4777 /* All rows can see this row */
4778 c->c[1][1] = comm->cell_x0[dim1];
4779 if (isDlbOn(dd->comm))
4781 c->c[1][1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
4784 /* For the multi-body distance we need the maximum */
4785 c->bc[1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
4788 /* Set the upper-right corner for rounding */
4789 c->cr0 = comm->cell_x1[dim0];
4794 for (j = 0; j < 4; j++)
4796 c->c[2][j] = comm->cell_x0[dim2];
4798 if (isDlbOn(dd->comm))
4800 /* Use the maximum of the i-cells that see a j-cell */
4801 for (i = 0; i < zones->nizone; i++)
4803 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
4808 std::max(c->c[2][j-4],
4809 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
4815 /* For the multi-body distance we need the maximum */
4816 c->bc[2] = comm->cell_x0[dim2];
4817 for (i = 0; i < 2; i++)
4819 for (j = 0; j < 2; j++)
4821 c->bc[2] = std::max(c->bc[2], comm->zone_d2[i][j].p1_0);
4827 /* Set the upper-right corner for rounding */
4828 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
4829 * Only cell (0,0,0) can see cell 7 (1,1,1)
4831 c->cr1[0] = comm->cell_x1[dim1];
4832 c->cr1[3] = comm->cell_x1[dim1];
4833 if (isDlbOn(dd->comm))
4835 c->cr1[0] = std::max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
4838 /* For the multi-body distance we need the maximum */
4839 c->bcr1 = std::max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
4846 /* Add the atom groups we need to send in this pulse from this zone to
4847 * \p localAtomGroups and \p work
4850 get_zone_pulse_cgs(gmx_domdec_t *dd,
4851 int zonei, int zone,
4853 gmx::ArrayRef<const int> globalAtomGroupIndices,
4854 const gmx::RangePartitioning &atomGroups,
4855 int dim, int dim_ind,
4856 int dim0, int dim1, int dim2,
4857 real r_comm2, real r_bcomm2,
4859 bool distanceIsTriclinic,
4861 real skew_fac2_d, real skew_fac_01,
4862 rvec *v_d, rvec *v_0, rvec *v_1,
4863 const dd_corners_t *c,
4864 const rvec sf2_round,
4865 gmx_bool bDistBonded,
4871 std::vector<int> *localAtomGroups,
4872 dd_comm_setup_work_t *work)
4874 gmx_domdec_comm_t *comm;
4876 gmx_bool bDistMB_pulse;
4878 real r2, rb2, r, tric_sh;
4885 bScrew = (dd->bScrewPBC && dim == XX);
4887 bDistMB_pulse = (bDistMB && bDistBonded);
4889 /* Unpack the work data */
4890 std::vector<int> &ibuf = work->atomGroupBuffer;
4891 std::vector<gmx::RVec> &vbuf = work->positionBuffer;
4895 for (cg = cg0; cg < cg1; cg++)
4899 if (!distanceIsTriclinic)
4901 /* Rectangular direction, easy */
4902 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
4909 r = cg_cm[cg][dim] - c->bc[dim_ind];
4915 /* Rounding gives at most a 16% reduction
4916 * in communicated atoms
4918 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
4920 r = cg_cm[cg][dim0] - c->cr0;
4921 /* This is the first dimension, so always r >= 0 */
4928 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
4930 r = cg_cm[cg][dim1] - c->cr1[zone];
4937 r = cg_cm[cg][dim1] - c->bcr1;
4947 /* Triclinic direction, more complicated */
4950 /* Rounding, conservative as the skew_fac multiplication
4951 * will slightly underestimate the distance.
4953 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
4955 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
4956 for (i = dim0+1; i < DIM; i++)
4958 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
4960 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
4963 rb[dim0] = rn[dim0];
4966 /* Take care that the cell planes along dim0 might not
4967 * be orthogonal to those along dim1 and dim2.
4969 for (i = 1; i <= dim_ind; i++)
4972 if (normal[dim0][dimd] > 0)
4974 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
4977 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
4982 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
4984 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
4986 for (i = dim1+1; i < DIM; i++)
4988 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
4990 rn[dim1] += tric_sh;
4993 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
4994 /* Take care of coupling of the distances
4995 * to the planes along dim0 and dim1 through dim2.
4997 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
4998 /* Take care that the cell planes along dim1
4999 * might not be orthogonal to that along dim2.
5001 if (normal[dim1][dim2] > 0)
5003 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
5009 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
5012 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
5013 /* Take care of coupling of the distances
5014 * to the planes along dim0 and dim1 through dim2.
5016 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
5017 /* Take care that the cell planes along dim1
5018 * might not be orthogonal to that along dim2.
5020 if (normal[dim1][dim2] > 0)
5022 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
5027 /* The distance along the communication direction */
5028 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
5030 for (i = dim+1; i < DIM; i++)
5032 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
5037 r2 += rn[dim]*rn[dim]*skew_fac2_d;
5038 /* Take care of coupling of the distances
5039 * to the planes along dim0 and dim1 through dim2.
5041 if (dim_ind == 1 && zonei == 1)
5043 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
5049 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
5052 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
5053 /* Take care of coupling of the distances
5054 * to the planes along dim0 and dim1 through dim2.
5056 if (dim_ind == 1 && zonei == 1)
5058 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
5066 ((bDistMB && rb2 < r_bcomm2) ||
5067 (bDist2B && r2 < r_bcomm2)) &&
5069 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
5070 missing_link(comm->cglink, globalAtomGroupIndices[cg],
5073 /* Store the local and global atom group indices and position */
5074 localAtomGroups->push_back(cg);
5075 ibuf.push_back(globalAtomGroupIndices[cg]);
5079 if (dd->ci[dim] == 0)
5081 /* Correct cg_cm for pbc */
5082 rvec_add(cg_cm[cg], box[dim], posPbc);
5085 posPbc[YY] = box[YY][YY] - posPbc[YY];
5086 posPbc[ZZ] = box[ZZ][ZZ] - posPbc[ZZ];
5091 copy_rvec(cg_cm[cg], posPbc);
5093 vbuf.emplace_back(posPbc[XX], posPbc[YY], posPbc[ZZ]);
5095 nat += atomGroups.block(cg).size();
5100 work->nsend_zone = nsend_z;
5103 static void clearCommSetupData(dd_comm_setup_work_t *work)
5105 work->localAtomGroupBuffer.clear();
5106 work->atomGroupBuffer.clear();
5107 work->positionBuffer.clear();
5109 work->nsend_zone = 0;
5112 static void setup_dd_communication(gmx_domdec_t *dd,
5113 matrix box, gmx_ddbox_t *ddbox,
5115 t_state *state, PaddedRVecVector *f)
5117 int dim_ind, dim, dim0, dim1, dim2, dimd, nat_tot;
5118 int nzone, nzone_send, zone, zonei, cg0, cg1;
5119 int c, i, cg, cg_gl, nrcg;
5120 int *zone_cg_range, pos_cg;
5121 gmx_domdec_comm_t *comm;
5122 gmx_domdec_zones_t *zones;
5123 gmx_domdec_comm_dim_t *cd;
5124 cginfo_mb_t *cginfo_mb;
5125 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
5126 real r_comm2, r_bcomm2;
5127 dd_corners_t corners;
5128 rvec *cg_cm, *normal, *v_d, *v_0 = nullptr, *v_1 = nullptr;
5129 real skew_fac2_d, skew_fac_01;
5134 fprintf(debug, "Setting up DD communication\n");
5139 if (comm->dth.empty())
5141 /* Initialize the thread data.
5142 * This can not be done in init_domain_decomposition,
5143 * as the numbers of threads is determined later.
5145 int numThreads = gmx_omp_nthreads_get(emntDomdec);
5146 comm->dth.resize(numThreads);
5149 switch (fr->cutoff_scheme)
5155 cg_cm = as_rvec_array(state->x.data());
5158 gmx_incons("unimplemented");
5161 bBondComm = comm->bBondComm;
5163 /* Do we need to determine extra distances for multi-body bondeds? */
5164 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
5166 /* Do we need to determine extra distances for only two-body bondeds? */
5167 bDist2B = (bBondComm && !bDistMB);
5169 r_comm2 = gmx::square(comm->cutoff);
5170 r_bcomm2 = gmx::square(comm->cutoff_mbody);
5174 fprintf(debug, "bBondComm %s, r_bc %f\n", gmx::boolToString(bBondComm), std::sqrt(r_bcomm2));
5177 zones = &comm->zones;
5180 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
5181 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
5183 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
5185 /* Triclinic stuff */
5186 normal = ddbox->normal;
5190 v_0 = ddbox->v[dim0];
5191 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
5193 /* Determine the coupling coefficient for the distances
5194 * to the cell planes along dim0 and dim1 through dim2.
5195 * This is required for correct rounding.
5198 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
5201 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
5207 v_1 = ddbox->v[dim1];
5210 zone_cg_range = zones->cg_range;
5211 cginfo_mb = fr->cginfo_mb;
5213 zone_cg_range[0] = 0;
5214 zone_cg_range[1] = dd->ncg_home;
5215 comm->zone_ncg1[0] = dd->ncg_home;
5216 pos_cg = dd->ncg_home;
5218 nat_tot = comm->atomRanges.numHomeAtoms();
5220 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
5222 dim = dd->dim[dim_ind];
5223 cd = &comm->cd[dim_ind];
5225 /* Check if we need to compute triclinic distances along this dim */
5226 bool distanceIsTriclinic = false;
5227 for (i = 0; i <= dim_ind; i++)
5229 if (ddbox->tric_dir[dd->dim[i]])
5231 distanceIsTriclinic = true;
5235 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
5237 /* No pbc in this dimension, the first node should not comm. */
5245 v_d = ddbox->v[dim];
5246 skew_fac2_d = gmx::square(ddbox->skew_fac[dim]);
5248 cd->receiveInPlace = true;
5249 for (int p = 0; p < cd->numPulses(); p++)
5251 /* Only atoms communicated in the first pulse are used
5252 * for multi-body bonded interactions or for bBondComm.
5254 bDistBonded = ((bDistMB || bDist2B) && p == 0);
5256 gmx_domdec_ind_t *ind = &cd->ind[p];
5258 /* Thread 0 writes in the global index array */
5260 clearCommSetupData(&comm->dth[0]);
5262 for (zone = 0; zone < nzone_send; zone++)
5264 if (dim_ind > 0 && distanceIsTriclinic)
5266 /* Determine slightly more optimized skew_fac's
5268 * This reduces the number of communicated atoms
5269 * by about 10% for 3D DD of rhombic dodecahedra.
5271 for (dimd = 0; dimd < dim; dimd++)
5273 sf2_round[dimd] = 1;
5274 if (ddbox->tric_dir[dimd])
5276 for (i = dd->dim[dimd]+1; i < DIM; i++)
5278 /* If we are shifted in dimension i
5279 * and the cell plane is tilted forward
5280 * in dimension i, skip this coupling.
5282 if (!(zones->shift[nzone+zone][i] &&
5283 ddbox->v[dimd][i][dimd] >= 0))
5286 gmx::square(ddbox->v[dimd][i][dimd]);
5289 sf2_round[dimd] = 1/sf2_round[dimd];
5294 zonei = zone_perm[dim_ind][zone];
5297 /* Here we permutate the zones to obtain a convenient order
5298 * for neighbor searching
5300 cg0 = zone_cg_range[zonei];
5301 cg1 = zone_cg_range[zonei+1];
5305 /* Look only at the cg's received in the previous grid pulse
5307 cg1 = zone_cg_range[nzone+zone+1];
5308 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
5311 const int numThreads = static_cast<int>(comm->dth.size());
5312 #pragma omp parallel for num_threads(numThreads) schedule(static)
5313 for (int th = 0; th < numThreads; th++)
5317 dd_comm_setup_work_t &work = comm->dth[th];
5319 /* Retain data accumulated into buffers of thread 0 */
5322 clearCommSetupData(&work);
5325 int cg0_th = cg0 + ((cg1 - cg0)* th )/numThreads;
5326 int cg1_th = cg0 + ((cg1 - cg0)*(th+1))/numThreads;
5328 /* Get the cg's for this pulse in this zone */
5329 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
5330 dd->globalAtomGroupIndices,
5332 dim, dim_ind, dim0, dim1, dim2,
5334 box, distanceIsTriclinic,
5335 normal, skew_fac2_d, skew_fac_01,
5336 v_d, v_0, v_1, &corners, sf2_round,
5337 bDistBonded, bBondComm,
5340 th == 0 ? &ind->index : &work.localAtomGroupBuffer,
5343 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
5346 std::vector<int> &atomGroups = comm->dth[0].atomGroupBuffer;
5347 std::vector<gmx::RVec> &positions = comm->dth[0].positionBuffer;
5348 ind->nsend[zone] = comm->dth[0].nsend_zone;
5349 /* Append data of threads>=1 to the communication buffers */
5350 for (int th = 1; th < numThreads; th++)
5352 const dd_comm_setup_work_t &dth = comm->dth[th];
5354 ind->index.insert(ind->index.end(), dth.localAtomGroupBuffer.begin(), dth.localAtomGroupBuffer.end());
5355 atomGroups.insert(atomGroups.end(), dth.atomGroupBuffer.begin(), dth.atomGroupBuffer.end());
5356 positions.insert(positions.end(), dth.positionBuffer.begin(), dth.positionBuffer.end());
5357 comm->dth[0].nat += dth.nat;
5358 ind->nsend[zone] += dth.nsend_zone;
5361 /* Clear the counts in case we do not have pbc */
5362 for (zone = nzone_send; zone < nzone; zone++)
5364 ind->nsend[zone] = 0;
5366 ind->nsend[nzone] = ind->index.size();
5367 ind->nsend[nzone + 1] = comm->dth[0].nat;
5368 /* Communicate the number of cg's and atoms to receive */
5369 ddSendrecv(dd, dim_ind, dddirBackward,
5370 ind->nsend, nzone+2,
5371 ind->nrecv, nzone+2);
5375 /* We can receive in place if only the last zone is not empty */
5376 for (zone = 0; zone < nzone-1; zone++)
5378 if (ind->nrecv[zone] > 0)
5380 cd->receiveInPlace = false;
5385 int receiveBufferSize = 0;
5386 if (!cd->receiveInPlace)
5388 receiveBufferSize = ind->nrecv[nzone];
5390 /* These buffer are actually only needed with in-place */
5391 DDBufferAccess<int> globalAtomGroupBuffer(comm->intBuffer, receiveBufferSize);
5392 DDBufferAccess<gmx::RVec> rvecBuffer(comm->rvecBuffer, receiveBufferSize);
5394 dd_comm_setup_work_t &work = comm->dth[0];
5396 /* Make space for the global cg indices */
5397 int numAtomGroupsNew = pos_cg + ind->nrecv[nzone];
5398 dd->globalAtomGroupIndices.resize(numAtomGroupsNew);
5399 /* Communicate the global cg indices */
5400 gmx::ArrayRef<int> integerBufferRef;
5401 if (cd->receiveInPlace)
5403 integerBufferRef = gmx::arrayRefFromArray(dd->globalAtomGroupIndices.data() + pos_cg, ind->nrecv[nzone]);
5407 integerBufferRef = globalAtomGroupBuffer.buffer;
5409 ddSendrecv<int>(dd, dim_ind, dddirBackward,
5410 work.atomGroupBuffer, integerBufferRef);
5412 /* Make space for cg_cm */
5413 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
5414 if (fr->cutoff_scheme == ecutsGROUP)
5420 cg_cm = as_rvec_array(state->x.data());
5422 /* Communicate cg_cm */
5423 gmx::ArrayRef<gmx::RVec> rvecBufferRef;
5424 if (cd->receiveInPlace)
5426 rvecBufferRef = gmx::arrayRefFromArray(reinterpret_cast<gmx::RVec *>(cg_cm + pos_cg), ind->nrecv[nzone]);
5430 rvecBufferRef = rvecBuffer.buffer;
5432 ddSendrecv<gmx::RVec>(dd, dim_ind, dddirBackward,
5433 work.positionBuffer, rvecBufferRef);
5435 /* Make the charge group index */
5436 if (cd->receiveInPlace)
5438 zone = (p == 0 ? 0 : nzone - 1);
5439 while (zone < nzone)
5441 for (cg = 0; cg < ind->nrecv[zone]; cg++)
5443 cg_gl = dd->globalAtomGroupIndices[pos_cg];
5444 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
5445 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
5446 dd->atomGrouping_.appendBlock(nrcg);
5449 /* Update the charge group presence,
5450 * so we can use it in the next pass of the loop.
5452 comm->bLocalCG[cg_gl] = TRUE;
5458 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
5461 zone_cg_range[nzone+zone] = pos_cg;
5466 /* This part of the code is never executed with bBondComm. */
5467 std::vector<int> &atomGroupsIndex = dd->atomGrouping_.rawIndex();
5468 atomGroupsIndex.resize(numAtomGroupsNew + 1);
5470 merge_cg_buffers(nzone, cd, p, zone_cg_range,
5471 dd->globalAtomGroupIndices, integerBufferRef.data(),
5472 cg_cm, as_rvec_array(rvecBufferRef.data()),
5474 fr->cginfo_mb, fr->cginfo);
5475 pos_cg += ind->nrecv[nzone];
5477 nat_tot += ind->nrecv[nzone+1];
5479 if (!cd->receiveInPlace)
5481 /* Store the atom block for easy copying of communication buffers */
5482 make_cell2at_index(cd, nzone, zone_cg_range[nzone], dd->atomGrouping());
5487 comm->atomRanges.setEnd(DDAtomRanges::Type::Zones, nat_tot);
5491 /* We don't need to update cginfo, since that was alrady done above.
5492 * So we pass NULL for the forcerec.
5494 dd_set_cginfo(dd->globalAtomGroupIndices,
5495 dd->ncg_home, dd->globalAtomGroupIndices.size(),
5496 nullptr, comm->bLocalCG);
5501 fprintf(debug, "Finished setting up DD communication, zones:");
5502 for (c = 0; c < zones->n; c++)
5504 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
5506 fprintf(debug, "\n");
5510 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
5514 for (c = 0; c < zones->nizone; c++)
5516 zones->izone[c].cg1 = zones->cg_range[c+1];
5517 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
5518 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
5522 /* \brief Set zone dimensions for zones \p zone_start to \p zone_end-1
5524 * Also sets the atom density for the home zone when \p zone_start=0.
5525 * For this \p numMovedChargeGroupsInHomeZone needs to be passed to tell
5526 * how many charge groups will move but are still part of the current range.
5527 * \todo When converting domdec to use proper classes, all these variables
5528 * should be private and a method should return the correct count
5529 * depending on an internal state.
5531 * \param[in,out] dd The domain decomposition struct
5532 * \param[in] box The box
5533 * \param[in] ddbox The domain decomposition box struct
5534 * \param[in] zone_start The start of the zone range to set sizes for
5535 * \param[in] zone_end The end of the zone range to set sizes for
5536 * \param[in] numMovedChargeGroupsInHomeZone The number of charge groups in the home zone that should moved but are still present in dd->comm->zones.cg_range
5538 static void set_zones_size(gmx_domdec_t *dd,
5539 matrix box, const gmx_ddbox_t *ddbox,
5540 int zone_start, int zone_end,
5541 int numMovedChargeGroupsInHomeZone)
5543 gmx_domdec_comm_t *comm;
5544 gmx_domdec_zones_t *zones;
5553 zones = &comm->zones;
5555 /* Do we need to determine extra distances for multi-body bondeds? */
5556 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
5558 for (z = zone_start; z < zone_end; z++)
5560 /* Copy cell limits to zone limits.
5561 * Valid for non-DD dims and non-shifted dims.
5563 copy_rvec(comm->cell_x0, zones->size[z].x0);
5564 copy_rvec(comm->cell_x1, zones->size[z].x1);
5567 for (d = 0; d < dd->ndim; d++)
5571 for (z = 0; z < zones->n; z++)
5573 /* With a staggered grid we have different sizes
5574 * for non-shifted dimensions.
5576 if (isDlbOn(dd->comm) && zones->shift[z][dim] == 0)
5580 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
5581 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
5585 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
5586 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
5592 rcmbs = comm->cutoff_mbody;
5593 if (ddbox->tric_dir[dim])
5595 rcs /= ddbox->skew_fac[dim];
5596 rcmbs /= ddbox->skew_fac[dim];
5599 /* Set the lower limit for the shifted zone dimensions */
5600 for (z = zone_start; z < zone_end; z++)
5602 if (zones->shift[z][dim] > 0)
5605 if (!isDlbOn(dd->comm) || d == 0)
5607 zones->size[z].x0[dim] = comm->cell_x1[dim];
5608 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
5612 /* Here we take the lower limit of the zone from
5613 * the lowest domain of the zone below.
5617 zones->size[z].x0[dim] =
5618 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
5624 zones->size[z].x0[dim] =
5625 zones->size[zone_perm[2][z-4]].x0[dim];
5629 zones->size[z].x0[dim] =
5630 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
5633 /* A temporary limit, is updated below */
5634 zones->size[z].x1[dim] = zones->size[z].x0[dim];
5638 for (zi = 0; zi < zones->nizone; zi++)
5640 if (zones->shift[zi][dim] == 0)
5642 /* This takes the whole zone into account.
5643 * With multiple pulses this will lead
5644 * to a larger zone then strictly necessary.
5646 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
5647 zones->size[zi].x1[dim]+rcmbs);
5655 /* Loop over the i-zones to set the upper limit of each
5658 for (zi = 0; zi < zones->nizone; zi++)
5660 if (zones->shift[zi][dim] == 0)
5662 /* We should only use zones up to zone_end */
5663 int jZoneEnd = std::min(zones->izone[zi].j1, zone_end);
5664 for (z = zones->izone[zi].j0; z < jZoneEnd; z++)
5666 if (zones->shift[z][dim] > 0)
5668 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
5669 zones->size[zi].x1[dim]+rcs);
5676 for (z = zone_start; z < zone_end; z++)
5678 /* Initialization only required to keep the compiler happy */
5679 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
5682 /* To determine the bounding box for a zone we need to find
5683 * the extreme corners of 4, 2 or 1 corners.
5685 nc = 1 << (ddbox->nboundeddim - 1);
5687 for (c = 0; c < nc; c++)
5689 /* Set up a zone corner at x=0, ignoring trilinic couplings */
5693 corner[YY] = zones->size[z].x0[YY];
5697 corner[YY] = zones->size[z].x1[YY];
5701 corner[ZZ] = zones->size[z].x0[ZZ];
5705 corner[ZZ] = zones->size[z].x1[ZZ];
5707 if (dd->ndim == 1 && dd->dim[0] < ZZ && ZZ < dd->npbcdim &&
5708 box[ZZ][1 - dd->dim[0]] != 0)
5710 /* With 1D domain decomposition the cg's are not in
5711 * the triclinic box, but triclinic x-y and rectangular y/x-z.
5712 * Shift the corner of the z-vector back to along the box
5713 * vector of dimension d, so it will later end up at 0 along d.
5714 * This can affect the location of this corner along dd->dim[0]
5715 * through the matrix operation below if box[d][dd->dim[0]]!=0.
5717 int d = 1 - dd->dim[0];
5719 corner[d] -= corner[ZZ]*box[ZZ][d]/box[ZZ][ZZ];
5721 /* Apply the triclinic couplings */
5722 assert(ddbox->npbcdim <= DIM);
5723 for (i = YY; i < ddbox->npbcdim; i++)
5725 for (j = XX; j < i; j++)
5727 corner[j] += corner[i]*box[i][j]/box[i][i];
5732 copy_rvec(corner, corner_min);
5733 copy_rvec(corner, corner_max);
5737 for (i = 0; i < DIM; i++)
5739 corner_min[i] = std::min(corner_min[i], corner[i]);
5740 corner_max[i] = std::max(corner_max[i], corner[i]);
5744 /* Copy the extreme cornes without offset along x */
5745 for (i = 0; i < DIM; i++)
5747 zones->size[z].bb_x0[i] = corner_min[i];
5748 zones->size[z].bb_x1[i] = corner_max[i];
5750 /* Add the offset along x */
5751 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
5752 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
5755 if (zone_start == 0)
5758 for (dim = 0; dim < DIM; dim++)
5760 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
5762 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0] - numMovedChargeGroupsInHomeZone)/vol;
5767 for (z = zone_start; z < zone_end; z++)
5769 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
5771 zones->size[z].x0[XX], zones->size[z].x1[XX],
5772 zones->size[z].x0[YY], zones->size[z].x1[YY],
5773 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
5774 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
5776 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
5777 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
5778 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
5783 static bool comp_cgsort(const gmx_cgsort_t &a, const gmx_cgsort_t &b)
5788 return a.ind_gl < b.ind_gl;
5790 return a.nsc < b.nsc;
5793 /* Order data in \p dataToSort according to \p sort
5795 * Note: both buffers should have at least \p sort.size() elements.
5797 template <typename T>
5799 orderVector(gmx::ArrayRef<const gmx_cgsort_t> sort,
5800 gmx::ArrayRef<T> dataToSort,
5801 gmx::ArrayRef<T> sortBuffer)
5803 GMX_ASSERT(dataToSort.size() >= sort.size(), "The vector needs to be sufficiently large");
5804 GMX_ASSERT(sortBuffer.size() >= sort.size(), "The sorting buffer needs to be sufficiently large");
5806 /* Order the data into the temporary buffer */
5808 for (const gmx_cgsort_t &entry : sort)
5810 sortBuffer[i++] = dataToSort[entry.ind];
5813 /* Copy back to the original array */
5814 std::copy(sortBuffer.begin(), sortBuffer.begin() + sort.size(),
5815 dataToSort.begin());
5818 /* Order data in \p dataToSort according to \p sort
5820 * Note: \p vectorToSort should have at least \p sort.size() elements,
5821 * \p workVector is resized when it is too small.
5823 template <typename T>
5825 orderVector(gmx::ArrayRef<const gmx_cgsort_t> sort,
5826 gmx::ArrayRef<T> vectorToSort,
5827 std::vector<T> *workVector)
5829 if (gmx::index(workVector->size()) < sort.size())
5831 workVector->resize(sort.size());
5833 orderVector<T>(sort, vectorToSort, *workVector);
5836 static void order_vec_atom(const gmx::RangePartitioning *atomGroups,
5837 gmx::ArrayRef<const gmx_cgsort_t> sort,
5838 gmx::ArrayRef<gmx::RVec> v,
5839 gmx::ArrayRef<gmx::RVec> buf)
5841 if (atomGroups == nullptr)
5843 /* Avoid the useless loop of the atoms within a cg */
5844 orderVector(sort, v, buf);
5849 /* Order the data */
5851 for (const gmx_cgsort_t &entry : sort)
5853 for (int i : atomGroups->block(entry.ind))
5855 copy_rvec(v[i], buf[a]);
5861 /* Copy back to the original array */
5862 for (int a = 0; a < atot; a++)
5864 copy_rvec(buf[a], v[a]);
5868 /* Returns whether a < b */
5869 static bool compareCgsort(const gmx_cgsort_t &a,
5870 const gmx_cgsort_t &b)
5872 return (a.nsc < b.nsc ||
5873 (a.nsc == b.nsc && a.ind_gl < b.ind_gl));
5876 static void orderedSort(gmx::ArrayRef<const gmx_cgsort_t> stationary,
5877 gmx::ArrayRef<gmx_cgsort_t> moved,
5878 std::vector<gmx_cgsort_t> *sort1)
5880 /* The new indices are not very ordered, so we qsort them */
5881 std::sort(moved.begin(), moved.end(), comp_cgsort);
5883 /* stationary is already ordered, so now we can merge the two arrays */
5884 sort1->resize(stationary.size() + moved.size());
5885 std::merge(stationary.begin(), stationary.end(),
5886 moved.begin(), moved.end(),
5891 /* Set the sorting order for systems with charge groups, returned in sort->sort.
5892 * The order is according to the global charge group index.
5893 * This adds and removes charge groups that moved between domains.
5895 static void dd_sort_order(const gmx_domdec_t *dd,
5896 const t_forcerec *fr,
5898 gmx_domdec_sort_t *sort)
5900 const int *a = fr->ns->grid->cell_index;
5902 const int movedValue = NSGRID_SIGNAL_MOVED_FAC*fr->ns->grid->ncells;
5904 if (ncg_home_old >= 0)
5906 std::vector<gmx_cgsort_t> &stationary = sort->stationary;
5907 std::vector<gmx_cgsort_t> &moved = sort->moved;
5909 /* The charge groups that remained in the same ns grid cell
5910 * are completely ordered. So we can sort efficiently by sorting
5911 * the charge groups that did move into the stationary list.
5912 * Note: push_back() seems to be slightly slower than direct access.
5916 for (int i = 0; i < dd->ncg_home; i++)
5918 /* Check if this cg did not move to another node */
5919 if (a[i] < movedValue)
5923 entry.ind_gl = dd->globalAtomGroupIndices[i];
5926 if (i >= ncg_home_old || a[i] != sort->sorted[i].nsc)
5928 /* This cg is new on this node or moved ns grid cell */
5929 moved.push_back(entry);
5933 /* This cg did not move */
5934 stationary.push_back(entry);
5941 fprintf(debug, "ordered sort cgs: stationary %zu moved %zu\n",
5942 stationary.size(), moved.size());
5944 /* Sort efficiently */
5945 orderedSort(stationary, moved, &sort->sorted);
5949 std::vector<gmx_cgsort_t> &cgsort = sort->sorted;
5951 cgsort.reserve(dd->ncg_home);
5953 for (int i = 0; i < dd->ncg_home; i++)
5955 /* Sort on the ns grid cell indices
5956 * and the global topology index
5960 entry.ind_gl = dd->globalAtomGroupIndices[i];
5962 cgsort.push_back(entry);
5963 if (cgsort[i].nsc < movedValue)
5970 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, numCGNew);
5972 /* Determine the order of the charge groups using qsort */
5973 std::sort(cgsort.begin(), cgsort.end(), comp_cgsort);
5975 /* Remove the charge groups which are no longer at home here */
5976 cgsort.resize(numCGNew);
5980 /* Returns the sorting order for atoms based on the nbnxn grid order in sort */
5981 static void dd_sort_order_nbnxn(const t_forcerec *fr,
5982 std::vector<gmx_cgsort_t> *sort)
5984 gmx::ArrayRef<const int> atomOrder = nbnxn_get_atomorder(fr->nbv->nbs.get());
5986 /* Using push_back() instead of this resize results in much slower code */
5987 sort->resize(atomOrder.size());
5988 gmx::ArrayRef<gmx_cgsort_t> buffer = *sort;
5989 size_t numSorted = 0;
5990 for (int i : atomOrder)
5994 /* The values of nsc and ind_gl are not used in this case */
5995 buffer[numSorted++].ind = i;
5998 sort->resize(numSorted);
6001 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
6004 gmx_domdec_sort_t *sort = dd->comm->sort.get();
6006 switch (fr->cutoff_scheme)
6009 dd_sort_order(dd, fr, ncg_home_old, sort);
6012 dd_sort_order_nbnxn(fr, &sort->sorted);
6015 gmx_incons("unimplemented");
6018 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
6020 /* We alloc with the old size, since cgindex is still old */
6021 GMX_ASSERT(atomGrouping.numBlocks() == dd->ncg_home, "atomGroups and dd should be consistent");
6022 DDBufferAccess<gmx::RVec> rvecBuffer(dd->comm->rvecBuffer, atomGrouping.fullRange().end());
6024 const gmx::RangePartitioning *atomGroupsPtr = (dd->comm->bCGs ? &atomGrouping : nullptr);
6026 /* Set the new home atom/charge group count */
6027 dd->ncg_home = sort->sorted.size();
6030 fprintf(debug, "Set the new home charge group count to %d\n",
6034 /* Reorder the state */
6035 gmx::ArrayRef<const gmx_cgsort_t> cgsort = sort->sorted;
6036 GMX_RELEASE_ASSERT(cgsort.size() == dd->ncg_home, "We should sort all the home atom groups");
6038 if (state->flags & (1 << estX))
6040 order_vec_atom(atomGroupsPtr, cgsort, state->x, rvecBuffer.buffer);
6042 if (state->flags & (1 << estV))
6044 order_vec_atom(atomGroupsPtr, cgsort, state->v, rvecBuffer.buffer);
6046 if (state->flags & (1 << estCGP))
6048 order_vec_atom(atomGroupsPtr, cgsort, state->cg_p, rvecBuffer.buffer);
6051 if (fr->cutoff_scheme == ecutsGROUP)
6054 gmx::ArrayRef<gmx::RVec> cgcmRef = gmx::arrayRefFromArray(reinterpret_cast<gmx::RVec *>(cgcm[0]), cgsort.size());
6055 orderVector(cgsort, cgcmRef, rvecBuffer.buffer);
6058 /* Reorder the global cg index */
6059 orderVector<int>(cgsort, dd->globalAtomGroupIndices, &sort->intBuffer);
6060 /* Reorder the cginfo */
6061 orderVector<int>(cgsort, gmx::arrayRefFromArray(fr->cginfo, cgsort.size()), &sort->intBuffer);
6062 /* Rebuild the local cg index */
6065 /* We make a new, ordered atomGroups object and assign it to
6066 * the old one. This causes some allocation overhead, but saves
6067 * a copy back of the whole index.
6069 gmx::RangePartitioning ordered;
6070 for (const gmx_cgsort_t &entry : cgsort)
6072 ordered.appendBlock(atomGrouping.block(entry.ind).size());
6074 dd->atomGrouping_ = ordered;
6078 dd->atomGrouping_.setAllBlocksSizeOne(dd->ncg_home);
6080 /* Set the home atom number */
6081 dd->comm->atomRanges.setEnd(DDAtomRanges::Type::Home, dd->atomGrouping().fullRange().end());
6083 if (fr->cutoff_scheme == ecutsVERLET)
6085 /* The atoms are now exactly in grid order, update the grid order */
6086 nbnxn_set_atomorder(fr->nbv->nbs.get());
6090 /* Copy the sorted ns cell indices back to the ns grid struct */
6091 for (gmx::index i = 0; i < cgsort.size(); i++)
6093 fr->ns->grid->cell_index[i] = cgsort[i].nsc;
6095 fr->ns->grid->nr = cgsort.size();
6099 static void add_dd_statistics(gmx_domdec_t *dd)
6101 gmx_domdec_comm_t *comm = dd->comm;
6103 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6105 auto range = static_cast<DDAtomRanges::Type>(i);
6107 comm->atomRanges.end(range) - comm->atomRanges.start(range);
6112 void reset_dd_statistics_counters(gmx_domdec_t *dd)
6114 gmx_domdec_comm_t *comm = dd->comm;
6116 /* Reset all the statistics and counters for total run counting */
6117 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6119 comm->sum_nat[i] = 0;
6123 comm->load_step = 0;
6126 clear_ivec(comm->load_lim);
6131 void print_dd_statistics(const t_commrec *cr, const t_inputrec *ir, FILE *fplog)
6133 gmx_domdec_comm_t *comm = cr->dd->comm;
6135 const int numRanges = static_cast<int>(DDAtomRanges::Type::Number);
6136 gmx_sumd(numRanges, comm->sum_nat, cr);
6138 if (fplog == nullptr)
6143 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
6145 for (int i = static_cast<int>(DDAtomRanges::Type::Zones); i < numRanges; i++)
6147 auto range = static_cast<DDAtomRanges::Type>(i);
6148 double av = comm->sum_nat[i]/comm->ndecomp;
6151 case DDAtomRanges::Type::Zones:
6153 " av. #atoms communicated per step for force: %d x %.1f\n",
6156 case DDAtomRanges::Type::Vsites:
6157 if (cr->dd->vsite_comm)
6160 " av. #atoms communicated per step for vsites: %d x %.1f\n",
6161 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
6165 case DDAtomRanges::Type::Constraints:
6166 if (cr->dd->constraint_comm)
6169 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
6170 1 + ir->nLincsIter, av);
6174 gmx_incons(" Unknown type for DD statistics");
6177 fprintf(fplog, "\n");
6179 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
6181 print_dd_load_av(fplog, cr->dd);
6185 // TODO Remove fplog when group scheme and charge groups are gone
6186 void dd_partition_system(FILE *fplog,
6187 const gmx::MDLogger &mdlog,
6189 const t_commrec *cr,
6190 gmx_bool bMasterState,
6192 t_state *state_global,
6193 const gmx_mtop_t *top_global,
6194 const t_inputrec *ir,
6195 t_state *state_local,
6196 PaddedRVecVector *f,
6197 gmx::MDAtoms *mdAtoms,
6198 gmx_localtop_t *top_local,
6201 gmx::Constraints *constr,
6203 gmx_wallcycle *wcycle,
6207 gmx_domdec_comm_t *comm;
6208 gmx_ddbox_t ddbox = {0};
6210 int64_t step_pcoupl;
6211 rvec cell_ns_x0, cell_ns_x1;
6212 int ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
6213 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckWhetherToTurnDlbOn, bLogLoad;
6214 gmx_bool bRedist, bResortAll;
6215 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
6219 wallcycle_start(wcycle, ewcDOMDEC);
6224 // TODO if the update code becomes accessible here, use
6225 // upd->deform for this logic.
6226 bBoxChanged = (bMasterState || inputrecDeform(ir));
6227 if (ir->epc != epcNO)
6229 /* With nstpcouple > 1 pressure coupling happens.
6230 * one step after calculating the pressure.
6231 * Box scaling happens at the end of the MD step,
6232 * after the DD partitioning.
6233 * We therefore have to do DLB in the first partitioning
6234 * after an MD step where P-coupling occurred.
6235 * We need to determine the last step in which p-coupling occurred.
6236 * MRS -- need to validate this for vv?
6238 int n = ir->nstpcouple;
6241 step_pcoupl = step - 1;
6245 step_pcoupl = ((step - 1)/n)*n + 1;
6247 if (step_pcoupl >= comm->partition_step)
6253 bNStGlobalComm = (step % nstglobalcomm == 0);
6261 /* Should we do dynamic load balacing this step?
6262 * Since it requires (possibly expensive) global communication,
6263 * we might want to do DLB less frequently.
6265 if (bBoxChanged || ir->epc != epcNO)
6267 bDoDLB = bBoxChanged;
6271 bDoDLB = bNStGlobalComm;
6275 /* Check if we have recorded loads on the nodes */
6276 if (comm->bRecordLoad && dd_load_count(comm) > 0)
6278 bCheckWhetherToTurnDlbOn = dd_dlb_get_should_check_whether_to_turn_dlb_on(dd);
6280 /* Print load every nstlog, first and last step to the log file */
6281 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
6282 comm->n_load_collect == 0 ||
6284 (step + ir->nstlist > ir->init_step + ir->nsteps)));
6286 /* Avoid extra communication due to verbose screen output
6287 * when nstglobalcomm is set.
6289 if (bDoDLB || bLogLoad || bCheckWhetherToTurnDlbOn ||
6290 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
6292 get_load_distribution(dd, wcycle);
6297 GMX_LOG(mdlog.info).asParagraph().appendText(dd_print_load(dd, step-1));
6301 dd_print_load_verbose(dd);
6304 comm->n_load_collect++;
6310 /* Add the measured cycles to the running average */
6311 const float averageFactor = 0.1f;
6312 comm->cyclesPerStepDlbExpAverage =
6313 (1 - averageFactor)*comm->cyclesPerStepDlbExpAverage +
6314 averageFactor*comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
6316 if (comm->dlbState == DlbState::onCanTurnOff &&
6317 dd->comm->n_load_have % c_checkTurnDlbOffInterval == c_checkTurnDlbOffInterval - 1)
6319 gmx_bool turnOffDlb;
6322 /* If the running averaged cycles with DLB are more
6323 * than before we turned on DLB, turn off DLB.
6324 * We will again run and check the cycles without DLB
6325 * and we can then decide if to turn off DLB forever.
6327 turnOffDlb = (comm->cyclesPerStepDlbExpAverage >
6328 comm->cyclesPerStepBeforeDLB);
6330 dd_bcast(dd, sizeof(turnOffDlb), &turnOffDlb);
6333 /* To turn off DLB, we need to redistribute the atoms */
6334 dd_collect_state(dd, state_local, state_global);
6335 bMasterState = TRUE;
6336 turn_off_dlb(mdlog, dd, step);
6340 else if (bCheckWhetherToTurnDlbOn)
6342 gmx_bool turnOffDlbForever = FALSE;
6343 gmx_bool turnOnDlb = FALSE;
6345 /* Since the timings are node dependent, the master decides */
6348 /* If we recently turned off DLB, we want to check if
6349 * performance is better without DLB. We want to do this
6350 * ASAP to minimize the chance that external factors
6351 * slowed down the DLB step are gone here and we
6352 * incorrectly conclude that DLB was causing the slowdown.
6353 * So we measure one nstlist block, no running average.
6355 if (comm->haveTurnedOffDlb &&
6356 comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep] <
6357 comm->cyclesPerStepDlbExpAverage)
6359 /* After turning off DLB we ran nstlist steps in fewer
6360 * cycles than with DLB. This likely means that DLB
6361 * in not benefical, but this could be due to a one
6362 * time unlucky fluctuation, so we require two such
6363 * observations in close succession to turn off DLB
6366 if (comm->dlbSlowerPartitioningCount > 0 &&
6367 dd->ddp_count < comm->dlbSlowerPartitioningCount + 10*c_checkTurnDlbOnInterval)
6369 turnOffDlbForever = TRUE;
6371 comm->haveTurnedOffDlb = false;
6372 /* Register when we last measured DLB slowdown */
6373 comm->dlbSlowerPartitioningCount = dd->ddp_count;
6377 /* Here we check if the max PME rank load is more than 0.98
6378 * the max PP force load. If so, PP DLB will not help,
6379 * since we are (almost) limited by PME. Furthermore,
6380 * DLB will cause a significant extra x/f redistribution
6381 * cost on the PME ranks, which will then surely result
6382 * in lower total performance.
6384 if (cr->npmenodes > 0 &&
6385 dd_pme_f_ratio(dd) > 1 - DD_PERF_LOSS_DLB_ON)
6391 turnOnDlb = (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
6397 gmx_bool turnOffDlbForever;
6401 turnOffDlbForever, turnOnDlb
6403 dd_bcast(dd, sizeof(bools), &bools);
6404 if (bools.turnOffDlbForever)
6406 turn_off_dlb_forever(mdlog, dd, step);
6408 else if (bools.turnOnDlb)
6410 turn_on_dlb(mdlog, dd, step);
6415 comm->n_load_have++;
6418 cgs_gl = &comm->cgs_gl;
6423 /* Clear the old state */
6424 clearDDStateIndices(dd, 0, 0);
6427 auto xGlobal = positionsFromStatePointer(state_global);
6429 set_ddbox(dd, true, ir,
6430 DDMASTER(dd) ? state_global->box : nullptr,
6434 distributeState(mdlog, dd, state_global, ddbox, state_local, f);
6436 dd_make_local_cgs(dd, &top_local->cgs);
6438 /* Ensure that we have space for the new distribution */
6439 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
6441 if (fr->cutoff_scheme == ecutsGROUP)
6443 calc_cgcm(fplog, 0, dd->ncg_home,
6444 &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
6447 inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());
6449 dd_set_cginfo(dd->globalAtomGroupIndices, 0, dd->ncg_home, fr, comm->bLocalCG);
6451 else if (state_local->ddp_count != dd->ddp_count)
6453 if (state_local->ddp_count > dd->ddp_count)
6455 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%" PRId64 ")", state_local->ddp_count, dd->ddp_count);
6458 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
6460 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
6463 /* Clear the old state */
6464 clearDDStateIndices(dd, 0, 0);
6466 /* Restore the atom group indices from state_local */
6467 restoreAtomGroups(dd, cgs_gl->index, state_local);
6468 make_dd_indices(dd, cgs_gl->index, 0);
6469 ncgindex_set = dd->ncg_home;
6471 if (fr->cutoff_scheme == ecutsGROUP)
6473 /* Redetermine the cg COMs */
6474 calc_cgcm(fplog, 0, dd->ncg_home,
6475 &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
6478 inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());
6480 dd_set_cginfo(dd->globalAtomGroupIndices, 0, dd->ncg_home, fr, comm->bLocalCG);
6482 set_ddbox(dd, bMasterState, ir, state_local->box,
6483 true, state_local->x, &ddbox);
6485 bRedist = isDlbOn(comm);
6489 /* We have the full state, only redistribute the cgs */
6491 /* Clear the non-home indices */
6492 clearDDStateIndices(dd, dd->ncg_home, comm->atomRanges.numHomeAtoms());
6495 /* Avoid global communication for dim's without pbc and -gcom */
6496 if (!bNStGlobalComm)
6498 copy_rvec(comm->box0, ddbox.box0 );
6499 copy_rvec(comm->box_size, ddbox.box_size);
6501 set_ddbox(dd, bMasterState, ir, state_local->box,
6502 bNStGlobalComm, state_local->x, &ddbox);
6507 /* For dim's without pbc and -gcom */
6508 copy_rvec(ddbox.box0, comm->box0 );
6509 copy_rvec(ddbox.box_size, comm->box_size);
6511 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
6514 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
6516 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
6519 /* Check if we should sort the charge groups */
6520 const bool bSortCG = (bMasterState || bRedist);
6522 ncg_home_old = dd->ncg_home;
6524 /* When repartitioning we mark charge groups that will move to neighboring
6525 * DD cells, but we do not move them right away for performance reasons.
6526 * Thus we need to keep track of how many charge groups will move for
6527 * obtaining correct local charge group / atom counts.
6532 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
6534 ncgindex_set = dd->ncg_home;
6535 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
6539 GMX_RELEASE_ASSERT(bSortCG, "Sorting is required after redistribution");
6541 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
6544 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
6546 &comm->cell_x0, &comm->cell_x1,
6547 dd->ncg_home, fr->cg_cm,
6548 cell_ns_x0, cell_ns_x1, &grid_density);
6552 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
6555 switch (fr->cutoff_scheme)
6558 copy_ivec(fr->ns->grid->n, ncells_old);
6559 grid_first(fplog, fr->ns->grid, dd, &ddbox,
6560 state_local->box, cell_ns_x0, cell_ns_x1,
6561 fr->rlist, grid_density);
6564 nbnxn_get_ncells(fr->nbv->nbs.get(), &ncells_old[XX], &ncells_old[YY]);
6567 gmx_incons("unimplemented");
6569 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
6570 copy_ivec(ddbox.tric_dir, comm->tric_dir);
6574 wallcycle_sub_start(wcycle, ewcsDD_GRID);
6576 /* Sort the state on charge group position.
6577 * This enables exact restarts from this step.
6578 * It also improves performance by about 15% with larger numbers
6579 * of atoms per node.
6582 /* Fill the ns grid with the home cell,
6583 * so we can sort with the indices.
6585 set_zones_ncg_home(dd);
6587 switch (fr->cutoff_scheme)
6590 set_zones_size(dd, state_local->box, &ddbox, 0, 1, ncg_moved);
6592 nbnxn_put_on_grid(fr->nbv->nbs.get(), fr->ePBC, state_local->box,
6594 comm->zones.size[0].bb_x0,
6595 comm->zones.size[0].bb_x1,
6597 comm->zones.dens_zone0,
6599 as_rvec_array(state_local->x.data()),
6600 ncg_moved, bRedist ? comm->movedBuffer.data() : nullptr,
6601 fr->nbv->grp[eintLocal].kernel_type,
6604 nbnxn_get_ncells(fr->nbv->nbs.get(), &ncells_new[XX], &ncells_new[YY]);
6607 fill_grid(&comm->zones, fr->ns->grid, dd->ncg_home,
6608 0, dd->ncg_home, fr->cg_cm);
6610 copy_ivec(fr->ns->grid->n, ncells_new);
6613 gmx_incons("unimplemented");
6616 bResortAll = bMasterState;
6618 /* Check if we can user the old order and ns grid cell indices
6619 * of the charge groups to sort the charge groups efficiently.
6621 if (ncells_new[XX] != ncells_old[XX] ||
6622 ncells_new[YY] != ncells_old[YY] ||
6623 ncells_new[ZZ] != ncells_old[ZZ])
6630 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
6631 gmx_step_str(step, sbuf), dd->ncg_home);
6633 dd_sort_state(dd, fr->cg_cm, fr, state_local,
6634 bResortAll ? -1 : ncg_home_old);
6636 /* After sorting and compacting we set the correct size */
6637 dd_resize_state(state_local, f, comm->atomRanges.numHomeAtoms());
6639 /* Rebuild all the indices */
6643 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
6646 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
6648 /* Setup up the communication and communicate the coordinates */
6649 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
6651 /* Set the indices */
6652 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
6654 /* Set the charge group boundaries for neighbor searching */
6655 set_cg_boundaries(&comm->zones);
6657 if (fr->cutoff_scheme == ecutsVERLET)
6659 /* When bSortCG=true, we have already set the size for zone 0 */
6660 set_zones_size(dd, state_local->box, &ddbox,
6661 bSortCG ? 1 : 0, comm->zones.n,
6665 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
6668 write_dd_pdb("dd_home",step,"dump",top_global,cr,
6669 -1,as_rvec_array(state_local->x.data()),state_local->box);
6672 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
6674 /* Extract a local topology from the global topology */
6675 for (int i = 0; i < dd->ndim; i++)
6677 np[dd->dim[i]] = comm->cd[i].numPulses();
6679 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
6680 comm->cellsize_min, np,
6682 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : as_rvec_array(state_local->x.data()),
6683 vsite, top_global, top_local);
6685 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
6687 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
6689 /* Set up the special atom communication */
6690 int n = comm->atomRanges.end(DDAtomRanges::Type::Zones);
6691 for (int i = static_cast<int>(DDAtomRanges::Type::Zones) + 1; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6693 auto range = static_cast<DDAtomRanges::Type>(i);
6696 case DDAtomRanges::Type::Vsites:
6697 if (vsite && vsite->n_intercg_vsite)
6699 n = dd_make_local_vsites(dd, n, top_local->idef.il);
6702 case DDAtomRanges::Type::Constraints:
6703 if (dd->bInterCGcons || dd->bInterCGsettles)
6705 /* Only for inter-cg constraints we need special code */
6706 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
6707 constr, ir->nProjOrder,
6708 top_local->idef.il);
6712 gmx_incons("Unknown special atom type setup");
6714 comm->atomRanges.setEnd(range, n);
6717 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
6719 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
6721 /* Make space for the extra coordinates for virtual site
6722 * or constraint communication.
6724 state_local->natoms = comm->atomRanges.numAtomsTotal();
6726 dd_resize_state(state_local, f, state_local->natoms);
6728 if (fr->haveDirectVirialContributions)
6730 if (vsite && vsite->n_intercg_vsite)
6732 nat_f_novirsum = comm->atomRanges.end(DDAtomRanges::Type::Vsites);
6736 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
6738 nat_f_novirsum = comm->atomRanges.end(DDAtomRanges::Type::Zones);
6742 nat_f_novirsum = comm->atomRanges.numHomeAtoms();
6751 /* Set the number of atoms required for the force calculation.
6752 * Forces need to be constrained when doing energy
6753 * minimization. For simple simulations we could avoid some
6754 * allocation, zeroing and copying, but this is probably not worth
6755 * the complications and checking.
6757 forcerec_set_ranges(fr, dd->ncg_home, dd->globalAtomGroupIndices.size(),
6758 comm->atomRanges.end(DDAtomRanges::Type::Zones),
6759 comm->atomRanges.end(DDAtomRanges::Type::Constraints),
6762 /* Update atom data for mdatoms and several algorithms */
6763 mdAlgorithmsSetupAtomData(cr, ir, top_global, top_local, fr,
6764 nullptr, mdAtoms, constr, vsite, nullptr);
6766 auto mdatoms = mdAtoms->mdatoms();
6767 if (!thisRankHasDuty(cr, DUTY_PME))
6769 /* Send the charges and/or c6/sigmas to our PME only node */
6770 gmx_pme_send_parameters(cr,
6772 mdatoms->nChargePerturbed != 0, mdatoms->nTypePerturbed != 0,
6773 mdatoms->chargeA, mdatoms->chargeB,
6774 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
6775 mdatoms->sigmaA, mdatoms->sigmaB,
6776 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
6781 /* Update the local pull groups */
6782 dd_make_local_pull_groups(cr, ir->pull_work);
6785 if (dd->atomSets != nullptr)
6787 /* Update the local atom sets */
6788 dd->atomSets->setIndicesInDomainDecomposition(*(dd->ga2la));
6791 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
6792 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
6794 add_dd_statistics(dd);
6796 /* Make sure we only count the cycles for this DD partitioning */
6797 clear_dd_cycle_counts(dd);
6799 /* Because the order of the atoms might have changed since
6800 * the last vsite construction, we need to communicate the constructing
6801 * atom coordinates again (for spreading the forces this MD step).
6803 dd_move_x_vsites(dd, state_local->box, as_rvec_array(state_local->x.data()));
6805 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
6807 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
6809 dd_move_x(dd, state_local->box, state_local->x, nullWallcycle);
6810 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
6811 -1, as_rvec_array(state_local->x.data()), state_local->box);
6814 /* Store the partitioning step */
6815 comm->partition_step = step;
6817 /* Increase the DD partitioning counter */
6819 /* The state currently matches this DD partitioning count, store it */
6820 state_local->ddp_count = dd->ddp_count;
6823 /* The DD master node knows the complete cg distribution,
6824 * store the count so we can possibly skip the cg info communication.
6826 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
6829 if (comm->DD_debug > 0)
6831 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
6832 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
6833 "after partitioning");
6836 wallcycle_stop(wcycle, ewcDOMDEC);
6839 /*! \brief Check whether bonded interactions are missing, if appropriate */
6840 void checkNumberOfBondedInteractions(const gmx::MDLogger &mdlog,
6842 int totalNumberOfBondedInteractions,
6843 const gmx_mtop_t *top_global,
6844 const gmx_localtop_t *top_local,
6845 const t_state *state,
6846 bool *shouldCheckNumberOfBondedInteractions)
6848 if (*shouldCheckNumberOfBondedInteractions)
6850 if (totalNumberOfBondedInteractions != cr->dd->nbonded_global)
6852 dd_print_missing_interactions(mdlog, cr, totalNumberOfBondedInteractions, top_global, top_local, state); // Does not return
6854 *shouldCheckNumberOfBondedInteractions = false;