2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
52 #include "gromacs/compat/make_unique.h"
53 #include "gromacs/domdec/collect.h"
54 #include "gromacs/domdec/dlbtiming.h"
55 #include "gromacs/domdec/domdec_network.h"
56 #include "gromacs/domdec/ga2la.h"
57 #include "gromacs/domdec/localatomsetmanager.h"
58 #include "gromacs/ewald/pme.h"
59 #include "gromacs/fileio/gmxfio.h"
60 #include "gromacs/fileio/pdbio.h"
61 #include "gromacs/gmxlib/chargegroup.h"
62 #include "gromacs/gmxlib/network.h"
63 #include "gromacs/gmxlib/nrnb.h"
64 #include "gromacs/gpu_utils/gpu_utils.h"
65 #include "gromacs/hardware/hw_info.h"
66 #include "gromacs/imd/imd.h"
67 #include "gromacs/listed-forces/manage-threading.h"
68 #include "gromacs/math/functions.h"
69 #include "gromacs/math/vec.h"
70 #include "gromacs/math/vectypes.h"
71 #include "gromacs/mdlib/calc_verletbuf.h"
72 #include "gromacs/mdlib/constr.h"
73 #include "gromacs/mdlib/constraintrange.h"
74 #include "gromacs/mdlib/forcerec.h"
75 #include "gromacs/mdlib/gmx_omp_nthreads.h"
76 #include "gromacs/mdlib/lincs.h"
77 #include "gromacs/mdlib/mdatoms.h"
78 #include "gromacs/mdlib/mdrun.h"
79 #include "gromacs/mdlib/mdsetup.h"
80 #include "gromacs/mdlib/nb_verlet.h"
81 #include "gromacs/mdlib/nbnxn_grid.h"
82 #include "gromacs/mdlib/nsgrid.h"
83 #include "gromacs/mdlib/vsite.h"
84 #include "gromacs/mdtypes/commrec.h"
85 #include "gromacs/mdtypes/df_history.h"
86 #include "gromacs/mdtypes/forcerec.h"
87 #include "gromacs/mdtypes/inputrec.h"
88 #include "gromacs/mdtypes/md_enums.h"
89 #include "gromacs/mdtypes/mdatom.h"
90 #include "gromacs/mdtypes/nblist.h"
91 #include "gromacs/mdtypes/state.h"
92 #include "gromacs/pbcutil/ishift.h"
93 #include "gromacs/pbcutil/pbc.h"
94 #include "gromacs/pulling/pull.h"
95 #include "gromacs/timing/wallcycle.h"
96 #include "gromacs/topology/block.h"
97 #include "gromacs/topology/idef.h"
98 #include "gromacs/topology/ifunc.h"
99 #include "gromacs/topology/mtop_lookup.h"
100 #include "gromacs/topology/mtop_util.h"
101 #include "gromacs/topology/topology.h"
102 #include "gromacs/utility/basedefinitions.h"
103 #include "gromacs/utility/basenetwork.h"
104 #include "gromacs/utility/cstringutil.h"
105 #include "gromacs/utility/exceptions.h"
106 #include "gromacs/utility/fatalerror.h"
107 #include "gromacs/utility/gmxmpi.h"
108 #include "gromacs/utility/logger.h"
109 #include "gromacs/utility/real.h"
110 #include "gromacs/utility/smalloc.h"
111 #include "gromacs/utility/strconvert.h"
112 #include "gromacs/utility/stringstream.h"
113 #include "gromacs/utility/stringutil.h"
114 #include "gromacs/utility/textwriter.h"
116 #include "atomdistribution.h"
117 #include "cellsizes.h"
118 #include "distribute.h"
119 #include "domdec_constraints.h"
120 #include "domdec_internal.h"
121 #include "domdec_vsite.h"
122 #include "redistribute.h"
125 #define DD_NLOAD_MAX 9
127 static const char *edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
129 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
132 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
133 #define DD_FLAG_NRCG 65535
134 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
135 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
137 /* The DD zone order */
138 static const ivec dd_zo[DD_MAXZONE] =
139 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
141 /* The non-bonded zone-pair setup for domain decomposition
142 * The first number is the i-zone, the second number the first j-zone seen by
143 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
144 * As is, this is for 3D decomposition, where there are 4 i-zones.
145 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
146 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
149 ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
154 /* Turn on DLB when the load imbalance causes this amount of total loss.
155 * There is a bit of overhead with DLB and it's difficult to achieve
156 * a load imbalance of less than 2% with DLB.
158 #define DD_PERF_LOSS_DLB_ON 0.02
160 /* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
161 #define DD_PERF_LOSS_WARN 0.05
164 /* We check if to turn on DLB at the first and every 100 DD partitionings.
165 * With large imbalance DLB will turn on at the first step, so we can
166 * make the interval so large that the MPI overhead of the check is negligible.
168 static const int c_checkTurnDlbOnInterval = 100;
169 /* We need to check if DLB results in worse performance and then turn it off.
170 * We check this more often then for turning DLB on, because the DLB can scale
171 * the domains very rapidly, so if unlucky the load imbalance can go up quickly
172 * and furthermore, we are already synchronizing often with DLB, so
173 * the overhead of the MPI Bcast is not that high.
175 static const int c_checkTurnDlbOffInterval = 20;
177 /* Forward declaration */
178 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue);
182 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
184 static void index2xyz(ivec nc,int ind,ivec xyz)
186 xyz[XX] = ind % nc[XX];
187 xyz[YY] = (ind / nc[XX]) % nc[YY];
188 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
192 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
194 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
195 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
196 xyz[ZZ] = ind % nc[ZZ];
199 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
204 ddindex = dd_index(dd->nc, c);
205 if (dd->comm->bCartesianPP_PME)
207 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
209 else if (dd->comm->bCartesianPP)
212 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
223 static gmx_bool dynamic_dd_box(const gmx_ddbox_t *ddbox, const t_inputrec *ir)
225 return (ddbox->nboundeddim < DIM || inputrecDynamicBox(ir));
228 int ddglatnr(const gmx_domdec_t *dd, int i)
238 if (i >= dd->comm->atomRanges.numAtomsTotal())
240 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
242 atnr = dd->globalAtomIndices[i] + 1;
248 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
250 return &dd->comm->cgs_gl;
253 void dd_store_state(gmx_domdec_t *dd, t_state *state)
257 if (state->ddp_count != dd->ddp_count)
259 gmx_incons("The MD state does not match the domain decomposition state");
262 state->cg_gl.resize(dd->ncg_home);
263 for (i = 0; i < dd->ncg_home; i++)
265 state->cg_gl[i] = dd->globalAtomGroupIndices[i];
268 state->ddp_count_cg_gl = dd->ddp_count;
271 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
273 return &dd->comm->zones;
276 void dd_get_ns_ranges(const gmx_domdec_t *dd, int icg,
277 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
279 gmx_domdec_zones_t *zones;
282 zones = &dd->comm->zones;
285 while (icg >= zones->izone[izone].cg1)
294 else if (izone < zones->nizone)
296 *jcg0 = zones->izone[izone].jcg0;
300 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
301 icg, izone, zones->nizone);
304 *jcg1 = zones->izone[izone].jcg1;
306 for (d = 0; d < dd->ndim; d++)
309 shift0[dim] = zones->izone[izone].shift0[dim];
310 shift1[dim] = zones->izone[izone].shift1[dim];
311 if (dd->comm->tric_dir[dim] || (isDlbOn(dd->comm) && d > 0))
313 /* A conservative approach, this can be optimized */
320 int dd_numHomeAtoms(const gmx_domdec_t &dd)
322 return dd.comm->atomRanges.numHomeAtoms();
325 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
327 /* We currently set mdatoms entries for all atoms:
328 * local + non-local + communicated for vsite + constraints
331 return dd->comm->atomRanges.numAtomsTotal();
334 int dd_natoms_vsite(const gmx_domdec_t *dd)
336 return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
339 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
341 *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
342 *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
345 void dd_move_x(gmx_domdec_t *dd,
347 gmx::ArrayRef<gmx::RVec> x,
348 gmx_wallcycle *wcycle)
350 wallcycle_start(wcycle, ewcMOVEX);
353 gmx_domdec_comm_t *comm;
354 gmx_domdec_comm_dim_t *cd;
355 rvec shift = {0, 0, 0};
356 gmx_bool bPBC, bScrew;
360 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
363 nat_tot = comm->atomRanges.numHomeAtoms();
364 for (int d = 0; d < dd->ndim; d++)
366 bPBC = (dd->ci[dd->dim[d]] == 0);
367 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
370 copy_rvec(box[dd->dim[d]], shift);
373 for (const gmx_domdec_ind_t &ind : cd->ind)
375 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
376 gmx::ArrayRef<gmx::RVec> &sendBuffer = sendBufferAccess.buffer;
380 for (int g : ind.index)
382 for (int j : atomGrouping.block(g))
384 sendBuffer[n] = x[j];
391 for (int g : ind.index)
393 for (int j : atomGrouping.block(g))
395 /* We need to shift the coordinates */
396 for (int d = 0; d < DIM; d++)
398 sendBuffer[n][d] = x[j][d] + shift[d];
406 for (int g : ind.index)
408 for (int j : atomGrouping.block(g))
411 sendBuffer[n][XX] = x[j][XX] + shift[XX];
413 * This operation requires a special shift force
414 * treatment, which is performed in calc_vir.
416 sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
417 sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
423 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
425 gmx::ArrayRef<gmx::RVec> receiveBuffer;
426 if (cd->receiveInPlace)
428 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
432 receiveBuffer = receiveBufferAccess.buffer;
434 /* Send and receive the coordinates */
435 ddSendrecv(dd, d, dddirBackward,
436 sendBuffer, receiveBuffer);
438 if (!cd->receiveInPlace)
441 for (int zone = 0; zone < nzone; zone++)
443 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
445 x[i] = receiveBuffer[j++];
449 nat_tot += ind.nrecv[nzone+1];
454 wallcycle_stop(wcycle, ewcMOVEX);
457 void dd_move_f(gmx_domdec_t *dd,
458 gmx::ArrayRef<gmx::RVec> f,
460 gmx_wallcycle *wcycle)
462 wallcycle_start(wcycle, ewcMOVEF);
465 gmx_domdec_comm_t *comm;
466 gmx_domdec_comm_dim_t *cd;
469 gmx_bool bShiftForcesNeedPbc, bScrew;
473 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
475 nzone = comm->zones.n/2;
476 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
477 for (int d = dd->ndim-1; d >= 0; d--)
479 /* Only forces in domains near the PBC boundaries need to
480 consider PBC in the treatment of fshift */
481 bShiftForcesNeedPbc = (dd->ci[dd->dim[d]] == 0);
482 bScrew = (bShiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX);
483 if (fshift == nullptr && !bScrew)
485 bShiftForcesNeedPbc = FALSE;
487 /* Determine which shift vector we need */
493 for (int p = cd->numPulses() - 1; p >= 0; p--)
495 const gmx_domdec_ind_t &ind = cd->ind[p];
496 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
497 gmx::ArrayRef<gmx::RVec> &receiveBuffer = receiveBufferAccess.buffer;
499 nat_tot -= ind.nrecv[nzone+1];
501 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
503 gmx::ArrayRef<gmx::RVec> sendBuffer;
504 if (cd->receiveInPlace)
506 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
510 sendBuffer = sendBufferAccess.buffer;
512 for (int zone = 0; zone < nzone; zone++)
514 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
516 sendBuffer[j++] = f[i];
520 /* Communicate the forces */
521 ddSendrecv(dd, d, dddirForward,
522 sendBuffer, receiveBuffer);
523 /* Add the received forces */
525 if (!bShiftForcesNeedPbc)
527 for (int g : ind.index)
529 for (int j : atomGrouping.block(g))
531 for (int d = 0; d < DIM; d++)
533 f[j][d] += receiveBuffer[n][d];
541 /* fshift should always be defined if this function is
542 * called when bShiftForcesNeedPbc is true */
543 assert(nullptr != fshift);
544 for (int g : ind.index)
546 for (int j : atomGrouping.block(g))
548 for (int d = 0; d < DIM; d++)
550 f[j][d] += receiveBuffer[n][d];
552 /* Add this force to the shift force */
553 for (int d = 0; d < DIM; d++)
555 fshift[is][d] += receiveBuffer[n][d];
563 for (int g : ind.index)
565 for (int j : atomGrouping.block(g))
567 /* Rotate the force */
568 f[j][XX] += receiveBuffer[n][XX];
569 f[j][YY] -= receiveBuffer[n][YY];
570 f[j][ZZ] -= receiveBuffer[n][ZZ];
573 /* Add this force to the shift force */
574 for (int d = 0; d < DIM; d++)
576 fshift[is][d] += receiveBuffer[n][d];
586 wallcycle_stop(wcycle, ewcMOVEF);
589 /* Convenience function for extracting a real buffer from an rvec buffer
591 * To reduce the number of temporary communication buffers and avoid
592 * cache polution, we reuse gmx::RVec buffers for storing reals.
593 * This functions return a real buffer reference with the same number
594 * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
596 static gmx::ArrayRef<real>
597 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
599 return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
603 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
606 gmx_domdec_comm_t *comm;
607 gmx_domdec_comm_dim_t *cd;
611 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
614 nat_tot = comm->atomRanges.numHomeAtoms();
615 for (int d = 0; d < dd->ndim; d++)
618 for (const gmx_domdec_ind_t &ind : cd->ind)
620 /* Note: We provision for RVec instead of real, so a factor of 3
621 * more than needed. The buffer actually already has this size
622 * and we pass a plain pointer below, so this does not matter.
624 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
625 gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
627 for (int g : ind.index)
629 for (int j : atomGrouping.block(g))
631 sendBuffer[n++] = v[j];
635 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
637 gmx::ArrayRef<real> receiveBuffer;
638 if (cd->receiveInPlace)
640 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
644 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
646 /* Send and receive the data */
647 ddSendrecv(dd, d, dddirBackward,
648 sendBuffer, receiveBuffer);
649 if (!cd->receiveInPlace)
652 for (int zone = 0; zone < nzone; zone++)
654 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
656 v[i] = receiveBuffer[j++];
660 nat_tot += ind.nrecv[nzone+1];
666 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
669 gmx_domdec_comm_t *comm;
670 gmx_domdec_comm_dim_t *cd;
674 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
676 nzone = comm->zones.n/2;
677 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
678 for (int d = dd->ndim-1; d >= 0; d--)
681 for (int p = cd->numPulses() - 1; p >= 0; p--)
683 const gmx_domdec_ind_t &ind = cd->ind[p];
685 /* Note: We provision for RVec instead of real, so a factor of 3
686 * more than needed. The buffer actually already has this size
687 * and we typecast, so this works as intended.
689 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
690 gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
691 nat_tot -= ind.nrecv[nzone + 1];
693 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
695 gmx::ArrayRef<real> sendBuffer;
696 if (cd->receiveInPlace)
698 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
702 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
704 for (int zone = 0; zone < nzone; zone++)
706 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
708 sendBuffer[j++] = v[i];
712 /* Communicate the forces */
713 ddSendrecv(dd, d, dddirForward,
714 sendBuffer, receiveBuffer);
715 /* Add the received forces */
717 for (int g : ind.index)
719 for (int j : atomGrouping.block(g))
721 v[j] += receiveBuffer[n];
730 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
732 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
734 zone->min0, zone->max1,
735 zone->mch0, zone->mch0,
736 zone->p1_0, zone->p1_1);
739 /* Using the home grid size as input in cell_ns_x0 and cell_ns_x1
740 * takes the extremes over all home and remote zones in the halo
741 * and returns the results in cell_ns_x0 and cell_ns_x1.
742 * Note: only used with the group cut-off scheme.
744 static void dd_move_cellx(gmx_domdec_t *dd,
745 const gmx_ddbox_t *ddbox,
749 constexpr int c_ddZoneCommMaxNumZones = 5;
750 gmx_ddzone_t buf_s[c_ddZoneCommMaxNumZones];
751 gmx_ddzone_t buf_r[c_ddZoneCommMaxNumZones];
752 gmx_ddzone_t buf_e[c_ddZoneCommMaxNumZones];
753 gmx_domdec_comm_t *comm = dd->comm;
757 for (int d = 1; d < dd->ndim; d++)
759 int dim = dd->dim[d];
760 gmx_ddzone_t &zp = (d == 1) ? comm->zone_d1[0] : comm->zone_d2[0][0];
762 /* Copy the base sizes of the home zone */
763 zp.min0 = cell_ns_x0[dim];
764 zp.max1 = cell_ns_x1[dim];
765 zp.min1 = cell_ns_x1[dim];
766 zp.mch0 = cell_ns_x0[dim];
767 zp.mch1 = cell_ns_x1[dim];
768 zp.p1_0 = cell_ns_x0[dim];
769 zp.p1_1 = cell_ns_x1[dim];
772 gmx::ArrayRef<DDCellsizesWithDlb> cellsizes = comm->cellsizesWithDlb;
774 /* Loop backward over the dimensions and aggregate the extremes
777 for (int d = dd->ndim - 2; d >= 0; d--)
779 const int dim = dd->dim[d];
780 const bool applyPbc = (dim < ddbox->npbcdim);
782 /* Use an rvec to store two reals */
783 extr_s[d][0] = cellsizes[d + 1].fracLower;
784 extr_s[d][1] = cellsizes[d + 1].fracUpper;
785 extr_s[d][2] = cellsizes[d + 1].fracUpper;
788 GMX_ASSERT(pos < c_ddZoneCommMaxNumZones, "The buffers should be sufficiently large");
789 /* Store the extremes in the backward sending buffer,
790 * so they get updated separately from the forward communication.
792 for (int d1 = d; d1 < dd->ndim-1; d1++)
794 /* We invert the order to be able to use the same loop for buf_e */
795 buf_s[pos].min0 = extr_s[d1][1];
796 buf_s[pos].max1 = extr_s[d1][0];
797 buf_s[pos].min1 = extr_s[d1][2];
800 /* Store the cell corner of the dimension we communicate along */
801 buf_s[pos].p1_0 = comm->cell_x0[dim];
806 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
809 if (dd->ndim == 3 && d == 0)
811 buf_s[pos] = comm->zone_d2[0][1];
813 buf_s[pos] = comm->zone_d1[0];
817 /* We only need to communicate the extremes
818 * in the forward direction
820 int numPulses = comm->cd[d].numPulses();
824 /* Take the minimum to avoid double communication */
825 numPulsesMin = std::min(numPulses, dd->nc[dim] - 1 - numPulses);
829 /* Without PBC we should really not communicate over
830 * the boundaries, but implementing that complicates
831 * the communication setup and therefore we simply
832 * do all communication, but ignore some data.
834 numPulsesMin = numPulses;
836 for (int pulse = 0; pulse < numPulsesMin; pulse++)
838 /* Communicate the extremes forward */
839 bool receiveValidData = (applyPbc || dd->ci[dim] > 0);
841 int numElements = dd->ndim - d - 1;
842 ddSendrecv(dd, d, dddirForward,
843 extr_s + d, numElements,
844 extr_r + d, numElements);
846 if (receiveValidData)
848 for (int d1 = d; d1 < dd->ndim - 1; d1++)
850 extr_s[d1][0] = std::max(extr_s[d1][0], extr_r[d1][0]);
851 extr_s[d1][1] = std::min(extr_s[d1][1], extr_r[d1][1]);
852 extr_s[d1][2] = std::min(extr_s[d1][2], extr_r[d1][2]);
857 const int numElementsInBuffer = pos;
858 for (int pulse = 0; pulse < numPulses; pulse++)
860 /* Communicate all the zone information backward */
861 bool receiveValidData = (applyPbc || dd->ci[dim] < dd->nc[dim] - 1);
863 static_assert(sizeof(gmx_ddzone_t) == c_ddzoneNumReals*sizeof(real), "Here we expect gmx_ddzone_t to consist of c_ddzoneNumReals reals (only)");
865 int numReals = numElementsInBuffer*c_ddzoneNumReals;
866 ddSendrecv(dd, d, dddirBackward,
867 gmx::arrayRefFromArray(&buf_s[0].min0, numReals),
868 gmx::arrayRefFromArray(&buf_r[0].min0, numReals));
873 for (int d1 = d + 1; d1 < dd->ndim; d1++)
875 /* Determine the decrease of maximum required
876 * communication height along d1 due to the distance along d,
877 * this avoids a lot of useless atom communication.
879 real dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
882 if (ddbox->tric_dir[dim])
884 /* c is the off-diagonal coupling between the cell planes
885 * along directions d and d1.
887 c = ddbox->v[dim][dd->dim[d1]][dim];
893 real det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
896 dh[d1] = comm->cutoff - (c*dist_d + std::sqrt(det))/(1 + c*c);
900 /* A negative value signals out of range */
906 /* Accumulate the extremes over all pulses */
907 for (int i = 0; i < numElementsInBuffer; i++)
915 if (receiveValidData)
917 buf_e[i].min0 = std::min(buf_e[i].min0, buf_r[i].min0);
918 buf_e[i].max1 = std::max(buf_e[i].max1, buf_r[i].max1);
919 buf_e[i].min1 = std::min(buf_e[i].min1, buf_r[i].min1);
923 if (dd->ndim == 3 && d == 0 && i == numElementsInBuffer - 1)
931 if (receiveValidData && dh[d1] >= 0)
933 buf_e[i].mch0 = std::max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
934 buf_e[i].mch1 = std::max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
937 /* Copy the received buffer to the send buffer,
938 * to pass the data through with the next pulse.
942 if (((applyPbc || dd->ci[dim] + numPulses < dd->nc[dim]) && pulse == numPulses - 1) ||
943 (!applyPbc && dd->ci[dim] + 1 + pulse == dd->nc[dim] - 1))
945 /* Store the extremes */
948 for (int d1 = d; d1 < dd->ndim-1; d1++)
950 extr_s[d1][1] = std::min(extr_s[d1][1], buf_e[pos].min0);
951 extr_s[d1][0] = std::max(extr_s[d1][0], buf_e[pos].max1);
952 extr_s[d1][2] = std::min(extr_s[d1][2], buf_e[pos].min1);
956 if (d == 1 || (d == 0 && dd->ndim == 3))
958 for (int i = d; i < 2; i++)
960 comm->zone_d2[1-d][i] = buf_e[pos];
966 comm->zone_d1[1] = buf_e[pos];
975 int dim = dd->dim[1];
976 for (int i = 0; i < 2; i++)
980 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
982 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d1[i].min0);
983 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d1[i].max1);
988 int dim = dd->dim[2];
989 for (int i = 0; i < 2; i++)
991 for (int j = 0; j < 2; j++)
995 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
997 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
998 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1002 for (int d = 1; d < dd->ndim; d++)
1004 cellsizes[d].fracLowerMax = extr_s[d-1][0];
1005 cellsizes[d].fracUpperMin = extr_s[d-1][1];
1008 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1009 d, cellsizes[d].fracLowerMax, cellsizes[d].fracUpperMin);
1014 static void write_dd_grid_pdb(const char *fn, int64_t step,
1015 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1017 rvec grid_s[2], *grid_r = nullptr, cx, r;
1018 char fname[STRLEN], buf[22];
1020 int a, i, d, z, y, x;
1024 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1025 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1029 snew(grid_r, 2*dd->nnodes);
1032 dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : nullptr);
1036 for (d = 0; d < DIM; d++)
1038 for (i = 0; i < DIM; i++)
1046 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1048 tric[d][i] = box[i][d]/box[i][i];
1057 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1058 out = gmx_fio_fopen(fname, "w");
1059 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1061 for (i = 0; i < dd->nnodes; i++)
1063 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1064 for (d = 0; d < DIM; d++)
1066 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
1068 for (z = 0; z < 2; z++)
1070 for (y = 0; y < 2; y++)
1072 for (x = 0; x < 2; x++)
1074 cx[XX] = grid_r[i*2+x][XX];
1075 cx[YY] = grid_r[i*2+y][YY];
1076 cx[ZZ] = grid_r[i*2+z][ZZ];
1078 gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
1079 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
1083 for (d = 0; d < DIM; d++)
1085 for (x = 0; x < 4; x++)
1089 case 0: y = 1 + i*8 + 2*x; break;
1090 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
1091 case 2: y = 1 + i*8 + x; break;
1093 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
1097 gmx_fio_fclose(out);
1102 void write_dd_pdb(const char *fn, int64_t step, const char *title,
1103 const gmx_mtop_t *mtop, const t_commrec *cr,
1104 int natoms, const rvec x[], const matrix box)
1106 char fname[STRLEN], buf[22];
1109 const char *atomname, *resname;
1115 natoms = dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
1118 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
1120 out = gmx_fio_fopen(fname, "w");
1122 fprintf(out, "TITLE %s\n", title);
1123 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1125 for (int i = 0; i < natoms; i++)
1127 int ii = dd->globalAtomIndices[i];
1128 mtopGetAtomAndResidueName(mtop, ii, &molb, &atomname, &resnr, &resname, nullptr);
1131 if (i < dd->comm->atomRanges.end(DDAtomRanges::Type::Zones))
1134 while (i >= dd->atomGrouping().subRange(0, dd->comm->zones.cg_range[c + 1]).end())
1140 else if (i < dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites))
1142 b = dd->comm->zones.n;
1146 b = dd->comm->zones.n + 1;
1148 gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
1149 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
1151 fprintf(out, "TER\n");
1153 gmx_fio_fclose(out);
1156 real dd_cutoff_multibody(const gmx_domdec_t *dd)
1158 gmx_domdec_comm_t *comm;
1165 if (comm->bInterCGBondeds)
1167 if (comm->cutoff_mbody > 0)
1169 r = comm->cutoff_mbody;
1173 /* cutoff_mbody=0 means we do not have DLB */
1174 r = comm->cellsize_min[dd->dim[0]];
1175 for (di = 1; di < dd->ndim; di++)
1177 r = std::min(r, comm->cellsize_min[dd->dim[di]]);
1179 if (comm->bBondComm)
1181 r = std::max(r, comm->cutoff_mbody);
1185 r = std::min(r, comm->cutoff);
1193 real dd_cutoff_twobody(const gmx_domdec_t *dd)
1197 r_mb = dd_cutoff_multibody(dd);
1199 return std::max(dd->comm->cutoff, r_mb);
1203 static void dd_cart_coord2pmecoord(const gmx_domdec_t *dd, const ivec coord,
1208 nc = dd->nc[dd->comm->cartpmedim];
1209 ntot = dd->comm->ntot[dd->comm->cartpmedim];
1210 copy_ivec(coord, coord_pme);
1211 coord_pme[dd->comm->cartpmedim] =
1212 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
1215 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
1220 npme = dd->comm->npmenodes;
1222 /* Here we assign a PME node to communicate with this DD node
1223 * by assuming that the major index of both is x.
1224 * We add cr->npmenodes/2 to obtain an even distribution.
1226 return (ddindex*npme + npme/2)/npp;
1229 static int *dd_interleaved_pme_ranks(const gmx_domdec_t *dd)
1234 snew(pme_rank, dd->comm->npmenodes);
1236 for (i = 0; i < dd->nnodes; i++)
1238 p0 = ddindex2pmeindex(dd, i);
1239 p1 = ddindex2pmeindex(dd, i+1);
1240 if (i+1 == dd->nnodes || p1 > p0)
1244 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
1246 pme_rank[n] = i + 1 + n;
1254 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
1262 if (dd->comm->bCartesian) {
1263 gmx_ddindex2xyz(dd->nc,ddindex,coords);
1264 dd_coords2pmecoords(dd,coords,coords_pme);
1265 copy_ivec(dd->ntot,nc);
1266 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1267 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1269 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
1271 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
1277 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
1282 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
1284 gmx_domdec_comm_t *comm;
1286 int ddindex, nodeid = -1;
1288 comm = cr->dd->comm;
1293 if (comm->bCartesianPP_PME)
1296 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
1301 ddindex = dd_index(cr->dd->nc, coords);
1302 if (comm->bCartesianPP)
1304 nodeid = comm->ddindex2simnodeid[ddindex];
1310 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
1322 static int dd_simnode2pmenode(const gmx_domdec_t *dd,
1323 const t_commrec gmx_unused *cr,
1328 const gmx_domdec_comm_t *comm = dd->comm;
1330 /* This assumes a uniform x domain decomposition grid cell size */
1331 if (comm->bCartesianPP_PME)
1334 ivec coord, coord_pme;
1335 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
1336 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1338 /* This is a PP node */
1339 dd_cart_coord2pmecoord(dd, coord, coord_pme);
1340 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
1344 else if (comm->bCartesianPP)
1346 if (sim_nodeid < dd->nnodes)
1348 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
1353 /* This assumes DD cells with identical x coordinates
1354 * are numbered sequentially.
1356 if (dd->comm->pmenodes == nullptr)
1358 if (sim_nodeid < dd->nnodes)
1360 /* The DD index equals the nodeid */
1361 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
1367 while (sim_nodeid > dd->comm->pmenodes[i])
1371 if (sim_nodeid < dd->comm->pmenodes[i])
1373 pmenode = dd->comm->pmenodes[i];
1381 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
1386 dd->comm->npmenodes_x, dd->comm->npmenodes_y
1397 std::vector<int> get_pme_ddranks(const t_commrec *cr, int pmenodeid)
1401 ivec coord, coord_pme;
1405 std::vector<int> ddranks;
1406 ddranks.reserve((dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
1408 for (x = 0; x < dd->nc[XX]; x++)
1410 for (y = 0; y < dd->nc[YY]; y++)
1412 for (z = 0; z < dd->nc[ZZ]; z++)
1414 if (dd->comm->bCartesianPP_PME)
1419 dd_cart_coord2pmecoord(dd, coord, coord_pme);
1420 if (dd->ci[XX] == coord_pme[XX] &&
1421 dd->ci[YY] == coord_pme[YY] &&
1422 dd->ci[ZZ] == coord_pme[ZZ])
1424 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
1429 /* The slab corresponds to the nodeid in the PME group */
1430 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
1432 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
1441 static gmx_bool receive_vir_ener(const gmx_domdec_t *dd, const t_commrec *cr)
1443 gmx_bool bReceive = TRUE;
1445 if (cr->npmenodes < dd->nnodes)
1447 gmx_domdec_comm_t *comm = dd->comm;
1448 if (comm->bCartesianPP_PME)
1451 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
1453 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
1454 coords[comm->cartpmedim]++;
1455 if (coords[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1458 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
1459 if (dd_simnode2pmenode(dd, cr, rank) == pmenode)
1461 /* This is not the last PP node for pmenode */
1466 GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
1471 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
1472 if (cr->sim_nodeid+1 < cr->nnodes &&
1473 dd_simnode2pmenode(dd, cr, cr->sim_nodeid+1) == pmenode)
1475 /* This is not the last PP node for pmenode */
1484 static void set_zones_ncg_home(gmx_domdec_t *dd)
1486 gmx_domdec_zones_t *zones;
1489 zones = &dd->comm->zones;
1491 zones->cg_range[0] = 0;
1492 for (i = 1; i < zones->n+1; i++)
1494 zones->cg_range[i] = dd->ncg_home;
1496 /* zone_ncg1[0] should always be equal to ncg_home */
1497 dd->comm->zone_ncg1[0] = dd->ncg_home;
1500 static void restoreAtomGroups(gmx_domdec_t *dd,
1501 const int *gcgs_index, const t_state *state)
1503 gmx::ArrayRef<const int> atomGroupsState = state->cg_gl;
1505 std::vector<int> &globalAtomGroupIndices = dd->globalAtomGroupIndices;
1506 gmx::RangePartitioning &atomGrouping = dd->atomGrouping_;
1508 globalAtomGroupIndices.resize(atomGroupsState.size());
1509 atomGrouping.clear();
1511 /* Copy back the global charge group indices from state
1512 * and rebuild the local charge group to atom index.
1514 for (gmx::index i = 0; i < atomGroupsState.size(); i++)
1516 const int atomGroupGlobal = atomGroupsState[i];
1517 const int groupSize = gcgs_index[atomGroupGlobal + 1] - gcgs_index[atomGroupGlobal];
1518 globalAtomGroupIndices[i] = atomGroupGlobal;
1519 atomGrouping.appendBlock(groupSize);
1522 dd->ncg_home = atomGroupsState.size();
1523 dd->comm->atomRanges.setEnd(DDAtomRanges::Type::Home, atomGrouping.fullRange().end());
1525 set_zones_ncg_home(dd);
1528 static void dd_set_cginfo(gmx::ArrayRef<const int> index_gl, int cg0, int cg1,
1529 t_forcerec *fr, char *bLocalCG)
1531 cginfo_mb_t *cginfo_mb;
1537 cginfo_mb = fr->cginfo_mb;
1538 cginfo = fr->cginfo;
1540 for (cg = cg0; cg < cg1; cg++)
1542 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
1546 if (bLocalCG != nullptr)
1548 for (cg = cg0; cg < cg1; cg++)
1550 bLocalCG[index_gl[cg]] = TRUE;
1555 static void make_dd_indices(gmx_domdec_t *dd,
1556 const int *gcgs_index, int cg_start)
1558 const int numZones = dd->comm->zones.n;
1559 const int *zone2cg = dd->comm->zones.cg_range;
1560 const int *zone_ncg1 = dd->comm->zone_ncg1;
1561 gmx::ArrayRef<const int> globalAtomGroupIndices = dd->globalAtomGroupIndices;
1562 const gmx_bool bCGs = dd->comm->bCGs;
1564 std::vector<int> &globalAtomIndices = dd->globalAtomIndices;
1565 gmx_ga2la_t &ga2la = *dd->ga2la;
1567 if (zone2cg[1] != dd->ncg_home)
1569 gmx_incons("dd->ncg_zone is not up to date");
1572 /* Make the local to global and global to local atom index */
1573 int a = dd->atomGrouping().subRange(cg_start, cg_start).begin();
1574 globalAtomIndices.resize(a);
1575 for (int zone = 0; zone < numZones; zone++)
1584 cg0 = zone2cg[zone];
1586 int cg1 = zone2cg[zone+1];
1587 int cg1_p1 = cg0 + zone_ncg1[zone];
1589 for (int cg = cg0; cg < cg1; cg++)
1594 /* Signal that this cg is from more than one pulse away */
1597 int cg_gl = globalAtomGroupIndices[cg];
1600 for (int a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
1602 globalAtomIndices.push_back(a_gl);
1603 ga2la.insert(a_gl, {a, zone1});
1609 globalAtomIndices.push_back(cg_gl);
1610 ga2la.insert(cg_gl, {a, zone1});
1617 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
1621 if (bLocalCG == nullptr)
1625 for (size_t i = 0; i < dd->globalAtomGroupIndices.size(); i++)
1627 if (!bLocalCG[dd->globalAtomGroupIndices[i]])
1630 "DD rank %d, %s: atom group %zu, global atom group %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i + 1, dd->globalAtomGroupIndices[i] + 1, dd->ncg_home);
1635 for (int i = 0; i < ncg_sys; i++)
1642 if (ngl != dd->globalAtomGroupIndices.size())
1644 fprintf(stderr, "DD rank %d, %s: In bLocalCG %zu atom groups are marked as local, whereas there are %zu\n", dd->rank, where, ngl, dd->globalAtomGroupIndices.size());
1651 static void check_index_consistency(gmx_domdec_t *dd,
1652 int natoms_sys, int ncg_sys,
1657 const int numAtomsInZones = dd->comm->atomRanges.end(DDAtomRanges::Type::Zones);
1659 if (dd->comm->DD_debug > 1)
1661 std::vector<int> have(natoms_sys);
1662 for (int a = 0; a < numAtomsInZones; a++)
1664 int globalAtomIndex = dd->globalAtomIndices[a];
1665 if (have[globalAtomIndex] > 0)
1667 fprintf(stderr, "DD rank %d: global atom %d occurs twice: index %d and %d\n", dd->rank, globalAtomIndex + 1, have[globalAtomIndex], a+1);
1671 have[globalAtomIndex] = a + 1;
1676 std::vector<int> have(numAtomsInZones);
1679 for (int i = 0; i < natoms_sys; i++)
1681 if (const auto entry = dd->ga2la->find(i))
1683 const int a = entry->la;
1684 if (a >= numAtomsInZones)
1686 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, numAtomsInZones);
1692 if (dd->globalAtomIndices[a] != i)
1694 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->globalAtomIndices[a]+1);
1701 if (ngl != numAtomsInZones)
1704 "DD rank %d, %s: %d global atom indices, %d local atoms\n",
1705 dd->rank, where, ngl, numAtomsInZones);
1707 for (int a = 0; a < numAtomsInZones; a++)
1712 "DD rank %d, %s: local atom %d, global %d has no global index\n",
1713 dd->rank, where, a + 1, dd->globalAtomIndices[a] + 1);
1717 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
1721 gmx_fatal(FARGS, "DD rank %d, %s: %d atom(group) index inconsistencies",
1722 dd->rank, where, nerr);
1726 /* Clear all DD global state indices, starting from \p atomGroupStart and \p atomStart */
1727 static void clearDDStateIndices(gmx_domdec_t *dd,
1731 gmx_ga2la_t &ga2la = *dd->ga2la;
1735 /* Clear the whole list without the overhead of searching */
1740 const int numAtomsInZones = dd->comm->atomRanges.end(DDAtomRanges::Type::Zones);
1741 for (int i = 0; i < numAtomsInZones; i++)
1743 ga2la.erase(dd->globalAtomIndices[i]);
1747 char *bLocalCG = dd->comm->bLocalCG;
1750 for (size_t atomGroup = atomGroupStart; atomGroup < dd->globalAtomGroupIndices.size(); atomGroup++)
1752 bLocalCG[dd->globalAtomGroupIndices[atomGroup]] = FALSE;
1756 dd_clear_local_vsite_indices(dd);
1758 if (dd->constraints)
1760 dd_clear_local_constraint_indices(dd);
1764 static bool check_grid_jump(int64_t step,
1765 const gmx_domdec_t *dd,
1767 const gmx_ddbox_t *ddbox,
1770 gmx_domdec_comm_t *comm = dd->comm;
1771 bool invalid = false;
1773 for (int d = 1; d < dd->ndim; d++)
1775 const DDCellsizesWithDlb &cellsizes = comm->cellsizesWithDlb[d];
1776 const int dim = dd->dim[d];
1777 const real limit = grid_jump_limit(comm, cutoff, d);
1778 real bfac = ddbox->box_size[dim];
1779 if (ddbox->tric_dir[dim])
1781 bfac *= ddbox->skew_fac[dim];
1783 if ((cellsizes.fracUpper - cellsizes.fracLowerMax)*bfac < limit ||
1784 (cellsizes.fracLower - cellsizes.fracUpperMin)*bfac > -limit)
1792 /* This error should never be triggered under normal
1793 * circumstances, but you never know ...
1795 gmx_fatal(FARGS, "step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
1796 gmx_step_str(step, buf),
1797 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1805 static float dd_force_load(gmx_domdec_comm_t *comm)
1812 if (comm->eFlop > 1)
1814 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
1819 load = comm->cycl[ddCyclF];
1820 if (comm->cycl_n[ddCyclF] > 1)
1822 /* Subtract the maximum of the last n cycle counts
1823 * to get rid of possible high counts due to other sources,
1824 * for instance system activity, that would otherwise
1825 * affect the dynamic load balancing.
1827 load -= comm->cycl_max[ddCyclF];
1831 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
1833 float gpu_wait, gpu_wait_sum;
1835 gpu_wait = comm->cycl[ddCyclWaitGPU];
1836 if (comm->cycl_n[ddCyclF] > 1)
1838 /* We should remove the WaitGPU time of the same MD step
1839 * as the one with the maximum F time, since the F time
1840 * and the wait time are not independent.
1841 * Furthermore, the step for the max F time should be chosen
1842 * the same on all ranks that share the same GPU.
1843 * But to keep the code simple, we remove the average instead.
1844 * The main reason for artificially long times at some steps
1845 * is spurious CPU activity or MPI time, so we don't expect
1846 * that changes in the GPU wait time matter a lot here.
1848 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/static_cast<float>(comm->cycl_n[ddCyclF]);
1850 /* Sum the wait times over the ranks that share the same GPU */
1851 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
1852 comm->mpi_comm_gpu_shared);
1853 /* Replace the wait time by the average over the ranks */
1854 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
1862 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
1864 gmx_domdec_comm_t *comm;
1869 snew(*dim_f, dd->nc[dim]+1);
1871 for (i = 1; i < dd->nc[dim]; i++)
1873 if (comm->slb_frac[dim])
1875 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
1879 (*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
1882 (*dim_f)[dd->nc[dim]] = 1;
1885 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
1887 int pmeindex, slab, nso, i;
1890 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
1896 ddpme->dim = dimind;
1898 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
1900 ddpme->nslab = (ddpme->dim == 0 ?
1901 dd->comm->npmenodes_x :
1902 dd->comm->npmenodes_y);
1904 if (ddpme->nslab <= 1)
1909 nso = dd->comm->npmenodes/ddpme->nslab;
1910 /* Determine for each PME slab the PP location range for dimension dim */
1911 snew(ddpme->pp_min, ddpme->nslab);
1912 snew(ddpme->pp_max, ddpme->nslab);
1913 for (slab = 0; slab < ddpme->nslab; slab++)
1915 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
1916 ddpme->pp_max[slab] = 0;
1918 for (i = 0; i < dd->nnodes; i++)
1920 ddindex2xyz(dd->nc, i, xyz);
1921 /* For y only use our y/z slab.
1922 * This assumes that the PME x grid size matches the DD grid size.
1924 if (dimind == 0 || xyz[XX] == dd->ci[XX])
1926 pmeindex = ddindex2pmeindex(dd, i);
1929 slab = pmeindex/nso;
1933 slab = pmeindex % ddpme->nslab;
1935 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
1936 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1940 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1943 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1945 if (dd->comm->ddpme[0].dim == XX)
1947 return dd->comm->ddpme[0].maxshift;
1955 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1957 if (dd->comm->ddpme[0].dim == YY)
1959 return dd->comm->ddpme[0].maxshift;
1961 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
1963 return dd->comm->ddpme[1].maxshift;
1971 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
1973 rvec cell_ns_x0, rvec cell_ns_x1,
1976 gmx_domdec_comm_t *comm;
1981 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
1983 dim = dd->dim[dim_ind];
1985 /* Without PBC we don't have restrictions on the outer cells */
1986 if (!(dim >= ddbox->npbcdim &&
1987 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
1989 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
1990 comm->cellsize_min[dim])
1993 gmx_fatal(FARGS, "step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
1994 gmx_step_str(step, buf), dim2char(dim),
1995 comm->cell_x1[dim] - comm->cell_x0[dim],
1996 ddbox->skew_fac[dim],
1997 dd->comm->cellsize_min[dim],
1998 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2002 if ((isDlbOn(dd->comm) && dd->ndim > 1) || ddbox->nboundeddim < DIM)
2004 /* Communicate the boundaries and update cell_ns_x0/1 */
2005 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
2006 if (isDlbOn(dd->comm) && dd->ndim > 1)
2008 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
2013 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
2015 /* Note that the cycles value can be incorrect, either 0 or some
2016 * extremely large value, when our thread migrated to another core
2017 * with an unsynchronized cycle counter. If this happens less often
2018 * that once per nstlist steps, this will not cause issues, since
2019 * we later subtract the maximum value from the sum over nstlist steps.
2020 * A zero count will slightly lower the total, but that's a small effect.
2021 * Note that the main purpose of the subtraction of the maximum value
2022 * is to avoid throwing off the load balancing when stalls occur due
2023 * e.g. system activity or network congestion.
2025 dd->comm->cycl[ddCycl] += cycles;
2026 dd->comm->cycl_n[ddCycl]++;
2027 if (cycles > dd->comm->cycl_max[ddCycl])
2029 dd->comm->cycl_max[ddCycl] = cycles;
2033 static double force_flop_count(t_nrnb *nrnb)
2040 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
2042 /* To get closer to the real timings, we half the count
2043 * for the normal loops and again half it for water loops.
2046 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
2048 sum += nrnb->n[i]*0.25*cost_nrnb(i);
2052 sum += nrnb->n[i]*0.50*cost_nrnb(i);
2055 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
2058 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
2060 sum += nrnb->n[i]*cost_nrnb(i);
2063 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
2065 sum += nrnb->n[i]*cost_nrnb(i);
2071 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
2073 if (dd->comm->eFlop)
2075 dd->comm->flop -= force_flop_count(nrnb);
2078 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
2080 if (dd->comm->eFlop)
2082 dd->comm->flop += force_flop_count(nrnb);
2087 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
2091 for (i = 0; i < ddCyclNr; i++)
2093 dd->comm->cycl[i] = 0;
2094 dd->comm->cycl_n[i] = 0;
2095 dd->comm->cycl_max[i] = 0;
2098 dd->comm->flop_n = 0;
2101 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
2103 gmx_domdec_comm_t *comm;
2104 domdec_load_t *load;
2105 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
2110 fprintf(debug, "get_load_distribution start\n");
2113 wallcycle_start(wcycle, ewcDDCOMMLOAD);
2117 bSepPME = (dd->pme_nodeid >= 0);
2119 if (dd->ndim == 0 && bSepPME)
2121 /* Without decomposition, but with PME nodes, we need the load */
2122 comm->load[0].mdf = comm->cycl[ddCyclPPduringPME];
2123 comm->load[0].pme = comm->cycl[ddCyclPME];
2126 for (int d = dd->ndim - 1; d >= 0; d--)
2128 const DDCellsizesWithDlb &cellsizes = comm->cellsizesWithDlb[d];
2129 const int dim = dd->dim[d];
2130 /* Check if we participate in the communication in this dimension */
2131 if (d == dd->ndim-1 ||
2132 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
2134 load = &comm->load[d];
2135 if (isDlbOn(dd->comm))
2137 cell_frac = cellsizes.fracUpper - cellsizes.fracLower;
2140 if (d == dd->ndim-1)
2142 sbuf[pos++] = dd_force_load(comm);
2143 sbuf[pos++] = sbuf[0];
2144 if (isDlbOn(dd->comm))
2146 sbuf[pos++] = sbuf[0];
2147 sbuf[pos++] = cell_frac;
2150 sbuf[pos++] = cellsizes.fracLowerMax;
2151 sbuf[pos++] = cellsizes.fracUpperMin;
2156 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
2157 sbuf[pos++] = comm->cycl[ddCyclPME];
2162 sbuf[pos++] = comm->load[d+1].sum;
2163 sbuf[pos++] = comm->load[d+1].max;
2164 if (isDlbOn(dd->comm))
2166 sbuf[pos++] = comm->load[d+1].sum_m;
2167 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
2168 sbuf[pos++] = comm->load[d+1].flags;
2171 sbuf[pos++] = cellsizes.fracLowerMax;
2172 sbuf[pos++] = cellsizes.fracUpperMin;
2177 sbuf[pos++] = comm->load[d+1].mdf;
2178 sbuf[pos++] = comm->load[d+1].pme;
2182 /* Communicate a row in DD direction d.
2183 * The communicators are setup such that the root always has rank 0.
2186 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
2187 load->load, load->nload*sizeof(float), MPI_BYTE,
2188 0, comm->mpi_comm_load[d]);
2190 if (dd->ci[dim] == dd->master_ci[dim])
2192 /* We are the master along this row, process this row */
2193 RowMaster *rowMaster = nullptr;
2197 rowMaster = cellsizes.rowMaster.get();
2207 for (int i = 0; i < dd->nc[dim]; i++)
2209 load->sum += load->load[pos++];
2210 load->max = std::max(load->max, load->load[pos]);
2212 if (isDlbOn(dd->comm))
2214 if (rowMaster->dlbIsLimited)
2216 /* This direction could not be load balanced properly,
2217 * therefore we need to use the maximum iso the average load.
2219 load->sum_m = std::max(load->sum_m, load->load[pos]);
2223 load->sum_m += load->load[pos];
2226 load->cvol_min = std::min(load->cvol_min, load->load[pos]);
2230 load->flags = gmx::roundToInt(load->load[pos++]);
2234 rowMaster->bounds[i].cellFracLowerMax = load->load[pos++];
2235 rowMaster->bounds[i].cellFracUpperMin = load->load[pos++];
2240 load->mdf = std::max(load->mdf, load->load[pos]);
2242 load->pme = std::max(load->pme, load->load[pos]);
2246 if (isDlbOn(comm) && rowMaster->dlbIsLimited)
2248 load->sum_m *= dd->nc[dim];
2249 load->flags |= (1<<d);
2257 comm->nload += dd_load_count(comm);
2258 comm->load_step += comm->cycl[ddCyclStep];
2259 comm->load_sum += comm->load[0].sum;
2260 comm->load_max += comm->load[0].max;
2263 for (int d = 0; d < dd->ndim; d++)
2265 if (comm->load[0].flags & (1<<d))
2267 comm->load_lim[d]++;
2273 comm->load_mdf += comm->load[0].mdf;
2274 comm->load_pme += comm->load[0].pme;
2278 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
2282 fprintf(debug, "get_load_distribution finished\n");
2286 static float dd_force_load_fraction(gmx_domdec_t *dd)
2288 /* Return the relative performance loss on the total run time
2289 * due to the force calculation load imbalance.
2291 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
2293 return dd->comm->load_sum/(dd->comm->load_step*dd->nnodes);
2301 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
2303 /* Return the relative performance loss on the total run time
2304 * due to the force calculation load imbalance.
2306 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
2309 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
2310 (dd->comm->load_step*dd->nnodes);
2318 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
2320 gmx_domdec_comm_t *comm = dd->comm;
2322 /* Only the master rank prints loads and only if we measured loads */
2323 if (!DDMASTER(dd) || comm->nload == 0)
2329 int numPpRanks = dd->nnodes;
2330 int numPmeRanks = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
2331 int numRanks = numPpRanks + numPmeRanks;
2332 float lossFraction = 0;
2334 /* Print the average load imbalance and performance loss */
2335 if (dd->nnodes > 1 && comm->load_sum > 0)
2337 float imbalance = comm->load_max*numPpRanks/comm->load_sum - 1;
2338 lossFraction = dd_force_imb_perf_loss(dd);
2340 std::string msg = "\n Dynamic load balancing report:\n";
2341 std::string dlbStateStr;
2343 switch (dd->comm->dlbState)
2345 case DlbState::offUser:
2346 dlbStateStr = "DLB was off during the run per user request.";
2348 case DlbState::offForever:
2349 /* Currectly this can happen due to performance loss observed, cell size
2350 * limitations or incompatibility with other settings observed during
2351 * determineInitialDlbState(). */
2352 dlbStateStr = "DLB got disabled because it was unsuitable to use.";
2354 case DlbState::offCanTurnOn:
2355 dlbStateStr = "DLB was off during the run due to low measured imbalance.";
2357 case DlbState::offTemporarilyLocked:
2358 dlbStateStr = "DLB was locked at the end of the run due to unfinished PP-PME balancing.";
2360 case DlbState::onCanTurnOff:
2361 dlbStateStr = "DLB was turned on during the run due to measured imbalance.";
2363 case DlbState::onUser:
2364 dlbStateStr = "DLB was permanently on during the run per user request.";
2367 GMX_ASSERT(false, "Undocumented DLB state");
2370 msg += " " + dlbStateStr + "\n";
2371 msg += gmx::formatString(" Average load imbalance: %.1f%%.\n", imbalance*100);
2372 msg += gmx::formatString(" The balanceable part of the MD step is %d%%, load imbalance is computed from this.\n",
2373 gmx::roundToInt(dd_force_load_fraction(dd)*100));
2374 msg += gmx::formatString(" Part of the total run time spent waiting due to load imbalance: %.1f%%.\n",
2376 fprintf(fplog, "%s", msg.c_str());
2377 fprintf(stderr, "%s", msg.c_str());
2380 /* Print during what percentage of steps the load balancing was limited */
2381 bool dlbWasLimited = false;
2384 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
2385 for (int d = 0; d < dd->ndim; d++)
2387 int limitPercentage = (200*comm->load_lim[d] + 1)/(2*comm->nload);
2388 sprintf(buf+strlen(buf), " %c %d %%",
2389 dim2char(dd->dim[d]), limitPercentage);
2390 if (limitPercentage >= 50)
2392 dlbWasLimited = true;
2395 sprintf(buf + strlen(buf), "\n");
2396 fprintf(fplog, "%s", buf);
2397 fprintf(stderr, "%s", buf);
2400 /* Print the performance loss due to separate PME - PP rank imbalance */
2401 float lossFractionPme = 0;
2402 if (numPmeRanks > 0 && comm->load_mdf > 0 && comm->load_step > 0)
2404 float pmeForceRatio = comm->load_pme/comm->load_mdf;
2405 lossFractionPme = (comm->load_pme - comm->load_mdf)/comm->load_step;
2406 if (lossFractionPme <= 0)
2408 lossFractionPme *= numPmeRanks/static_cast<float>(numRanks);
2412 lossFractionPme *= numPpRanks/static_cast<float>(numRanks);
2414 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pmeForceRatio);
2415 fprintf(fplog, "%s", buf);
2416 fprintf(stderr, "%s", buf);
2417 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", std::fabs(lossFractionPme)*100);
2418 fprintf(fplog, "%s", buf);
2419 fprintf(stderr, "%s", buf);
2421 fprintf(fplog, "\n");
2422 fprintf(stderr, "\n");
2424 if (lossFraction >= DD_PERF_LOSS_WARN)
2427 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
2428 " in the domain decomposition.\n", lossFraction*100);
2431 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
2433 else if (dlbWasLimited)
2435 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
2437 fprintf(fplog, "%s\n", buf);
2438 fprintf(stderr, "%s\n", buf);
2440 if (numPmeRanks > 0 && std::fabs(lossFractionPme) >= DD_PERF_LOSS_WARN)
2443 "NOTE: %.1f %% performance was lost because the PME ranks\n"
2444 " had %s work to do than the PP ranks.\n"
2445 " You might want to %s the number of PME ranks\n"
2446 " or %s the cut-off and the grid spacing.\n",
2447 std::fabs(lossFractionPme*100),
2448 (lossFractionPme < 0) ? "less" : "more",
2449 (lossFractionPme < 0) ? "decrease" : "increase",
2450 (lossFractionPme < 0) ? "decrease" : "increase");
2451 fprintf(fplog, "%s\n", buf);
2452 fprintf(stderr, "%s\n", buf);
2456 static float dd_vol_min(gmx_domdec_t *dd)
2458 return dd->comm->load[0].cvol_min*dd->nnodes;
2461 static int dd_load_flags(gmx_domdec_t *dd)
2463 return dd->comm->load[0].flags;
2466 static float dd_f_imbal(gmx_domdec_t *dd)
2468 if (dd->comm->load[0].sum > 0)
2470 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1.0f;
2474 /* Something is wrong in the cycle counting, report no load imbalance */
2479 float dd_pme_f_ratio(gmx_domdec_t *dd)
2481 /* Should only be called on the DD master rank */
2482 assert(DDMASTER(dd));
2484 if (dd->comm->load[0].mdf > 0 && dd->comm->cycl_n[ddCyclPME] > 0)
2486 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
2495 dd_print_load(gmx_domdec_t *dd,
2498 gmx::StringOutputStream stream;
2499 gmx::TextWriter log(&stream);
2501 int flags = dd_load_flags(dd);
2504 log.writeString("DD load balancing is limited by minimum cell size in dimension");
2505 for (int d = 0; d < dd->ndim; d++)
2509 log.writeStringFormatted(" %c", dim2char(dd->dim[d]));
2512 log.ensureLineBreak();
2514 log.writeString("DD step %s" + gmx::toString(step));
2515 if (isDlbOn(dd->comm))
2517 log.writeStringFormatted(" vol min/aver %5.3f%c",
2518 dd_vol_min(dd), flags ? '!' : ' ');
2522 log.writeStringFormatted(" load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
2524 if (dd->comm->cycl_n[ddCyclPME])
2526 log.writeStringFormatted(" pme mesh/force %5.3f", dd_pme_f_ratio(dd));
2528 log.ensureLineBreak();
2529 return stream.toString();
2532 static void dd_print_load_verbose(gmx_domdec_t *dd)
2534 if (isDlbOn(dd->comm))
2536 fprintf(stderr, "vol %4.2f%c ",
2537 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
2541 fprintf(stderr, "imb F %2d%% ", gmx::roundToInt(dd_f_imbal(dd)*100));
2543 if (dd->comm->cycl_n[ddCyclPME])
2545 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
2550 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
2555 gmx_bool bPartOfGroup = FALSE;
2557 dim = dd->dim[dim_ind];
2558 copy_ivec(loc, loc_c);
2559 for (i = 0; i < dd->nc[dim]; i++)
2562 rank = dd_index(dd->nc, loc_c);
2563 if (rank == dd->rank)
2565 /* This process is part of the group */
2566 bPartOfGroup = TRUE;
2569 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
2573 dd->comm->mpi_comm_load[dim_ind] = c_row;
2574 if (!isDlbDisabled(dd->comm))
2576 DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
2578 if (dd->ci[dim] == dd->master_ci[dim])
2580 /* This is the root process of this row */
2581 cellsizes.rowMaster = gmx::compat::make_unique<RowMaster>();
2583 RowMaster &rowMaster = *cellsizes.rowMaster;
2584 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
2585 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
2586 rowMaster.isCellMin.resize(dd->nc[dim]);
2589 rowMaster.bounds.resize(dd->nc[dim]);
2591 rowMaster.buf_ncd.resize(dd->nc[dim]);
2595 /* This is not a root process, we only need to receive cell_f */
2596 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
2599 if (dd->ci[dim] == dd->master_ci[dim])
2601 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
2607 void dd_setup_dlb_resource_sharing(t_commrec *cr,
2611 int physicalnode_id_hash;
2613 MPI_Comm mpi_comm_pp_physicalnode;
2615 if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
2617 /* Only ranks with short-ranged tasks (currently) use GPUs.
2618 * If we don't have GPUs assigned, there are no resources to share.
2623 physicalnode_id_hash = gmx_physicalnode_id_hash();
2629 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
2630 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
2631 dd->rank, physicalnode_id_hash, gpu_id);
2633 /* Split the PP communicator over the physical nodes */
2634 /* TODO: See if we should store this (before), as it's also used for
2635 * for the nodecomm summation.
2637 // TODO PhysicalNodeCommunicator could be extended/used to handle
2638 // the need for per-node per-group communicators.
2639 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
2640 &mpi_comm_pp_physicalnode);
2641 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
2642 &dd->comm->mpi_comm_gpu_shared);
2643 MPI_Comm_free(&mpi_comm_pp_physicalnode);
2644 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
2648 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
2651 /* Note that some ranks could share a GPU, while others don't */
2653 if (dd->comm->nrank_gpu_shared == 1)
2655 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
2658 GMX_UNUSED_VALUE(cr);
2659 GMX_UNUSED_VALUE(gpu_id);
2663 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
2666 int dim0, dim1, i, j;
2671 fprintf(debug, "Making load communicators\n");
2674 snew(dd->comm->load, std::max(dd->ndim, 1));
2675 snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
2683 make_load_communicator(dd, 0, loc);
2687 for (i = 0; i < dd->nc[dim0]; i++)
2690 make_load_communicator(dd, 1, loc);
2696 for (i = 0; i < dd->nc[dim0]; i++)
2700 for (j = 0; j < dd->nc[dim1]; j++)
2703 make_load_communicator(dd, 2, loc);
2710 fprintf(debug, "Finished making load communicators\n");
2715 /*! \brief Sets up the relation between neighboring domains and zones */
2716 static void setup_neighbor_relations(gmx_domdec_t *dd)
2718 int d, dim, i, j, m;
2720 gmx_domdec_zones_t *zones;
2721 gmx_domdec_ns_ranges_t *izone;
2723 for (d = 0; d < dd->ndim; d++)
2726 copy_ivec(dd->ci, tmp);
2727 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
2728 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
2729 copy_ivec(dd->ci, tmp);
2730 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
2731 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
2734 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
2737 dd->neighbor[d][1]);
2741 int nzone = (1 << dd->ndim);
2742 int nizone = (1 << std::max(dd->ndim - 1, 0));
2743 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
2745 zones = &dd->comm->zones;
2747 for (i = 0; i < nzone; i++)
2750 clear_ivec(zones->shift[i]);
2751 for (d = 0; d < dd->ndim; d++)
2753 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
2758 for (i = 0; i < nzone; i++)
2760 for (d = 0; d < DIM; d++)
2762 s[d] = dd->ci[d] - zones->shift[i][d];
2767 else if (s[d] >= dd->nc[d])
2773 zones->nizone = nizone;
2774 for (i = 0; i < zones->nizone; i++)
2776 assert(ddNonbondedZonePairRanges[i][0] == i);
2778 izone = &zones->izone[i];
2779 /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
2780 * j-zones up to nzone.
2782 izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
2783 izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
2784 for (dim = 0; dim < DIM; dim++)
2786 if (dd->nc[dim] == 1)
2788 /* All shifts should be allowed */
2789 izone->shift0[dim] = -1;
2790 izone->shift1[dim] = 1;
2794 /* Determine the min/max j-zone shift wrt the i-zone */
2795 izone->shift0[dim] = 1;
2796 izone->shift1[dim] = -1;
2797 for (j = izone->j0; j < izone->j1; j++)
2799 int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
2800 if (shift_diff < izone->shift0[dim])
2802 izone->shift0[dim] = shift_diff;
2804 if (shift_diff > izone->shift1[dim])
2806 izone->shift1[dim] = shift_diff;
2813 if (!isDlbDisabled(dd->comm))
2815 dd->comm->cellsizesWithDlb.resize(dd->ndim);
2818 if (dd->comm->bRecordLoad)
2820 make_load_communicators(dd);
2824 static void make_pp_communicator(const gmx::MDLogger &mdlog,
2826 t_commrec gmx_unused *cr,
2827 bool gmx_unused reorder)
2830 gmx_domdec_comm_t *comm;
2837 if (comm->bCartesianPP)
2839 /* Set up cartesian communication for the particle-particle part */
2840 GMX_LOG(mdlog.info).appendTextFormatted(
2841 "Will use a Cartesian communicator: %d x %d x %d",
2842 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
2844 for (int i = 0; i < DIM; i++)
2848 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
2850 /* We overwrite the old communicator with the new cartesian one */
2851 cr->mpi_comm_mygroup = comm_cart;
2854 dd->mpi_comm_all = cr->mpi_comm_mygroup;
2855 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
2857 if (comm->bCartesianPP_PME)
2859 /* Since we want to use the original cartesian setup for sim,
2860 * and not the one after split, we need to make an index.
2862 snew(comm->ddindex2ddnodeid, dd->nnodes);
2863 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
2864 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
2865 /* Get the rank of the DD master,
2866 * above we made sure that the master node is a PP node.
2876 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
2878 else if (comm->bCartesianPP)
2880 if (cr->npmenodes == 0)
2882 /* The PP communicator is also
2883 * the communicator for this simulation
2885 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
2887 cr->nodeid = dd->rank;
2889 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
2891 /* We need to make an index to go from the coordinates
2892 * to the nodeid of this simulation.
2894 snew(comm->ddindex2simnodeid, dd->nnodes);
2895 snew(buf, dd->nnodes);
2896 if (thisRankHasDuty(cr, DUTY_PP))
2898 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
2900 /* Communicate the ddindex to simulation nodeid index */
2901 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
2902 cr->mpi_comm_mysim);
2905 /* Determine the master coordinates and rank.
2906 * The DD master should be the same node as the master of this sim.
2908 for (int i = 0; i < dd->nnodes; i++)
2910 if (comm->ddindex2simnodeid[i] == 0)
2912 ddindex2xyz(dd->nc, i, dd->master_ci);
2913 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
2918 fprintf(debug, "The master rank is %d\n", dd->masterrank);
2923 /* No Cartesian communicators */
2924 /* We use the rank in dd->comm->all as DD index */
2925 ddindex2xyz(dd->nc, dd->rank, dd->ci);
2926 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
2928 clear_ivec(dd->master_ci);
2932 GMX_LOG(mdlog.info).appendTextFormatted(
2933 "Domain decomposition rank %d, coordinates %d %d %d\n",
2934 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2938 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
2939 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2943 static void receive_ddindex2simnodeid(gmx_domdec_t *dd,
2947 gmx_domdec_comm_t *comm = dd->comm;
2949 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
2952 snew(comm->ddindex2simnodeid, dd->nnodes);
2953 snew(buf, dd->nnodes);
2954 if (thisRankHasDuty(cr, DUTY_PP))
2956 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
2958 /* Communicate the ddindex to simulation nodeid index */
2959 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
2960 cr->mpi_comm_mysim);
2964 GMX_UNUSED_VALUE(dd);
2965 GMX_UNUSED_VALUE(cr);
2969 static void split_communicator(const gmx::MDLogger &mdlog,
2970 t_commrec *cr, gmx_domdec_t *dd,
2971 DdRankOrder gmx_unused rankOrder,
2972 bool gmx_unused reorder)
2974 gmx_domdec_comm_t *comm;
2983 if (comm->bCartesianPP)
2985 for (i = 1; i < DIM; i++)
2987 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
2989 if (bDiv[YY] || bDiv[ZZ])
2991 comm->bCartesianPP_PME = TRUE;
2992 /* If we have 2D PME decomposition, which is always in x+y,
2993 * we stack the PME only nodes in z.
2994 * Otherwise we choose the direction that provides the thinnest slab
2995 * of PME only nodes as this will have the least effect
2996 * on the PP communication.
2997 * But for the PME communication the opposite might be better.
2999 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
3001 dd->nc[YY] > dd->nc[ZZ]))
3003 comm->cartpmedim = ZZ;
3007 comm->cartpmedim = YY;
3009 comm->ntot[comm->cartpmedim]
3010 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
3014 GMX_LOG(mdlog.info).appendTextFormatted(
3015 "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)",
3016 cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
3017 GMX_LOG(mdlog.info).appendText("Will not use a Cartesian communicator for PP <-> PME\n");
3021 if (comm->bCartesianPP_PME)
3027 GMX_LOG(mdlog.info).appendTextFormatted(
3028 "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
3029 comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
3031 for (i = 0; i < DIM; i++)
3035 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, static_cast<int>(reorder),
3037 MPI_Comm_rank(comm_cart, &rank);
3038 if (MASTER(cr) && rank != 0)
3040 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
3043 /* With this assigment we loose the link to the original communicator
3044 * which will usually be MPI_COMM_WORLD, unless have multisim.
3046 cr->mpi_comm_mysim = comm_cart;
3047 cr->sim_nodeid = rank;
3049 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
3051 GMX_LOG(mdlog.info).appendTextFormatted(
3052 "Cartesian rank %d, coordinates %d %d %d\n",
3053 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3055 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
3059 if (cr->npmenodes == 0 ||
3060 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
3062 cr->duty = DUTY_PME;
3065 /* Split the sim communicator into PP and PME only nodes */
3066 MPI_Comm_split(cr->mpi_comm_mysim,
3067 getThisRankDuties(cr),
3068 dd_index(comm->ntot, dd->ci),
3069 &cr->mpi_comm_mygroup);
3076 case DdRankOrder::pp_pme:
3077 GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
3079 case DdRankOrder::interleave:
3080 /* Interleave the PP-only and PME-only ranks */
3081 GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
3082 comm->pmenodes = dd_interleaved_pme_ranks(dd);
3084 case DdRankOrder::cartesian:
3087 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(rankOrder));
3090 if (dd_simnode2pmenode(dd, cr, cr->sim_nodeid) == -1)
3092 cr->duty = DUTY_PME;
3099 /* Split the sim communicator into PP and PME only nodes */
3100 MPI_Comm_split(cr->mpi_comm_mysim,
3101 getThisRankDuties(cr),
3103 &cr->mpi_comm_mygroup);
3104 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
3108 GMX_LOG(mdlog.info).appendTextFormatted(
3109 "This rank does only %s work.\n",
3110 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
3113 /*! \brief Generates the MPI communicators for domain decomposition */
3114 static void make_dd_communicators(const gmx::MDLogger &mdlog,
3116 gmx_domdec_t *dd, DdRankOrder ddRankOrder)
3118 gmx_domdec_comm_t *comm;
3123 copy_ivec(dd->nc, comm->ntot);
3125 comm->bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
3126 comm->bCartesianPP_PME = FALSE;
3128 /* Reorder the nodes by default. This might change the MPI ranks.
3129 * Real reordering is only supported on very few architectures,
3130 * Blue Gene is one of them.
3132 CartReorder = getenv("GMX_NO_CART_REORDER") == nullptr;
3134 if (cr->npmenodes > 0)
3136 /* Split the communicator into a PP and PME part */
3137 split_communicator(mdlog, cr, dd, ddRankOrder, CartReorder);
3138 if (comm->bCartesianPP_PME)
3140 /* We (possibly) reordered the nodes in split_communicator,
3141 * so it is no longer required in make_pp_communicator.
3143 CartReorder = FALSE;
3148 /* All nodes do PP and PME */
3150 /* We do not require separate communicators */
3151 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
3155 if (thisRankHasDuty(cr, DUTY_PP))
3157 /* Copy or make a new PP communicator */
3158 make_pp_communicator(mdlog, dd, cr, CartReorder);
3162 receive_ddindex2simnodeid(dd, cr);
3165 if (!thisRankHasDuty(cr, DUTY_PME))
3167 /* Set up the commnuication to our PME node */
3168 dd->pme_nodeid = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
3169 dd->pme_receive_vir_ener = receive_vir_ener(dd, cr);
3172 fprintf(debug, "My pme_nodeid %d receive ener %s\n",
3173 dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
3178 dd->pme_nodeid = -1;
3183 dd->ma = gmx::compat::make_unique<AtomDistribution>(dd->nc,
3185 comm->cgs_gl.index[comm->cgs_gl.nr]);
3189 static real *get_slb_frac(const gmx::MDLogger &mdlog,
3190 const char *dir, int nc, const char *size_string)
3192 real *slb_frac, tot;
3197 if (nc > 1 && size_string != nullptr)
3199 GMX_LOG(mdlog.info).appendTextFormatted(
3200 "Using static load balancing for the %s direction", dir);
3203 for (i = 0; i < nc; i++)
3206 sscanf(size_string, "%20lf%n", &dbl, &n);
3209 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
3216 std::string relativeCellSizes = "Relative cell sizes:";
3217 for (i = 0; i < nc; i++)
3220 relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
3222 GMX_LOG(mdlog.info).appendText(relativeCellSizes);
3228 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
3231 gmx_mtop_ilistloop_t iloop;
3234 iloop = gmx_mtop_ilistloop_init(mtop);
3235 while (const InteractionLists *il = gmx_mtop_ilistloop_next(iloop, &nmol))
3237 for (ftype = 0; ftype < F_NRE; ftype++)
3239 if ((interaction_function[ftype].flags & IF_BOND) &&
3242 n += nmol*(*il)[ftype].size()/(1 + NRAL(ftype));
3250 static int dd_getenv(const gmx::MDLogger &mdlog,
3251 const char *env_var, int def)
3257 val = getenv(env_var);
3260 if (sscanf(val, "%20d", &nst) <= 0)
3264 GMX_LOG(mdlog.info).appendTextFormatted(
3265 "Found env.var. %s = %s, using value %d",
3272 static void check_dd_restrictions(const gmx_domdec_t *dd,
3273 const t_inputrec *ir,
3274 const gmx::MDLogger &mdlog)
3276 if (ir->ePBC == epbcSCREW &&
3277 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
3279 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
3282 if (ir->ns_type == ensSIMPLE)
3284 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
3287 if (ir->nstlist == 0)
3289 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
3292 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
3294 GMX_LOG(mdlog.warning).appendText("comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
3298 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3303 r = ddbox->box_size[XX];
3304 for (di = 0; di < dd->ndim; di++)
3307 /* Check using the initial average cell size */
3308 r = std::min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
3314 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
3316 static DlbState forceDlbOffOrBail(DlbState cmdlineDlbState,
3317 const std::string &reasonStr,
3318 const gmx::MDLogger &mdlog)
3320 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
3321 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
3323 if (cmdlineDlbState == DlbState::onUser)
3325 gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
3327 else if (cmdlineDlbState == DlbState::offCanTurnOn)
3329 GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
3331 return DlbState::offForever;
3334 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
3336 * This function parses the parameters of "-dlb" command line option setting
3337 * corresponding state values. Then it checks the consistency of the determined
3338 * state with other run parameters and settings. As a result, the initial state
3339 * may be altered or an error may be thrown if incompatibility of options is detected.
3341 * \param [in] mdlog Logger.
3342 * \param [in] dlbOption Enum value for the DLB option.
3343 * \param [in] bRecordLoad True if the load balancer is recording load information.
3344 * \param [in] mdrunOptions Options for mdrun.
3345 * \param [in] ir Pointer mdrun to input parameters.
3346 * \returns DLB initial/startup state.
3348 static DlbState determineInitialDlbState(const gmx::MDLogger &mdlog,
3349 DlbOption dlbOption, gmx_bool bRecordLoad,
3350 const MdrunOptions &mdrunOptions,
3351 const t_inputrec *ir)
3353 DlbState dlbState = DlbState::offCanTurnOn;
3357 case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
3358 case DlbOption::no: dlbState = DlbState::offUser; break;
3359 case DlbOption::yes: dlbState = DlbState::onUser; break;
3360 default: gmx_incons("Invalid dlbOption enum value");
3363 /* Reruns don't support DLB: bail or override auto mode */
3364 if (mdrunOptions.rerun)
3366 std::string reasonStr = "it is not supported in reruns.";
3367 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
3370 /* Unsupported integrators */
3371 if (!EI_DYNAMICS(ir->eI))
3373 auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
3374 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
3377 /* Without cycle counters we can't time work to balance on */
3380 std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
3381 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
3384 if (mdrunOptions.reproducible)
3386 std::string reasonStr = "you started a reproducible run.";
3389 case DlbState::offUser:
3391 case DlbState::offForever:
3392 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
3394 case DlbState::offCanTurnOn:
3395 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
3396 case DlbState::onCanTurnOff:
3397 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
3399 case DlbState::onUser:
3400 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", mdlog);
3402 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", static_cast<int>(dlbState));
3409 static void set_dd_dim(const gmx::MDLogger &mdlog, gmx_domdec_t *dd)
3412 if (getenv("GMX_DD_ORDER_ZYX") != nullptr)
3414 /* Decomposition order z,y,x */
3415 GMX_LOG(mdlog.info).appendText("Using domain decomposition order z, y, x");
3416 for (int dim = DIM-1; dim >= 0; dim--)
3418 if (dd->nc[dim] > 1)
3420 dd->dim[dd->ndim++] = dim;
3426 /* Decomposition order x,y,z */
3427 for (int dim = 0; dim < DIM; dim++)
3429 if (dd->nc[dim] > 1)
3431 dd->dim[dd->ndim++] = dim;
3438 /* Set dim[0] to avoid extra checks on ndim in several places */
3443 static gmx_domdec_comm_t *init_dd_comm()
3445 gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
3447 comm->n_load_have = 0;
3448 comm->n_load_collect = 0;
3450 comm->haveTurnedOffDlb = false;
3452 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
3454 comm->sum_nat[i] = 0;
3458 comm->load_step = 0;
3461 clear_ivec(comm->load_lim);
3465 /* This should be replaced by a unique pointer */
3466 comm->balanceRegion = ddBalanceRegionAllocate();
3471 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
3472 static void set_dd_limits_and_grid(const gmx::MDLogger &mdlog,
3473 t_commrec *cr, gmx_domdec_t *dd,
3474 const DomdecOptions &options,
3475 const MdrunOptions &mdrunOptions,
3476 const gmx_mtop_t *mtop,
3477 const t_inputrec *ir,
3479 gmx::ArrayRef<const gmx::RVec> xGlobal,
3483 real r_bonded_limit = -1;
3484 const real tenPercentMargin = 1.1;
3485 gmx_domdec_comm_t *comm = dd->comm;
3487 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
3488 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
3490 dd->pme_recv_f_alloc = 0;
3491 dd->pme_recv_f_buf = nullptr;
3493 /* Initialize to GPU share count to 0, might change later */
3494 comm->nrank_gpu_shared = 0;
3496 comm->dlbState = determineInitialDlbState(mdlog, options.dlbOption, comm->bRecordLoad, mdrunOptions, ir);
3497 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
3498 /* To consider turning DLB on after 2*nstlist steps we need to check
3499 * at partitioning count 3. Thus we need to increase the first count by 2.
3501 comm->ddPartioningCountFirstDlbOff += 2;
3503 GMX_LOG(mdlog.info).appendTextFormatted(
3504 "Dynamic load balancing: %s", edlbs_names[int(comm->dlbState)]);
3506 comm->bPMELoadBalDLBLimits = FALSE;
3508 /* Allocate the charge group/atom sorting struct */
3509 comm->sort = gmx::compat::make_unique<gmx_domdec_sort_t>();
3511 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
3513 comm->bInterCGBondeds = ((ncg_mtop(mtop) > gmx_mtop_num_molecules(*mtop)) ||
3514 mtop->bIntermolecularInteractions);
3515 if (comm->bInterCGBondeds)
3517 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
3521 comm->bInterCGMultiBody = FALSE;
3524 dd->bInterCGcons = gmx::inter_charge_group_constraints(*mtop);
3525 dd->bInterCGsettles = gmx::inter_charge_group_settles(*mtop);
3529 /* Set the cut-off to some very large value,
3530 * so we don't need if statements everywhere in the code.
3531 * We use sqrt, since the cut-off is squared in some places.
3533 comm->cutoff = GMX_CUTOFF_INF;
3537 comm->cutoff = ir->rlist;
3539 comm->cutoff_mbody = 0;
3541 /* Determine the minimum cell size limit, affected by many factors */
3542 comm->cellsize_limit = 0;
3543 comm->bBondComm = FALSE;
3545 /* We do not allow home atoms to move beyond the neighboring domain
3546 * between domain decomposition steps, which limits the cell size.
3547 * Get an estimate of cell size limit due to atom displacement.
3548 * In most cases this is a large overestimate, because it assumes
3549 * non-interaction atoms.
3550 * We set the chance to 1 in a trillion steps.
3552 constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
3553 const real limitForAtomDisplacement =
3554 minCellSizeForAtomDisplacement(*mtop, *ir, c_chanceThatAtomMovesBeyondDomain);
3555 GMX_LOG(mdlog.info).appendTextFormatted(
3556 "Minimum cell size due to atom displacement: %.3f nm",
3557 limitForAtomDisplacement);
3559 comm->cellsize_limit = std::max(comm->cellsize_limit,
3560 limitForAtomDisplacement);
3562 /* TODO: PME decomposition currently requires atoms not to be more than
3563 * 2/3 of comm->cutoff, which is >=rlist, outside of their domain.
3564 * In nearly all cases, limitForAtomDisplacement will be smaller
3565 * than 2/3*rlist, so the PME requirement is satisfied.
3566 * But it would be better for both correctness and performance
3567 * to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
3568 * Note that we would need to improve the pairlist buffer case.
3571 if (comm->bInterCGBondeds)
3573 if (options.minimumCommunicationRange > 0)
3575 comm->cutoff_mbody = options.minimumCommunicationRange;
3576 if (options.useBondedCommunication)
3578 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
3582 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
3584 r_bonded_limit = comm->cutoff_mbody;
3586 else if (ir->bPeriodicMols)
3588 /* Can not easily determine the required cut-off */
3589 GMX_LOG(mdlog.warning).appendText("NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.");
3590 comm->cutoff_mbody = comm->cutoff/2;
3591 r_bonded_limit = comm->cutoff_mbody;
3599 dd_bonded_cg_distance(mdlog, mtop, ir, as_rvec_array(xGlobal.data()), box,
3600 options.checkBondedInteractions,
3603 gmx_bcast(sizeof(r_2b), &r_2b, cr);
3604 gmx_bcast(sizeof(r_mb), &r_mb, cr);
3606 /* We use an initial margin of 10% for the minimum cell size,
3607 * except when we are just below the non-bonded cut-off.
3609 if (options.useBondedCommunication)
3611 if (std::max(r_2b, r_mb) > comm->cutoff)
3613 r_bonded = std::max(r_2b, r_mb);
3614 r_bonded_limit = tenPercentMargin*r_bonded;
3615 comm->bBondComm = TRUE;
3620 r_bonded_limit = std::min(tenPercentMargin*r_bonded, comm->cutoff);
3622 /* We determine cutoff_mbody later */
3626 /* No special bonded communication,
3627 * simply increase the DD cut-off.
3629 r_bonded_limit = tenPercentMargin*std::max(r_2b, r_mb);
3630 comm->cutoff_mbody = r_bonded_limit;
3631 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
3634 GMX_LOG(mdlog.info).appendTextFormatted(
3635 "Minimum cell size due to bonded interactions: %.3f nm",
3638 comm->cellsize_limit = std::max(comm->cellsize_limit, r_bonded_limit);
3642 if (dd->bInterCGcons && options.constraintCommunicationRange <= 0)
3644 /* There is a cell size limit due to the constraints (P-LINCS) */
3645 rconstr = gmx::constr_r_max(mdlog, mtop, ir);
3646 GMX_LOG(mdlog.info).appendTextFormatted(
3647 "Estimated maximum distance required for P-LINCS: %.3f nm",
3649 if (rconstr > comm->cellsize_limit)
3651 GMX_LOG(mdlog.info).appendText("This distance will limit the DD cell size, you can override this with -rcon");
3654 else if (options.constraintCommunicationRange > 0)
3656 /* Here we do not check for dd->bInterCGcons,
3657 * because one can also set a cell size limit for virtual sites only
3658 * and at this point we don't know yet if there are intercg v-sites.
3660 GMX_LOG(mdlog.info).appendTextFormatted(
3661 "User supplied maximum distance required for P-LINCS: %.3f nm",
3662 options.constraintCommunicationRange);
3663 rconstr = options.constraintCommunicationRange;
3665 comm->cellsize_limit = std::max(comm->cellsize_limit, rconstr);
3667 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
3669 if (options.numCells[XX] > 0)
3671 copy_ivec(options.numCells, dd->nc);
3672 set_dd_dim(mdlog, dd);
3673 set_ddbox_cr(cr, &dd->nc, ir, box, xGlobal, ddbox);
3675 if (options.numPmeRanks >= 0)
3677 cr->npmenodes = options.numPmeRanks;
3681 /* When the DD grid is set explicitly and -npme is set to auto,
3682 * don't use PME ranks. We check later if the DD grid is
3683 * compatible with the total number of ranks.
3688 real acs = average_cellsize_min(dd, ddbox);
3689 if (acs < comm->cellsize_limit)
3691 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3692 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
3693 acs, comm->cellsize_limit);
3698 set_ddbox_cr(cr, nullptr, ir, box, xGlobal, ddbox);
3700 /* We need to choose the optimal DD grid and possibly PME nodes */
3702 dd_choose_grid(mdlog, cr, dd, ir, mtop, box, ddbox,
3703 options.numPmeRanks,
3704 !isDlbDisabled(comm),
3706 comm->cellsize_limit, comm->cutoff,
3707 comm->bInterCGBondeds);
3709 if (dd->nc[XX] == 0)
3712 gmx_bool bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
3713 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
3714 !bC ? "-rdd" : "-rcon",
3715 comm->dlbState != DlbState::offUser ? " or -dds" : "",
3716 bC ? " or your LINCS settings" : "");
3718 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3719 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
3721 "Look in the log file for details on the domain decomposition",
3722 cr->nnodes-cr->npmenodes, limit, buf);
3724 set_dd_dim(mdlog, dd);
3727 GMX_LOG(mdlog.info).appendTextFormatted(
3728 "Domain decomposition grid %d x %d x %d, separate PME ranks %d",
3729 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
3731 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
3732 if (cr->nnodes - dd->nnodes != cr->npmenodes)
3734 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3735 "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
3736 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
3738 if (cr->npmenodes > dd->nnodes)
3740 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3741 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
3743 if (cr->npmenodes > 0)
3745 comm->npmenodes = cr->npmenodes;
3749 comm->npmenodes = dd->nnodes;
3752 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
3754 /* The following choices should match those
3755 * in comm_cost_est in domdec_setup.c.
3756 * Note that here the checks have to take into account
3757 * that the decomposition might occur in a different order than xyz
3758 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
3759 * in which case they will not match those in comm_cost_est,
3760 * but since that is mainly for testing purposes that's fine.
3762 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
3763 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
3764 getenv("GMX_PMEONEDD") == nullptr)
3766 comm->npmedecompdim = 2;
3767 comm->npmenodes_x = dd->nc[XX];
3768 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
3772 /* In case nc is 1 in both x and y we could still choose to
3773 * decompose pme in y instead of x, but we use x for simplicity.
3775 comm->npmedecompdim = 1;
3776 if (dd->dim[0] == YY)
3778 comm->npmenodes_x = 1;
3779 comm->npmenodes_y = comm->npmenodes;
3783 comm->npmenodes_x = comm->npmenodes;
3784 comm->npmenodes_y = 1;
3787 GMX_LOG(mdlog.info).appendTextFormatted(
3788 "PME domain decomposition: %d x %d x %d",
3789 comm->npmenodes_x, comm->npmenodes_y, 1);
3793 comm->npmedecompdim = 0;
3794 comm->npmenodes_x = 0;
3795 comm->npmenodes_y = 0;
3798 snew(comm->slb_frac, DIM);
3799 if (isDlbDisabled(comm))
3801 comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
3802 comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
3803 comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
3806 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
3808 if (comm->bBondComm || !isDlbDisabled(comm))
3810 /* Set the bonded communication distance to halfway
3811 * the minimum and the maximum,
3812 * since the extra communication cost is nearly zero.
3814 real acs = average_cellsize_min(dd, ddbox);
3815 comm->cutoff_mbody = 0.5*(r_bonded + acs);
3816 if (!isDlbDisabled(comm))
3818 /* Check if this does not limit the scaling */
3819 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
3820 options.dlbScaling*acs);
3822 if (!comm->bBondComm)
3824 /* Without bBondComm do not go beyond the n.b. cut-off */
3825 comm->cutoff_mbody = std::min(comm->cutoff_mbody, comm->cutoff);
3826 if (comm->cellsize_limit >= comm->cutoff)
3828 /* We don't loose a lot of efficieny
3829 * when increasing it to the n.b. cut-off.
3830 * It can even be slightly faster, because we need
3831 * less checks for the communication setup.
3833 comm->cutoff_mbody = comm->cutoff;
3836 /* Check if we did not end up below our original limit */
3837 comm->cutoff_mbody = std::max(comm->cutoff_mbody, r_bonded_limit);
3839 if (comm->cutoff_mbody > comm->cellsize_limit)
3841 comm->cellsize_limit = comm->cutoff_mbody;
3844 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
3849 fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
3850 "cellsize limit %f\n",
3851 gmx::boolToString(comm->bBondComm), comm->cellsize_limit);
3854 check_dd_restrictions(dd, ir, mdlog);
3857 static void set_dlb_limits(gmx_domdec_t *dd)
3860 for (int d = 0; d < dd->ndim; d++)
3862 /* Set the number of pulses to the value for DLB */
3863 dd->comm->cd[d].ind.resize(dd->comm->cd[d].np_dlb);
3865 dd->comm->cellsize_min[dd->dim[d]] =
3866 dd->comm->cellsize_min_dlb[dd->dim[d]];
3871 static void turn_on_dlb(const gmx::MDLogger &mdlog,
3875 gmx_domdec_comm_t *comm = dd->comm;
3877 real cellsize_min = comm->cellsize_min[dd->dim[0]];
3878 for (int d = 1; d < dd->ndim; d++)
3880 cellsize_min = std::min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
3883 /* Turn off DLB if we're too close to the cell size limit. */
3884 if (cellsize_min < comm->cellsize_limit*1.05)
3886 GMX_LOG(mdlog.info).appendTextFormatted(
3887 "step %s Measured %.1f %% performance loss due to load imbalance, "
3888 "but the minimum cell size is smaller than 1.05 times the cell size limit. "
3889 "Will no longer try dynamic load balancing.",
3890 gmx::toString(step).c_str(), dd_force_imb_perf_loss(dd)*100);
3892 comm->dlbState = DlbState::offForever;
3896 GMX_LOG(mdlog.info).appendTextFormatted(
3897 "step %s Turning on dynamic load balancing, because the performance loss due to load imbalance is %.1f %%.",
3898 gmx::toString(step).c_str(), dd_force_imb_perf_loss(dd)*100);
3899 comm->dlbState = DlbState::onCanTurnOff;
3901 /* Store the non-DLB performance, so we can check if DLB actually
3902 * improves performance.
3904 GMX_RELEASE_ASSERT(comm->cycl_n[ddCyclStep] > 0, "When we turned on DLB, we should have measured cycles");
3905 comm->cyclesPerStepBeforeDLB = comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
3909 /* We can set the required cell size info here,
3910 * so we do not need to communicate this.
3911 * The grid is completely uniform.
3913 for (int d = 0; d < dd->ndim; d++)
3915 RowMaster *rowMaster = comm->cellsizesWithDlb[d].rowMaster.get();
3919 comm->load[d].sum_m = comm->load[d].sum;
3921 int nc = dd->nc[dd->dim[d]];
3922 for (int i = 0; i < nc; i++)
3924 rowMaster->cellFrac[i] = i/static_cast<real>(nc);
3927 rowMaster->bounds[i].cellFracLowerMax = i /static_cast<real>(nc);
3928 rowMaster->bounds[i].cellFracUpperMin = (i + 1)/static_cast<real>(nc);
3931 rowMaster->cellFrac[nc] = 1.0;
3936 static void turn_off_dlb(const gmx::MDLogger &mdlog,
3940 GMX_LOG(mdlog.info).appendText(
3941 "step " + gmx::toString(step) + " Turning off dynamic load balancing, because it is degrading performance.");
3942 dd->comm->dlbState = DlbState::offCanTurnOn;
3943 dd->comm->haveTurnedOffDlb = true;
3944 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
3947 static void turn_off_dlb_forever(const gmx::MDLogger &mdlog,
3951 GMX_RELEASE_ASSERT(dd->comm->dlbState == DlbState::offCanTurnOn, "Can only turn off DLB forever when it was in the can-turn-on state");
3952 GMX_LOG(mdlog.info).appendText(
3953 "step " + gmx::toString(step) + " Will no longer try dynamic load balancing, as it degraded performance.");
3954 dd->comm->dlbState = DlbState::offForever;
3957 static char *init_bLocalCG(const gmx_mtop_t *mtop)
3962 ncg = ncg_mtop(mtop);
3963 snew(bLocalCG, ncg);
3964 for (cg = 0; cg < ncg; cg++)
3966 bLocalCG[cg] = FALSE;
3972 void dd_init_bondeds(FILE *fplog,
3974 const gmx_mtop_t *mtop,
3975 const gmx_vsite_t *vsite,
3976 const t_inputrec *ir,
3977 gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
3979 gmx_domdec_comm_t *comm;
3981 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
3985 if (comm->bBondComm)
3987 /* Communicate atoms beyond the cut-off for bonded interactions */
3990 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
3992 comm->bLocalCG = init_bLocalCG(mtop);
3996 /* Only communicate atoms based on cut-off */
3997 comm->cglink = nullptr;
3998 comm->bLocalCG = nullptr;
4002 static void writeSettings(gmx::TextWriter *log,
4004 const gmx_mtop_t *mtop,
4005 const t_inputrec *ir,
4006 gmx_bool bDynLoadBal,
4008 const gmx_ddbox_t *ddbox)
4010 gmx_domdec_comm_t *comm;
4019 log->writeString("The maximum number of communication pulses is:");
4020 for (d = 0; d < dd->ndim; d++)
4022 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
4024 log->ensureLineBreak();
4025 log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm", comm->cellsize_limit);
4026 log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
4027 log->writeString("The allowed shrink of domain decomposition cells is:");
4028 for (d = 0; d < DIM; d++)
4032 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
4039 comm->cellsize_min_dlb[d]/
4040 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
4042 log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
4045 log->ensureLineBreak();
4049 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
4050 log->writeString("The initial number of communication pulses is:");
4051 for (d = 0; d < dd->ndim; d++)
4053 log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
4055 log->ensureLineBreak();
4056 log->writeString("The initial domain decomposition cell size is:");
4057 for (d = 0; d < DIM; d++)
4061 log->writeStringFormatted(" %c %.2f nm",
4062 dim2char(d), dd->comm->cellsize_min[d]);
4065 log->ensureLineBreak();
4069 gmx_bool bInterCGVsites = count_intercg_vsites(mtop) != 0;
4071 if (comm->bInterCGBondeds ||
4073 dd->bInterCGcons || dd->bInterCGsettles)
4075 log->writeLine("The maximum allowed distance for charge groups involved in interactions is:");
4076 log->writeLineFormatted("%40s %-7s %6.3f nm", "non-bonded interactions", "", comm->cutoff);
4080 limit = dd->comm->cellsize_limit;
4084 if (dynamic_dd_box(ddbox, ir))
4086 log->writeLine("(the following are initial values, they could change due to box deformation)");
4088 limit = dd->comm->cellsize_min[XX];
4089 for (d = 1; d < DIM; d++)
4091 limit = std::min(limit, dd->comm->cellsize_min[d]);
4095 if (comm->bInterCGBondeds)
4097 log->writeLineFormatted("%40s %-7s %6.3f nm",
4098 "two-body bonded interactions", "(-rdd)",
4099 std::max(comm->cutoff, comm->cutoff_mbody));
4100 log->writeLineFormatted("%40s %-7s %6.3f nm",
4101 "multi-body bonded interactions", "(-rdd)",
4102 (comm->bBondComm || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
4106 log->writeLineFormatted("%40s %-7s %6.3f nm",
4107 "virtual site constructions", "(-rcon)", limit);
4109 if (dd->bInterCGcons || dd->bInterCGsettles)
4111 std::string separation = gmx::formatString("atoms separated by up to %d constraints",
4113 log->writeLineFormatted("%40s %-7s %6.3f nm\n",
4114 separation.c_str(), "(-rcon)", limit);
4116 log->ensureLineBreak();
4120 static void logSettings(const gmx::MDLogger &mdlog,
4122 const gmx_mtop_t *mtop,
4123 const t_inputrec *ir,
4125 const gmx_ddbox_t *ddbox)
4127 gmx::StringOutputStream stream;
4128 gmx::TextWriter log(&stream);
4129 writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
4130 if (dd->comm->dlbState == DlbState::offCanTurnOn)
4133 log.ensureEmptyLine();
4134 log.writeLine("When dynamic load balancing gets turned on, these settings will change to:");
4136 writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
4138 GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
4141 static void set_cell_limits_dlb(const gmx::MDLogger &mdlog,
4144 const t_inputrec *ir,
4145 const gmx_ddbox_t *ddbox)
4147 gmx_domdec_comm_t *comm;
4148 int d, dim, npulse, npulse_d_max, npulse_d;
4153 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
4155 /* Determine the maximum number of comm. pulses in one dimension */
4157 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
4159 /* Determine the maximum required number of grid pulses */
4160 if (comm->cellsize_limit >= comm->cutoff)
4162 /* Only a single pulse is required */
4165 else if (!bNoCutOff && comm->cellsize_limit > 0)
4167 /* We round down slightly here to avoid overhead due to the latency
4168 * of extra communication calls when the cut-off
4169 * would be only slightly longer than the cell size.
4170 * Later cellsize_limit is redetermined,
4171 * so we can not miss interactions due to this rounding.
4173 npulse = static_cast<int>(0.96 + comm->cutoff/comm->cellsize_limit);
4177 /* There is no cell size limit */
4178 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
4181 if (!bNoCutOff && npulse > 1)
4183 /* See if we can do with less pulses, based on dlb_scale */
4185 for (d = 0; d < dd->ndim; d++)
4188 npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->cutoff
4189 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
4190 npulse_d_max = std::max(npulse_d_max, npulse_d);
4192 npulse = std::min(npulse, npulse_d_max);
4195 /* This env var can override npulse */
4196 d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
4203 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
4204 for (d = 0; d < dd->ndim; d++)
4206 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
4207 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
4208 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
4210 comm->bVacDLBNoLimit = FALSE;
4214 /* cellsize_limit is set for LINCS in init_domain_decomposition */
4215 if (!comm->bVacDLBNoLimit)
4217 comm->cellsize_limit = std::max(comm->cellsize_limit,
4218 comm->cutoff/comm->maxpulse);
4220 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
4221 /* Set the minimum cell size for each DD dimension */
4222 for (d = 0; d < dd->ndim; d++)
4224 if (comm->bVacDLBNoLimit ||
4225 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
4227 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
4231 comm->cellsize_min_dlb[dd->dim[d]] =
4232 comm->cutoff/comm->cd[d].np_dlb;
4235 if (comm->cutoff_mbody <= 0)
4237 comm->cutoff_mbody = std::min(comm->cutoff, comm->cellsize_limit);
4245 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
4247 /* If each molecule is a single charge group
4248 * or we use domain decomposition for each periodic dimension,
4249 * we do not need to take pbc into account for the bonded interactions.
4251 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
4254 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
4257 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
4258 static void set_ddgrid_parameters(const gmx::MDLogger &mdlog,
4259 gmx_domdec_t *dd, real dlb_scale,
4260 const gmx_mtop_t *mtop, const t_inputrec *ir,
4261 const gmx_ddbox_t *ddbox)
4263 gmx_domdec_comm_t *comm;
4269 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
4271 init_ddpme(dd, &comm->ddpme[0], 0);
4272 if (comm->npmedecompdim >= 2)
4274 init_ddpme(dd, &comm->ddpme[1], 1);
4279 comm->npmenodes = 0;
4280 if (dd->pme_nodeid >= 0)
4282 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
4283 "Can not have separate PME ranks without PME electrostatics");
4289 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
4291 if (!isDlbDisabled(comm))
4293 set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
4296 logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
4298 if (ir->ePBC == epbcNONE)
4300 vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
4305 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/static_cast<double>(dd->nnodes);
4309 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
4311 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
4313 dd->ga2la = new gmx_ga2la_t(natoms_tot,
4314 static_cast<int>(vol_frac*natoms_tot));
4317 /*! \brief Set some important DD parameters that can be modified by env.vars */
4318 static void set_dd_envvar_options(const gmx::MDLogger &mdlog,
4319 gmx_domdec_t *dd, int rank_mysim)
4321 gmx_domdec_comm_t *comm = dd->comm;
4323 dd->bSendRecv2 = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
4324 comm->dlb_scale_lim = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
4325 comm->eFlop = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
4326 int recload = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
4327 comm->nstDDDump = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
4328 comm->nstDDDumpGrid = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
4329 comm->DD_debug = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);
4333 GMX_LOG(mdlog.info).appendText("Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication");
4338 GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
4339 if (comm->eFlop > 1)
4341 srand(1 + rank_mysim);
4343 comm->bRecordLoad = TRUE;
4347 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
4351 DomdecOptions::DomdecOptions() :
4352 checkBondedInteractions(TRUE),
4353 useBondedCommunication(TRUE),
4355 rankOrder(DdRankOrder::pp_pme),
4356 minimumCommunicationRange(0),
4357 constraintCommunicationRange(0),
4358 dlbOption(DlbOption::turnOnWhenUseful),
4364 clear_ivec(numCells);
4367 gmx_domdec_t *init_domain_decomposition(const gmx::MDLogger &mdlog,
4369 const DomdecOptions &options,
4370 const MdrunOptions &mdrunOptions,
4371 const gmx_mtop_t *mtop,
4372 const t_inputrec *ir,
4374 gmx::ArrayRef<const gmx::RVec> xGlobal,
4375 gmx::LocalAtomSetManager *atomSets)
4379 GMX_LOG(mdlog.info).appendTextFormatted(
4380 "\nInitializing Domain Decomposition on %d ranks", cr->nnodes);
4382 dd = new gmx_domdec_t;
4384 dd->comm = init_dd_comm();
4386 /* Initialize DD paritioning counters */
4387 dd->comm->partition_step = INT_MIN;
4390 set_dd_envvar_options(mdlog, dd, cr->nodeid);
4392 gmx_ddbox_t ddbox = {0};
4393 set_dd_limits_and_grid(mdlog, cr, dd, options, mdrunOptions,
4398 make_dd_communicators(mdlog, cr, dd, options.rankOrder);
4400 if (thisRankHasDuty(cr, DUTY_PP))
4402 set_ddgrid_parameters(mdlog, dd, options.dlbScaling, mtop, ir, &ddbox);
4404 setup_neighbor_relations(dd);
4407 /* Set overallocation to avoid frequent reallocation of arrays */
4408 set_over_alloc_dd(TRUE);
4410 clear_dd_cycle_counts(dd);
4412 dd->atomSets = atomSets;
4417 static gmx_bool test_dd_cutoff(t_commrec *cr,
4418 t_state *state, const t_inputrec *ir,
4429 set_ddbox(dd, false, ir, state->box, true, state->x, &ddbox);
4433 for (d = 0; d < dd->ndim; d++)
4437 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
4438 if (dynamic_dd_box(&ddbox, ir))
4440 inv_cell_size *= DD_PRES_SCALE_MARGIN;
4443 np = 1 + static_cast<int>(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
4445 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
4447 if (np > dd->comm->cd[d].np_dlb)
4452 /* If a current local cell size is smaller than the requested
4453 * cut-off, we could still fix it, but this gets very complicated.
4454 * Without fixing here, we might actually need more checks.
4456 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
4463 if (!isDlbDisabled(dd->comm))
4465 /* If DLB is not active yet, we don't need to check the grid jumps.
4466 * Actually we shouldn't, because then the grid jump data is not set.
4468 if (isDlbOn(dd->comm) &&
4469 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
4474 gmx_sumi(1, &LocallyLimited, cr);
4476 if (LocallyLimited > 0)
4485 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, const t_inputrec *ir,
4488 gmx_bool bCutoffAllowed;
4490 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
4494 cr->dd->comm->cutoff = cutoff_req;
4497 return bCutoffAllowed;
4500 void set_dd_dlb_max_cutoff(t_commrec *cr, real cutoff)
4502 gmx_domdec_comm_t *comm;
4504 comm = cr->dd->comm;
4506 /* Turn on the DLB limiting (might have been on already) */
4507 comm->bPMELoadBalDLBLimits = TRUE;
4509 /* Change the cut-off limit */
4510 comm->PMELoadBal_max_cutoff = cutoff;
4514 fprintf(debug, "PME load balancing set a limit to the DLB staggering such that a %f cut-off will continue to fit\n",
4515 comm->PMELoadBal_max_cutoff);
4519 /* Sets whether we should later check the load imbalance data, so that
4520 * we can trigger dynamic load balancing if enough imbalance has
4523 * Used after PME load balancing unlocks DLB, so that the check
4524 * whether DLB will be useful can happen immediately.
4526 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue)
4528 if (dd->comm->dlbState == DlbState::offCanTurnOn)
4530 dd->comm->bCheckWhetherToTurnDlbOn = bValue;
4534 /* Store the DD partitioning count, so we can ignore cycle counts
4535 * over the next nstlist steps, which are often slower.
4537 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
4542 /* Returns if we should check whether there has been enough load
4543 * imbalance to trigger dynamic load balancing.
4545 static gmx_bool dd_dlb_get_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd)
4547 if (dd->comm->dlbState != DlbState::offCanTurnOn)
4552 if (dd->ddp_count <= dd->comm->ddPartioningCountFirstDlbOff)
4554 /* We ignore the first nstlist steps at the start of the run
4555 * or after PME load balancing or after turning DLB off, since
4556 * these often have extra allocation or cache miss overhead.
4561 if (dd->comm->cycl_n[ddCyclStep] == 0)
4563 /* We can have zero timed steps when dd_partition_system is called
4564 * more than once at the same step, e.g. with replica exchange.
4565 * Turning on DLB would trigger an assertion failure later, but is
4566 * also useless right after exchanging replicas.
4571 /* We should check whether we should use DLB directly after
4573 if (dd->comm->bCheckWhetherToTurnDlbOn)
4575 /* This flag was set when the PME load-balancing routines
4576 unlocked DLB, and should now be cleared. */
4577 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, FALSE);
4580 /* We check whether we should use DLB every c_checkTurnDlbOnInterval
4581 * partitionings (we do not do this every partioning, so that we
4582 * avoid excessive communication). */
4583 return dd->comm->n_load_have % c_checkTurnDlbOnInterval == c_checkTurnDlbOnInterval - 1;
4586 gmx_bool dd_dlb_is_on(const gmx_domdec_t *dd)
4588 return isDlbOn(dd->comm);
4591 gmx_bool dd_dlb_is_locked(const gmx_domdec_t *dd)
4593 return (dd->comm->dlbState == DlbState::offTemporarilyLocked);
4596 void dd_dlb_lock(gmx_domdec_t *dd)
4598 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
4599 if (dd->comm->dlbState == DlbState::offCanTurnOn)
4601 dd->comm->dlbState = DlbState::offTemporarilyLocked;
4605 void dd_dlb_unlock(gmx_domdec_t *dd)
4607 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
4608 if (dd->comm->dlbState == DlbState::offTemporarilyLocked)
4610 dd->comm->dlbState = DlbState::offCanTurnOn;
4611 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
4615 static void merge_cg_buffers(int ncell,
4616 gmx_domdec_comm_dim_t *cd, int pulse,
4618 gmx::ArrayRef<int> index_gl,
4620 rvec *cg_cm, rvec *recv_vr,
4621 gmx::ArrayRef<int> cgindex,
4622 cginfo_mb_t *cginfo_mb, int *cginfo)
4624 gmx_domdec_ind_t *ind, *ind_p;
4625 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
4626 int shift, shift_at;
4628 ind = &cd->ind[pulse];
4630 /* First correct the already stored data */
4631 shift = ind->nrecv[ncell];
4632 for (cell = ncell-1; cell >= 0; cell--)
4634 shift -= ind->nrecv[cell];
4637 /* Move the cg's present from previous grid pulses */
4638 cg0 = ncg_cell[ncell+cell];
4639 cg1 = ncg_cell[ncell+cell+1];
4640 cgindex[cg1+shift] = cgindex[cg1];
4641 for (cg = cg1-1; cg >= cg0; cg--)
4643 index_gl[cg+shift] = index_gl[cg];
4644 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
4645 cgindex[cg+shift] = cgindex[cg];
4646 cginfo[cg+shift] = cginfo[cg];
4648 /* Correct the already stored send indices for the shift */
4649 for (p = 1; p <= pulse; p++)
4651 ind_p = &cd->ind[p];
4653 for (c = 0; c < cell; c++)
4655 cg0 += ind_p->nsend[c];
4657 cg1 = cg0 + ind_p->nsend[cell];
4658 for (cg = cg0; cg < cg1; cg++)
4660 ind_p->index[cg] += shift;
4666 /* Merge in the communicated buffers */
4670 for (cell = 0; cell < ncell; cell++)
4672 cg1 = ncg_cell[ncell+cell+1] + shift;
4675 /* Correct the old cg indices */
4676 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
4678 cgindex[cg+1] += shift_at;
4681 for (cg = 0; cg < ind->nrecv[cell]; cg++)
4683 /* Copy this charge group from the buffer */
4684 index_gl[cg1] = recv_i[cg0];
4685 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
4686 /* Add it to the cgindex */
4687 cg_gl = index_gl[cg1];
4688 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
4689 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
4690 cgindex[cg1+1] = cgindex[cg1] + nat;
4695 shift += ind->nrecv[cell];
4696 ncg_cell[ncell+cell+1] = cg1;
4700 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
4703 const gmx::RangePartitioning &atomGroups)
4705 /* Store the atom block boundaries for easy copying of communication buffers
4707 int g = atomGroupStart;
4708 for (int zone = 0; zone < nzone; zone++)
4710 for (gmx_domdec_ind_t &ind : cd->ind)
4712 const auto range = atomGroups.subRange(g, g + ind.nrecv[zone]);
4713 ind.cell2at0[zone] = range.begin();
4714 ind.cell2at1[zone] = range.end();
4715 g += ind.nrecv[zone];
4720 static gmx_bool missing_link(t_blocka *link, int cg_gl, const char *bLocalCG)
4726 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
4728 if (!bLocalCG[link->a[i]])
4737 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
4739 real c[DIM][4]; /* the corners for the non-bonded communication */
4740 real cr0; /* corner for rounding */
4741 real cr1[4]; /* corners for rounding */
4742 real bc[DIM]; /* corners for bounded communication */
4743 real bcr1; /* corner for rounding for bonded communication */
4746 /* Determine the corners of the domain(s) we are communicating with */
4748 set_dd_corners(const gmx_domdec_t *dd,
4749 int dim0, int dim1, int dim2,
4753 const gmx_domdec_comm_t *comm;
4754 const gmx_domdec_zones_t *zones;
4759 zones = &comm->zones;
4761 /* Keep the compiler happy */
4765 /* The first dimension is equal for all cells */
4766 c->c[0][0] = comm->cell_x0[dim0];
4769 c->bc[0] = c->c[0][0];
4774 /* This cell row is only seen from the first row */
4775 c->c[1][0] = comm->cell_x0[dim1];
4776 /* All rows can see this row */
4777 c->c[1][1] = comm->cell_x0[dim1];
4778 if (isDlbOn(dd->comm))
4780 c->c[1][1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
4783 /* For the multi-body distance we need the maximum */
4784 c->bc[1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
4787 /* Set the upper-right corner for rounding */
4788 c->cr0 = comm->cell_x1[dim0];
4793 for (j = 0; j < 4; j++)
4795 c->c[2][j] = comm->cell_x0[dim2];
4797 if (isDlbOn(dd->comm))
4799 /* Use the maximum of the i-cells that see a j-cell */
4800 for (i = 0; i < zones->nizone; i++)
4802 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
4807 std::max(c->c[2][j-4],
4808 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
4814 /* For the multi-body distance we need the maximum */
4815 c->bc[2] = comm->cell_x0[dim2];
4816 for (i = 0; i < 2; i++)
4818 for (j = 0; j < 2; j++)
4820 c->bc[2] = std::max(c->bc[2], comm->zone_d2[i][j].p1_0);
4826 /* Set the upper-right corner for rounding */
4827 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
4828 * Only cell (0,0,0) can see cell 7 (1,1,1)
4830 c->cr1[0] = comm->cell_x1[dim1];
4831 c->cr1[3] = comm->cell_x1[dim1];
4832 if (isDlbOn(dd->comm))
4834 c->cr1[0] = std::max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
4837 /* For the multi-body distance we need the maximum */
4838 c->bcr1 = std::max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
4845 /* Add the atom groups we need to send in this pulse from this zone to
4846 * \p localAtomGroups and \p work
4849 get_zone_pulse_cgs(gmx_domdec_t *dd,
4850 int zonei, int zone,
4852 gmx::ArrayRef<const int> globalAtomGroupIndices,
4853 const gmx::RangePartitioning &atomGroups,
4854 int dim, int dim_ind,
4855 int dim0, int dim1, int dim2,
4856 real r_comm2, real r_bcomm2,
4858 bool distanceIsTriclinic,
4860 real skew_fac2_d, real skew_fac_01,
4861 rvec *v_d, rvec *v_0, rvec *v_1,
4862 const dd_corners_t *c,
4863 const rvec sf2_round,
4864 gmx_bool bDistBonded,
4870 std::vector<int> *localAtomGroups,
4871 dd_comm_setup_work_t *work)
4873 gmx_domdec_comm_t *comm;
4875 gmx_bool bDistMB_pulse;
4877 real r2, rb2, r, tric_sh;
4884 bScrew = (dd->bScrewPBC && dim == XX);
4886 bDistMB_pulse = (bDistMB && bDistBonded);
4888 /* Unpack the work data */
4889 std::vector<int> &ibuf = work->atomGroupBuffer;
4890 std::vector<gmx::RVec> &vbuf = work->positionBuffer;
4894 for (cg = cg0; cg < cg1; cg++)
4898 if (!distanceIsTriclinic)
4900 /* Rectangular direction, easy */
4901 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
4908 r = cg_cm[cg][dim] - c->bc[dim_ind];
4914 /* Rounding gives at most a 16% reduction
4915 * in communicated atoms
4917 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
4919 r = cg_cm[cg][dim0] - c->cr0;
4920 /* This is the first dimension, so always r >= 0 */
4927 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
4929 r = cg_cm[cg][dim1] - c->cr1[zone];
4936 r = cg_cm[cg][dim1] - c->bcr1;
4946 /* Triclinic direction, more complicated */
4949 /* Rounding, conservative as the skew_fac multiplication
4950 * will slightly underestimate the distance.
4952 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
4954 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
4955 for (i = dim0+1; i < DIM; i++)
4957 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
4959 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
4962 rb[dim0] = rn[dim0];
4965 /* Take care that the cell planes along dim0 might not
4966 * be orthogonal to those along dim1 and dim2.
4968 for (i = 1; i <= dim_ind; i++)
4971 if (normal[dim0][dimd] > 0)
4973 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
4976 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
4981 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
4983 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
4985 for (i = dim1+1; i < DIM; i++)
4987 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
4989 rn[dim1] += tric_sh;
4992 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
4993 /* Take care of coupling of the distances
4994 * to the planes along dim0 and dim1 through dim2.
4996 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
4997 /* Take care that the cell planes along dim1
4998 * might not be orthogonal to that along dim2.
5000 if (normal[dim1][dim2] > 0)
5002 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
5008 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
5011 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
5012 /* Take care of coupling of the distances
5013 * to the planes along dim0 and dim1 through dim2.
5015 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
5016 /* Take care that the cell planes along dim1
5017 * might not be orthogonal to that along dim2.
5019 if (normal[dim1][dim2] > 0)
5021 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
5026 /* The distance along the communication direction */
5027 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
5029 for (i = dim+1; i < DIM; i++)
5031 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
5036 r2 += rn[dim]*rn[dim]*skew_fac2_d;
5037 /* Take care of coupling of the distances
5038 * to the planes along dim0 and dim1 through dim2.
5040 if (dim_ind == 1 && zonei == 1)
5042 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
5048 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
5051 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
5052 /* Take care of coupling of the distances
5053 * to the planes along dim0 and dim1 through dim2.
5055 if (dim_ind == 1 && zonei == 1)
5057 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
5065 ((bDistMB && rb2 < r_bcomm2) ||
5066 (bDist2B && r2 < r_bcomm2)) &&
5068 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
5069 missing_link(comm->cglink, globalAtomGroupIndices[cg],
5072 /* Store the local and global atom group indices and position */
5073 localAtomGroups->push_back(cg);
5074 ibuf.push_back(globalAtomGroupIndices[cg]);
5078 if (dd->ci[dim] == 0)
5080 /* Correct cg_cm for pbc */
5081 rvec_add(cg_cm[cg], box[dim], posPbc);
5084 posPbc[YY] = box[YY][YY] - posPbc[YY];
5085 posPbc[ZZ] = box[ZZ][ZZ] - posPbc[ZZ];
5090 copy_rvec(cg_cm[cg], posPbc);
5092 vbuf.emplace_back(posPbc[XX], posPbc[YY], posPbc[ZZ]);
5094 nat += atomGroups.block(cg).size();
5099 work->nsend_zone = nsend_z;
5102 static void clearCommSetupData(dd_comm_setup_work_t *work)
5104 work->localAtomGroupBuffer.clear();
5105 work->atomGroupBuffer.clear();
5106 work->positionBuffer.clear();
5108 work->nsend_zone = 0;
5111 static void setup_dd_communication(gmx_domdec_t *dd,
5112 matrix box, gmx_ddbox_t *ddbox,
5114 t_state *state, PaddedRVecVector *f)
5116 int dim_ind, dim, dim0, dim1, dim2, dimd, nat_tot;
5117 int nzone, nzone_send, zone, zonei, cg0, cg1;
5118 int c, i, cg, cg_gl, nrcg;
5119 int *zone_cg_range, pos_cg;
5120 gmx_domdec_comm_t *comm;
5121 gmx_domdec_zones_t *zones;
5122 gmx_domdec_comm_dim_t *cd;
5123 cginfo_mb_t *cginfo_mb;
5124 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
5125 real r_comm2, r_bcomm2;
5126 dd_corners_t corners;
5127 rvec *cg_cm, *normal, *v_d, *v_0 = nullptr, *v_1 = nullptr;
5128 real skew_fac2_d, skew_fac_01;
5133 fprintf(debug, "Setting up DD communication\n");
5138 if (comm->dth.empty())
5140 /* Initialize the thread data.
5141 * This can not be done in init_domain_decomposition,
5142 * as the numbers of threads is determined later.
5144 int numThreads = gmx_omp_nthreads_get(emntDomdec);
5145 comm->dth.resize(numThreads);
5148 switch (fr->cutoff_scheme)
5154 cg_cm = as_rvec_array(state->x.data());
5157 gmx_incons("unimplemented");
5160 bBondComm = comm->bBondComm;
5162 /* Do we need to determine extra distances for multi-body bondeds? */
5163 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
5165 /* Do we need to determine extra distances for only two-body bondeds? */
5166 bDist2B = (bBondComm && !bDistMB);
5168 r_comm2 = gmx::square(comm->cutoff);
5169 r_bcomm2 = gmx::square(comm->cutoff_mbody);
5173 fprintf(debug, "bBondComm %s, r_bc %f\n", gmx::boolToString(bBondComm), std::sqrt(r_bcomm2));
5176 zones = &comm->zones;
5179 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
5180 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
5182 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
5184 /* Triclinic stuff */
5185 normal = ddbox->normal;
5189 v_0 = ddbox->v[dim0];
5190 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
5192 /* Determine the coupling coefficient for the distances
5193 * to the cell planes along dim0 and dim1 through dim2.
5194 * This is required for correct rounding.
5197 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
5200 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
5206 v_1 = ddbox->v[dim1];
5209 zone_cg_range = zones->cg_range;
5210 cginfo_mb = fr->cginfo_mb;
5212 zone_cg_range[0] = 0;
5213 zone_cg_range[1] = dd->ncg_home;
5214 comm->zone_ncg1[0] = dd->ncg_home;
5215 pos_cg = dd->ncg_home;
5217 nat_tot = comm->atomRanges.numHomeAtoms();
5219 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
5221 dim = dd->dim[dim_ind];
5222 cd = &comm->cd[dim_ind];
5224 /* Check if we need to compute triclinic distances along this dim */
5225 bool distanceIsTriclinic = false;
5226 for (i = 0; i <= dim_ind; i++)
5228 if (ddbox->tric_dir[dd->dim[i]])
5230 distanceIsTriclinic = true;
5234 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
5236 /* No pbc in this dimension, the first node should not comm. */
5244 v_d = ddbox->v[dim];
5245 skew_fac2_d = gmx::square(ddbox->skew_fac[dim]);
5247 cd->receiveInPlace = true;
5248 for (int p = 0; p < cd->numPulses(); p++)
5250 /* Only atoms communicated in the first pulse are used
5251 * for multi-body bonded interactions or for bBondComm.
5253 bDistBonded = ((bDistMB || bDist2B) && p == 0);
5255 gmx_domdec_ind_t *ind = &cd->ind[p];
5257 /* Thread 0 writes in the global index array */
5259 clearCommSetupData(&comm->dth[0]);
5261 for (zone = 0; zone < nzone_send; zone++)
5263 if (dim_ind > 0 && distanceIsTriclinic)
5265 /* Determine slightly more optimized skew_fac's
5267 * This reduces the number of communicated atoms
5268 * by about 10% for 3D DD of rhombic dodecahedra.
5270 for (dimd = 0; dimd < dim; dimd++)
5272 sf2_round[dimd] = 1;
5273 if (ddbox->tric_dir[dimd])
5275 for (i = dd->dim[dimd]+1; i < DIM; i++)
5277 /* If we are shifted in dimension i
5278 * and the cell plane is tilted forward
5279 * in dimension i, skip this coupling.
5281 if (!(zones->shift[nzone+zone][i] &&
5282 ddbox->v[dimd][i][dimd] >= 0))
5285 gmx::square(ddbox->v[dimd][i][dimd]);
5288 sf2_round[dimd] = 1/sf2_round[dimd];
5293 zonei = zone_perm[dim_ind][zone];
5296 /* Here we permutate the zones to obtain a convenient order
5297 * for neighbor searching
5299 cg0 = zone_cg_range[zonei];
5300 cg1 = zone_cg_range[zonei+1];
5304 /* Look only at the cg's received in the previous grid pulse
5306 cg1 = zone_cg_range[nzone+zone+1];
5307 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
5310 const int numThreads = static_cast<int>(comm->dth.size());
5311 #pragma omp parallel for num_threads(numThreads) schedule(static)
5312 for (int th = 0; th < numThreads; th++)
5316 dd_comm_setup_work_t &work = comm->dth[th];
5318 /* Retain data accumulated into buffers of thread 0 */
5321 clearCommSetupData(&work);
5324 int cg0_th = cg0 + ((cg1 - cg0)* th )/numThreads;
5325 int cg1_th = cg0 + ((cg1 - cg0)*(th+1))/numThreads;
5327 /* Get the cg's for this pulse in this zone */
5328 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
5329 dd->globalAtomGroupIndices,
5331 dim, dim_ind, dim0, dim1, dim2,
5333 box, distanceIsTriclinic,
5334 normal, skew_fac2_d, skew_fac_01,
5335 v_d, v_0, v_1, &corners, sf2_round,
5336 bDistBonded, bBondComm,
5339 th == 0 ? &ind->index : &work.localAtomGroupBuffer,
5342 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
5345 std::vector<int> &atomGroups = comm->dth[0].atomGroupBuffer;
5346 std::vector<gmx::RVec> &positions = comm->dth[0].positionBuffer;
5347 ind->nsend[zone] = comm->dth[0].nsend_zone;
5348 /* Append data of threads>=1 to the communication buffers */
5349 for (int th = 1; th < numThreads; th++)
5351 const dd_comm_setup_work_t &dth = comm->dth[th];
5353 ind->index.insert(ind->index.end(), dth.localAtomGroupBuffer.begin(), dth.localAtomGroupBuffer.end());
5354 atomGroups.insert(atomGroups.end(), dth.atomGroupBuffer.begin(), dth.atomGroupBuffer.end());
5355 positions.insert(positions.end(), dth.positionBuffer.begin(), dth.positionBuffer.end());
5356 comm->dth[0].nat += dth.nat;
5357 ind->nsend[zone] += dth.nsend_zone;
5360 /* Clear the counts in case we do not have pbc */
5361 for (zone = nzone_send; zone < nzone; zone++)
5363 ind->nsend[zone] = 0;
5365 ind->nsend[nzone] = ind->index.size();
5366 ind->nsend[nzone + 1] = comm->dth[0].nat;
5367 /* Communicate the number of cg's and atoms to receive */
5368 ddSendrecv(dd, dim_ind, dddirBackward,
5369 ind->nsend, nzone+2,
5370 ind->nrecv, nzone+2);
5374 /* We can receive in place if only the last zone is not empty */
5375 for (zone = 0; zone < nzone-1; zone++)
5377 if (ind->nrecv[zone] > 0)
5379 cd->receiveInPlace = false;
5384 int receiveBufferSize = 0;
5385 if (!cd->receiveInPlace)
5387 receiveBufferSize = ind->nrecv[nzone];
5389 /* These buffer are actually only needed with in-place */
5390 DDBufferAccess<int> globalAtomGroupBuffer(comm->intBuffer, receiveBufferSize);
5391 DDBufferAccess<gmx::RVec> rvecBuffer(comm->rvecBuffer, receiveBufferSize);
5393 dd_comm_setup_work_t &work = comm->dth[0];
5395 /* Make space for the global cg indices */
5396 int numAtomGroupsNew = pos_cg + ind->nrecv[nzone];
5397 dd->globalAtomGroupIndices.resize(numAtomGroupsNew);
5398 /* Communicate the global cg indices */
5399 gmx::ArrayRef<int> integerBufferRef;
5400 if (cd->receiveInPlace)
5402 integerBufferRef = gmx::arrayRefFromArray(dd->globalAtomGroupIndices.data() + pos_cg, ind->nrecv[nzone]);
5406 integerBufferRef = globalAtomGroupBuffer.buffer;
5408 ddSendrecv<int>(dd, dim_ind, dddirBackward,
5409 work.atomGroupBuffer, integerBufferRef);
5411 /* Make space for cg_cm */
5412 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
5413 if (fr->cutoff_scheme == ecutsGROUP)
5419 cg_cm = as_rvec_array(state->x.data());
5421 /* Communicate cg_cm */
5422 gmx::ArrayRef<gmx::RVec> rvecBufferRef;
5423 if (cd->receiveInPlace)
5425 rvecBufferRef = gmx::arrayRefFromArray(reinterpret_cast<gmx::RVec *>(cg_cm + pos_cg), ind->nrecv[nzone]);
5429 rvecBufferRef = rvecBuffer.buffer;
5431 ddSendrecv<gmx::RVec>(dd, dim_ind, dddirBackward,
5432 work.positionBuffer, rvecBufferRef);
5434 /* Make the charge group index */
5435 if (cd->receiveInPlace)
5437 zone = (p == 0 ? 0 : nzone - 1);
5438 while (zone < nzone)
5440 for (cg = 0; cg < ind->nrecv[zone]; cg++)
5442 cg_gl = dd->globalAtomGroupIndices[pos_cg];
5443 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
5444 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
5445 dd->atomGrouping_.appendBlock(nrcg);
5448 /* Update the charge group presence,
5449 * so we can use it in the next pass of the loop.
5451 comm->bLocalCG[cg_gl] = TRUE;
5457 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
5460 zone_cg_range[nzone+zone] = pos_cg;
5465 /* This part of the code is never executed with bBondComm. */
5466 std::vector<int> &atomGroupsIndex = dd->atomGrouping_.rawIndex();
5467 atomGroupsIndex.resize(numAtomGroupsNew + 1);
5469 merge_cg_buffers(nzone, cd, p, zone_cg_range,
5470 dd->globalAtomGroupIndices, integerBufferRef.data(),
5471 cg_cm, as_rvec_array(rvecBufferRef.data()),
5473 fr->cginfo_mb, fr->cginfo);
5474 pos_cg += ind->nrecv[nzone];
5476 nat_tot += ind->nrecv[nzone+1];
5478 if (!cd->receiveInPlace)
5480 /* Store the atom block for easy copying of communication buffers */
5481 make_cell2at_index(cd, nzone, zone_cg_range[nzone], dd->atomGrouping());
5486 comm->atomRanges.setEnd(DDAtomRanges::Type::Zones, nat_tot);
5490 /* We don't need to update cginfo, since that was alrady done above.
5491 * So we pass NULL for the forcerec.
5493 dd_set_cginfo(dd->globalAtomGroupIndices,
5494 dd->ncg_home, dd->globalAtomGroupIndices.size(),
5495 nullptr, comm->bLocalCG);
5500 fprintf(debug, "Finished setting up DD communication, zones:");
5501 for (c = 0; c < zones->n; c++)
5503 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
5505 fprintf(debug, "\n");
5509 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
5513 for (c = 0; c < zones->nizone; c++)
5515 zones->izone[c].cg1 = zones->cg_range[c+1];
5516 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
5517 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
5521 /* \brief Set zone dimensions for zones \p zone_start to \p zone_end-1
5523 * Also sets the atom density for the home zone when \p zone_start=0.
5524 * For this \p numMovedChargeGroupsInHomeZone needs to be passed to tell
5525 * how many charge groups will move but are still part of the current range.
5526 * \todo When converting domdec to use proper classes, all these variables
5527 * should be private and a method should return the correct count
5528 * depending on an internal state.
5530 * \param[in,out] dd The domain decomposition struct
5531 * \param[in] box The box
5532 * \param[in] ddbox The domain decomposition box struct
5533 * \param[in] zone_start The start of the zone range to set sizes for
5534 * \param[in] zone_end The end of the zone range to set sizes for
5535 * \param[in] numMovedChargeGroupsInHomeZone The number of charge groups in the home zone that should moved but are still present in dd->comm->zones.cg_range
5537 static void set_zones_size(gmx_domdec_t *dd,
5538 matrix box, const gmx_ddbox_t *ddbox,
5539 int zone_start, int zone_end,
5540 int numMovedChargeGroupsInHomeZone)
5542 gmx_domdec_comm_t *comm;
5543 gmx_domdec_zones_t *zones;
5552 zones = &comm->zones;
5554 /* Do we need to determine extra distances for multi-body bondeds? */
5555 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
5557 for (z = zone_start; z < zone_end; z++)
5559 /* Copy cell limits to zone limits.
5560 * Valid for non-DD dims and non-shifted dims.
5562 copy_rvec(comm->cell_x0, zones->size[z].x0);
5563 copy_rvec(comm->cell_x1, zones->size[z].x1);
5566 for (d = 0; d < dd->ndim; d++)
5570 for (z = 0; z < zones->n; z++)
5572 /* With a staggered grid we have different sizes
5573 * for non-shifted dimensions.
5575 if (isDlbOn(dd->comm) && zones->shift[z][dim] == 0)
5579 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
5580 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
5584 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
5585 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
5591 rcmbs = comm->cutoff_mbody;
5592 if (ddbox->tric_dir[dim])
5594 rcs /= ddbox->skew_fac[dim];
5595 rcmbs /= ddbox->skew_fac[dim];
5598 /* Set the lower limit for the shifted zone dimensions */
5599 for (z = zone_start; z < zone_end; z++)
5601 if (zones->shift[z][dim] > 0)
5604 if (!isDlbOn(dd->comm) || d == 0)
5606 zones->size[z].x0[dim] = comm->cell_x1[dim];
5607 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
5611 /* Here we take the lower limit of the zone from
5612 * the lowest domain of the zone below.
5616 zones->size[z].x0[dim] =
5617 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
5623 zones->size[z].x0[dim] =
5624 zones->size[zone_perm[2][z-4]].x0[dim];
5628 zones->size[z].x0[dim] =
5629 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
5632 /* A temporary limit, is updated below */
5633 zones->size[z].x1[dim] = zones->size[z].x0[dim];
5637 for (zi = 0; zi < zones->nizone; zi++)
5639 if (zones->shift[zi][dim] == 0)
5641 /* This takes the whole zone into account.
5642 * With multiple pulses this will lead
5643 * to a larger zone then strictly necessary.
5645 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
5646 zones->size[zi].x1[dim]+rcmbs);
5654 /* Loop over the i-zones to set the upper limit of each
5657 for (zi = 0; zi < zones->nizone; zi++)
5659 if (zones->shift[zi][dim] == 0)
5661 /* We should only use zones up to zone_end */
5662 int jZoneEnd = std::min(zones->izone[zi].j1, zone_end);
5663 for (z = zones->izone[zi].j0; z < jZoneEnd; z++)
5665 if (zones->shift[z][dim] > 0)
5667 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
5668 zones->size[zi].x1[dim]+rcs);
5675 for (z = zone_start; z < zone_end; z++)
5677 /* Initialization only required to keep the compiler happy */
5678 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
5681 /* To determine the bounding box for a zone we need to find
5682 * the extreme corners of 4, 2 or 1 corners.
5684 nc = 1 << (ddbox->nboundeddim - 1);
5686 for (c = 0; c < nc; c++)
5688 /* Set up a zone corner at x=0, ignoring trilinic couplings */
5692 corner[YY] = zones->size[z].x0[YY];
5696 corner[YY] = zones->size[z].x1[YY];
5700 corner[ZZ] = zones->size[z].x0[ZZ];
5704 corner[ZZ] = zones->size[z].x1[ZZ];
5706 if (dd->ndim == 1 && dd->dim[0] < ZZ && ZZ < dd->npbcdim &&
5707 box[ZZ][1 - dd->dim[0]] != 0)
5709 /* With 1D domain decomposition the cg's are not in
5710 * the triclinic box, but triclinic x-y and rectangular y/x-z.
5711 * Shift the corner of the z-vector back to along the box
5712 * vector of dimension d, so it will later end up at 0 along d.
5713 * This can affect the location of this corner along dd->dim[0]
5714 * through the matrix operation below if box[d][dd->dim[0]]!=0.
5716 int d = 1 - dd->dim[0];
5718 corner[d] -= corner[ZZ]*box[ZZ][d]/box[ZZ][ZZ];
5720 /* Apply the triclinic couplings */
5721 assert(ddbox->npbcdim <= DIM);
5722 for (i = YY; i < ddbox->npbcdim; i++)
5724 for (j = XX; j < i; j++)
5726 corner[j] += corner[i]*box[i][j]/box[i][i];
5731 copy_rvec(corner, corner_min);
5732 copy_rvec(corner, corner_max);
5736 for (i = 0; i < DIM; i++)
5738 corner_min[i] = std::min(corner_min[i], corner[i]);
5739 corner_max[i] = std::max(corner_max[i], corner[i]);
5743 /* Copy the extreme cornes without offset along x */
5744 for (i = 0; i < DIM; i++)
5746 zones->size[z].bb_x0[i] = corner_min[i];
5747 zones->size[z].bb_x1[i] = corner_max[i];
5749 /* Add the offset along x */
5750 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
5751 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
5754 if (zone_start == 0)
5757 for (dim = 0; dim < DIM; dim++)
5759 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
5761 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0] - numMovedChargeGroupsInHomeZone)/vol;
5766 for (z = zone_start; z < zone_end; z++)
5768 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
5770 zones->size[z].x0[XX], zones->size[z].x1[XX],
5771 zones->size[z].x0[YY], zones->size[z].x1[YY],
5772 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
5773 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
5775 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
5776 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
5777 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
5782 static bool comp_cgsort(const gmx_cgsort_t &a, const gmx_cgsort_t &b)
5787 return a.ind_gl < b.ind_gl;
5789 return a.nsc < b.nsc;
5792 /* Order data in \p dataToSort according to \p sort
5794 * Note: both buffers should have at least \p sort.size() elements.
5796 template <typename T>
5798 orderVector(gmx::ArrayRef<const gmx_cgsort_t> sort,
5799 gmx::ArrayRef<T> dataToSort,
5800 gmx::ArrayRef<T> sortBuffer)
5802 GMX_ASSERT(dataToSort.size() >= sort.size(), "The vector needs to be sufficiently large");
5803 GMX_ASSERT(sortBuffer.size() >= sort.size(), "The sorting buffer needs to be sufficiently large");
5805 /* Order the data into the temporary buffer */
5807 for (const gmx_cgsort_t &entry : sort)
5809 sortBuffer[i++] = dataToSort[entry.ind];
5812 /* Copy back to the original array */
5813 std::copy(sortBuffer.begin(), sortBuffer.begin() + sort.size(),
5814 dataToSort.begin());
5817 /* Order data in \p dataToSort according to \p sort
5819 * Note: \p vectorToSort should have at least \p sort.size() elements,
5820 * \p workVector is resized when it is too small.
5822 template <typename T>
5824 orderVector(gmx::ArrayRef<const gmx_cgsort_t> sort,
5825 gmx::ArrayRef<T> vectorToSort,
5826 std::vector<T> *workVector)
5828 if (gmx::index(workVector->size()) < sort.size())
5830 workVector->resize(sort.size());
5832 orderVector<T>(sort, vectorToSort, *workVector);
5835 static void order_vec_atom(const gmx::RangePartitioning *atomGroups,
5836 gmx::ArrayRef<const gmx_cgsort_t> sort,
5837 gmx::ArrayRef<gmx::RVec> v,
5838 gmx::ArrayRef<gmx::RVec> buf)
5840 if (atomGroups == nullptr)
5842 /* Avoid the useless loop of the atoms within a cg */
5843 orderVector(sort, v, buf);
5848 /* Order the data */
5850 for (const gmx_cgsort_t &entry : sort)
5852 for (int i : atomGroups->block(entry.ind))
5854 copy_rvec(v[i], buf[a]);
5860 /* Copy back to the original array */
5861 for (int a = 0; a < atot; a++)
5863 copy_rvec(buf[a], v[a]);
5867 /* Returns whether a < b */
5868 static bool compareCgsort(const gmx_cgsort_t &a,
5869 const gmx_cgsort_t &b)
5871 return (a.nsc < b.nsc ||
5872 (a.nsc == b.nsc && a.ind_gl < b.ind_gl));
5875 static void orderedSort(gmx::ArrayRef<const gmx_cgsort_t> stationary,
5876 gmx::ArrayRef<gmx_cgsort_t> moved,
5877 std::vector<gmx_cgsort_t> *sort1)
5879 /* The new indices are not very ordered, so we qsort them */
5880 std::sort(moved.begin(), moved.end(), comp_cgsort);
5882 /* stationary is already ordered, so now we can merge the two arrays */
5883 sort1->resize(stationary.size() + moved.size());
5884 std::merge(stationary.begin(), stationary.end(),
5885 moved.begin(), moved.end(),
5890 /* Set the sorting order for systems with charge groups, returned in sort->sort.
5891 * The order is according to the global charge group index.
5892 * This adds and removes charge groups that moved between domains.
5894 static void dd_sort_order(const gmx_domdec_t *dd,
5895 const t_forcerec *fr,
5897 gmx_domdec_sort_t *sort)
5899 const int *a = fr->ns->grid->cell_index;
5901 const int movedValue = NSGRID_SIGNAL_MOVED_FAC*fr->ns->grid->ncells;
5903 if (ncg_home_old >= 0)
5905 std::vector<gmx_cgsort_t> &stationary = sort->stationary;
5906 std::vector<gmx_cgsort_t> &moved = sort->moved;
5908 /* The charge groups that remained in the same ns grid cell
5909 * are completely ordered. So we can sort efficiently by sorting
5910 * the charge groups that did move into the stationary list.
5911 * Note: push_back() seems to be slightly slower than direct access.
5915 for (int i = 0; i < dd->ncg_home; i++)
5917 /* Check if this cg did not move to another node */
5918 if (a[i] < movedValue)
5922 entry.ind_gl = dd->globalAtomGroupIndices[i];
5925 if (i >= ncg_home_old || a[i] != sort->sorted[i].nsc)
5927 /* This cg is new on this node or moved ns grid cell */
5928 moved.push_back(entry);
5932 /* This cg did not move */
5933 stationary.push_back(entry);
5940 fprintf(debug, "ordered sort cgs: stationary %zu moved %zu\n",
5941 stationary.size(), moved.size());
5943 /* Sort efficiently */
5944 orderedSort(stationary, moved, &sort->sorted);
5948 std::vector<gmx_cgsort_t> &cgsort = sort->sorted;
5950 cgsort.reserve(dd->ncg_home);
5952 for (int i = 0; i < dd->ncg_home; i++)
5954 /* Sort on the ns grid cell indices
5955 * and the global topology index
5959 entry.ind_gl = dd->globalAtomGroupIndices[i];
5961 cgsort.push_back(entry);
5962 if (cgsort[i].nsc < movedValue)
5969 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, numCGNew);
5971 /* Determine the order of the charge groups using qsort */
5972 std::sort(cgsort.begin(), cgsort.end(), comp_cgsort);
5974 /* Remove the charge groups which are no longer at home here */
5975 cgsort.resize(numCGNew);
5979 /* Returns the sorting order for atoms based on the nbnxn grid order in sort */
5980 static void dd_sort_order_nbnxn(const t_forcerec *fr,
5981 std::vector<gmx_cgsort_t> *sort)
5983 gmx::ArrayRef<const int> atomOrder = nbnxn_get_atomorder(fr->nbv->nbs.get());
5985 /* Using push_back() instead of this resize results in much slower code */
5986 sort->resize(atomOrder.size());
5987 gmx::ArrayRef<gmx_cgsort_t> buffer = *sort;
5988 size_t numSorted = 0;
5989 for (int i : atomOrder)
5993 /* The values of nsc and ind_gl are not used in this case */
5994 buffer[numSorted++].ind = i;
5997 sort->resize(numSorted);
6000 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
6003 gmx_domdec_sort_t *sort = dd->comm->sort.get();
6005 switch (fr->cutoff_scheme)
6008 dd_sort_order(dd, fr, ncg_home_old, sort);
6011 dd_sort_order_nbnxn(fr, &sort->sorted);
6014 gmx_incons("unimplemented");
6017 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
6019 /* We alloc with the old size, since cgindex is still old */
6020 GMX_ASSERT(atomGrouping.numBlocks() == dd->ncg_home, "atomGroups and dd should be consistent");
6021 DDBufferAccess<gmx::RVec> rvecBuffer(dd->comm->rvecBuffer, atomGrouping.fullRange().end());
6023 const gmx::RangePartitioning *atomGroupsPtr = (dd->comm->bCGs ? &atomGrouping : nullptr);
6025 /* Set the new home atom/charge group count */
6026 dd->ncg_home = sort->sorted.size();
6029 fprintf(debug, "Set the new home charge group count to %d\n",
6033 /* Reorder the state */
6034 gmx::ArrayRef<const gmx_cgsort_t> cgsort = sort->sorted;
6035 GMX_RELEASE_ASSERT(cgsort.size() == dd->ncg_home, "We should sort all the home atom groups");
6037 if (state->flags & (1 << estX))
6039 order_vec_atom(atomGroupsPtr, cgsort, state->x, rvecBuffer.buffer);
6041 if (state->flags & (1 << estV))
6043 order_vec_atom(atomGroupsPtr, cgsort, state->v, rvecBuffer.buffer);
6045 if (state->flags & (1 << estCGP))
6047 order_vec_atom(atomGroupsPtr, cgsort, state->cg_p, rvecBuffer.buffer);
6050 if (fr->cutoff_scheme == ecutsGROUP)
6053 gmx::ArrayRef<gmx::RVec> cgcmRef = gmx::arrayRefFromArray(reinterpret_cast<gmx::RVec *>(cgcm[0]), cgsort.size());
6054 orderVector(cgsort, cgcmRef, rvecBuffer.buffer);
6057 /* Reorder the global cg index */
6058 orderVector<int>(cgsort, dd->globalAtomGroupIndices, &sort->intBuffer);
6059 /* Reorder the cginfo */
6060 orderVector<int>(cgsort, gmx::arrayRefFromArray(fr->cginfo, cgsort.size()), &sort->intBuffer);
6061 /* Rebuild the local cg index */
6064 /* We make a new, ordered atomGroups object and assign it to
6065 * the old one. This causes some allocation overhead, but saves
6066 * a copy back of the whole index.
6068 gmx::RangePartitioning ordered;
6069 for (const gmx_cgsort_t &entry : cgsort)
6071 ordered.appendBlock(atomGrouping.block(entry.ind).size());
6073 dd->atomGrouping_ = ordered;
6077 dd->atomGrouping_.setAllBlocksSizeOne(dd->ncg_home);
6079 /* Set the home atom number */
6080 dd->comm->atomRanges.setEnd(DDAtomRanges::Type::Home, dd->atomGrouping().fullRange().end());
6082 if (fr->cutoff_scheme == ecutsVERLET)
6084 /* The atoms are now exactly in grid order, update the grid order */
6085 nbnxn_set_atomorder(fr->nbv->nbs.get());
6089 /* Copy the sorted ns cell indices back to the ns grid struct */
6090 for (gmx::index i = 0; i < cgsort.size(); i++)
6092 fr->ns->grid->cell_index[i] = cgsort[i].nsc;
6094 fr->ns->grid->nr = cgsort.size();
6098 static void add_dd_statistics(gmx_domdec_t *dd)
6100 gmx_domdec_comm_t *comm = dd->comm;
6102 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6104 auto range = static_cast<DDAtomRanges::Type>(i);
6106 comm->atomRanges.end(range) - comm->atomRanges.start(range);
6111 void reset_dd_statistics_counters(gmx_domdec_t *dd)
6113 gmx_domdec_comm_t *comm = dd->comm;
6115 /* Reset all the statistics and counters for total run counting */
6116 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6118 comm->sum_nat[i] = 0;
6122 comm->load_step = 0;
6125 clear_ivec(comm->load_lim);
6130 void print_dd_statistics(const t_commrec *cr, const t_inputrec *ir, FILE *fplog)
6132 gmx_domdec_comm_t *comm = cr->dd->comm;
6134 const int numRanges = static_cast<int>(DDAtomRanges::Type::Number);
6135 gmx_sumd(numRanges, comm->sum_nat, cr);
6137 if (fplog == nullptr)
6142 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
6144 for (int i = static_cast<int>(DDAtomRanges::Type::Zones); i < numRanges; i++)
6146 auto range = static_cast<DDAtomRanges::Type>(i);
6147 double av = comm->sum_nat[i]/comm->ndecomp;
6150 case DDAtomRanges::Type::Zones:
6152 " av. #atoms communicated per step for force: %d x %.1f\n",
6155 case DDAtomRanges::Type::Vsites:
6156 if (cr->dd->vsite_comm)
6159 " av. #atoms communicated per step for vsites: %d x %.1f\n",
6160 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
6164 case DDAtomRanges::Type::Constraints:
6165 if (cr->dd->constraint_comm)
6168 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
6169 1 + ir->nLincsIter, av);
6173 gmx_incons(" Unknown type for DD statistics");
6176 fprintf(fplog, "\n");
6178 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
6180 print_dd_load_av(fplog, cr->dd);
6184 // TODO Remove fplog when group scheme and charge groups are gone
6185 void dd_partition_system(FILE *fplog,
6186 const gmx::MDLogger &mdlog,
6188 const t_commrec *cr,
6189 gmx_bool bMasterState,
6191 t_state *state_global,
6192 const gmx_mtop_t *top_global,
6193 const t_inputrec *ir,
6194 t_state *state_local,
6195 PaddedRVecVector *f,
6196 gmx::MDAtoms *mdAtoms,
6197 gmx_localtop_t *top_local,
6200 gmx::Constraints *constr,
6202 gmx_wallcycle *wcycle,
6206 gmx_domdec_comm_t *comm;
6207 gmx_ddbox_t ddbox = {0};
6209 int64_t step_pcoupl;
6210 rvec cell_ns_x0, cell_ns_x1;
6211 int ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
6212 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckWhetherToTurnDlbOn, bLogLoad;
6213 gmx_bool bRedist, bResortAll;
6214 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
6218 wallcycle_start(wcycle, ewcDOMDEC);
6223 // TODO if the update code becomes accessible here, use
6224 // upd->deform for this logic.
6225 bBoxChanged = (bMasterState || inputrecDeform(ir));
6226 if (ir->epc != epcNO)
6228 /* With nstpcouple > 1 pressure coupling happens.
6229 * one step after calculating the pressure.
6230 * Box scaling happens at the end of the MD step,
6231 * after the DD partitioning.
6232 * We therefore have to do DLB in the first partitioning
6233 * after an MD step where P-coupling occurred.
6234 * We need to determine the last step in which p-coupling occurred.
6235 * MRS -- need to validate this for vv?
6237 int n = ir->nstpcouple;
6240 step_pcoupl = step - 1;
6244 step_pcoupl = ((step - 1)/n)*n + 1;
6246 if (step_pcoupl >= comm->partition_step)
6252 bNStGlobalComm = (step % nstglobalcomm == 0);
6260 /* Should we do dynamic load balacing this step?
6261 * Since it requires (possibly expensive) global communication,
6262 * we might want to do DLB less frequently.
6264 if (bBoxChanged || ir->epc != epcNO)
6266 bDoDLB = bBoxChanged;
6270 bDoDLB = bNStGlobalComm;
6274 /* Check if we have recorded loads on the nodes */
6275 if (comm->bRecordLoad && dd_load_count(comm) > 0)
6277 bCheckWhetherToTurnDlbOn = dd_dlb_get_should_check_whether_to_turn_dlb_on(dd);
6279 /* Print load every nstlog, first and last step to the log file */
6280 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
6281 comm->n_load_collect == 0 ||
6283 (step + ir->nstlist > ir->init_step + ir->nsteps)));
6285 /* Avoid extra communication due to verbose screen output
6286 * when nstglobalcomm is set.
6288 if (bDoDLB || bLogLoad || bCheckWhetherToTurnDlbOn ||
6289 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
6291 get_load_distribution(dd, wcycle);
6296 GMX_LOG(mdlog.info).asParagraph().appendText(dd_print_load(dd, step-1));
6300 dd_print_load_verbose(dd);
6303 comm->n_load_collect++;
6309 /* Add the measured cycles to the running average */
6310 const float averageFactor = 0.1f;
6311 comm->cyclesPerStepDlbExpAverage =
6312 (1 - averageFactor)*comm->cyclesPerStepDlbExpAverage +
6313 averageFactor*comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
6315 if (comm->dlbState == DlbState::onCanTurnOff &&
6316 dd->comm->n_load_have % c_checkTurnDlbOffInterval == c_checkTurnDlbOffInterval - 1)
6318 gmx_bool turnOffDlb;
6321 /* If the running averaged cycles with DLB are more
6322 * than before we turned on DLB, turn off DLB.
6323 * We will again run and check the cycles without DLB
6324 * and we can then decide if to turn off DLB forever.
6326 turnOffDlb = (comm->cyclesPerStepDlbExpAverage >
6327 comm->cyclesPerStepBeforeDLB);
6329 dd_bcast(dd, sizeof(turnOffDlb), &turnOffDlb);
6332 /* To turn off DLB, we need to redistribute the atoms */
6333 dd_collect_state(dd, state_local, state_global);
6334 bMasterState = TRUE;
6335 turn_off_dlb(mdlog, dd, step);
6339 else if (bCheckWhetherToTurnDlbOn)
6341 gmx_bool turnOffDlbForever = FALSE;
6342 gmx_bool turnOnDlb = FALSE;
6344 /* Since the timings are node dependent, the master decides */
6347 /* If we recently turned off DLB, we want to check if
6348 * performance is better without DLB. We want to do this
6349 * ASAP to minimize the chance that external factors
6350 * slowed down the DLB step are gone here and we
6351 * incorrectly conclude that DLB was causing the slowdown.
6352 * So we measure one nstlist block, no running average.
6354 if (comm->haveTurnedOffDlb &&
6355 comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep] <
6356 comm->cyclesPerStepDlbExpAverage)
6358 /* After turning off DLB we ran nstlist steps in fewer
6359 * cycles than with DLB. This likely means that DLB
6360 * in not benefical, but this could be due to a one
6361 * time unlucky fluctuation, so we require two such
6362 * observations in close succession to turn off DLB
6365 if (comm->dlbSlowerPartitioningCount > 0 &&
6366 dd->ddp_count < comm->dlbSlowerPartitioningCount + 10*c_checkTurnDlbOnInterval)
6368 turnOffDlbForever = TRUE;
6370 comm->haveTurnedOffDlb = false;
6371 /* Register when we last measured DLB slowdown */
6372 comm->dlbSlowerPartitioningCount = dd->ddp_count;
6376 /* Here we check if the max PME rank load is more than 0.98
6377 * the max PP force load. If so, PP DLB will not help,
6378 * since we are (almost) limited by PME. Furthermore,
6379 * DLB will cause a significant extra x/f redistribution
6380 * cost on the PME ranks, which will then surely result
6381 * in lower total performance.
6383 if (cr->npmenodes > 0 &&
6384 dd_pme_f_ratio(dd) > 1 - DD_PERF_LOSS_DLB_ON)
6390 turnOnDlb = (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
6396 gmx_bool turnOffDlbForever;
6400 turnOffDlbForever, turnOnDlb
6402 dd_bcast(dd, sizeof(bools), &bools);
6403 if (bools.turnOffDlbForever)
6405 turn_off_dlb_forever(mdlog, dd, step);
6407 else if (bools.turnOnDlb)
6409 turn_on_dlb(mdlog, dd, step);
6414 comm->n_load_have++;
6417 cgs_gl = &comm->cgs_gl;
6422 /* Clear the old state */
6423 clearDDStateIndices(dd, 0, 0);
6426 auto xGlobal = positionsFromStatePointer(state_global);
6428 set_ddbox(dd, true, ir,
6429 DDMASTER(dd) ? state_global->box : nullptr,
6433 distributeState(mdlog, dd, state_global, ddbox, state_local, f);
6435 dd_make_local_cgs(dd, &top_local->cgs);
6437 /* Ensure that we have space for the new distribution */
6438 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
6440 if (fr->cutoff_scheme == ecutsGROUP)
6442 calc_cgcm(fplog, 0, dd->ncg_home,
6443 &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
6446 inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());
6448 dd_set_cginfo(dd->globalAtomGroupIndices, 0, dd->ncg_home, fr, comm->bLocalCG);
6450 else if (state_local->ddp_count != dd->ddp_count)
6452 if (state_local->ddp_count > dd->ddp_count)
6454 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%" PRId64 ")", state_local->ddp_count, dd->ddp_count);
6457 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
6459 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
6462 /* Clear the old state */
6463 clearDDStateIndices(dd, 0, 0);
6465 /* Restore the atom group indices from state_local */
6466 restoreAtomGroups(dd, cgs_gl->index, state_local);
6467 make_dd_indices(dd, cgs_gl->index, 0);
6468 ncgindex_set = dd->ncg_home;
6470 if (fr->cutoff_scheme == ecutsGROUP)
6472 /* Redetermine the cg COMs */
6473 calc_cgcm(fplog, 0, dd->ncg_home,
6474 &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
6477 inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());
6479 dd_set_cginfo(dd->globalAtomGroupIndices, 0, dd->ncg_home, fr, comm->bLocalCG);
6481 set_ddbox(dd, bMasterState, ir, state_local->box,
6482 true, state_local->x, &ddbox);
6484 bRedist = isDlbOn(comm);
6488 /* We have the full state, only redistribute the cgs */
6490 /* Clear the non-home indices */
6491 clearDDStateIndices(dd, dd->ncg_home, comm->atomRanges.numHomeAtoms());
6494 /* Avoid global communication for dim's without pbc and -gcom */
6495 if (!bNStGlobalComm)
6497 copy_rvec(comm->box0, ddbox.box0 );
6498 copy_rvec(comm->box_size, ddbox.box_size);
6500 set_ddbox(dd, bMasterState, ir, state_local->box,
6501 bNStGlobalComm, state_local->x, &ddbox);
6506 /* For dim's without pbc and -gcom */
6507 copy_rvec(ddbox.box0, comm->box0 );
6508 copy_rvec(ddbox.box_size, comm->box_size);
6510 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
6513 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
6515 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
6518 /* Check if we should sort the charge groups */
6519 const bool bSortCG = (bMasterState || bRedist);
6521 ncg_home_old = dd->ncg_home;
6523 /* When repartitioning we mark charge groups that will move to neighboring
6524 * DD cells, but we do not move them right away for performance reasons.
6525 * Thus we need to keep track of how many charge groups will move for
6526 * obtaining correct local charge group / atom counts.
6531 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
6533 ncgindex_set = dd->ncg_home;
6534 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
6538 GMX_RELEASE_ASSERT(bSortCG, "Sorting is required after redistribution");
6540 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
6543 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
6545 &comm->cell_x0, &comm->cell_x1,
6546 dd->ncg_home, fr->cg_cm,
6547 cell_ns_x0, cell_ns_x1, &grid_density);
6551 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
6554 switch (fr->cutoff_scheme)
6557 copy_ivec(fr->ns->grid->n, ncells_old);
6558 grid_first(fplog, fr->ns->grid, dd, &ddbox,
6559 state_local->box, cell_ns_x0, cell_ns_x1,
6560 fr->rlist, grid_density);
6563 nbnxn_get_ncells(fr->nbv->nbs.get(), &ncells_old[XX], &ncells_old[YY]);
6566 gmx_incons("unimplemented");
6568 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
6569 copy_ivec(ddbox.tric_dir, comm->tric_dir);
6573 wallcycle_sub_start(wcycle, ewcsDD_GRID);
6575 /* Sort the state on charge group position.
6576 * This enables exact restarts from this step.
6577 * It also improves performance by about 15% with larger numbers
6578 * of atoms per node.
6581 /* Fill the ns grid with the home cell,
6582 * so we can sort with the indices.
6584 set_zones_ncg_home(dd);
6586 switch (fr->cutoff_scheme)
6589 set_zones_size(dd, state_local->box, &ddbox, 0, 1, ncg_moved);
6591 nbnxn_put_on_grid(fr->nbv->nbs.get(), fr->ePBC, state_local->box,
6593 comm->zones.size[0].bb_x0,
6594 comm->zones.size[0].bb_x1,
6596 comm->zones.dens_zone0,
6598 as_rvec_array(state_local->x.data()),
6599 ncg_moved, bRedist ? comm->movedBuffer.data() : nullptr,
6600 fr->nbv->grp[eintLocal].kernel_type,
6603 nbnxn_get_ncells(fr->nbv->nbs.get(), &ncells_new[XX], &ncells_new[YY]);
6606 fill_grid(&comm->zones, fr->ns->grid, dd->ncg_home,
6607 0, dd->ncg_home, fr->cg_cm);
6609 copy_ivec(fr->ns->grid->n, ncells_new);
6612 gmx_incons("unimplemented");
6615 bResortAll = bMasterState;
6617 /* Check if we can user the old order and ns grid cell indices
6618 * of the charge groups to sort the charge groups efficiently.
6620 if (ncells_new[XX] != ncells_old[XX] ||
6621 ncells_new[YY] != ncells_old[YY] ||
6622 ncells_new[ZZ] != ncells_old[ZZ])
6629 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
6630 gmx_step_str(step, sbuf), dd->ncg_home);
6632 dd_sort_state(dd, fr->cg_cm, fr, state_local,
6633 bResortAll ? -1 : ncg_home_old);
6635 /* After sorting and compacting we set the correct size */
6636 dd_resize_state(state_local, f, comm->atomRanges.numHomeAtoms());
6638 /* Rebuild all the indices */
6642 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
6645 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
6647 /* Setup up the communication and communicate the coordinates */
6648 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
6650 /* Set the indices */
6651 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
6653 /* Set the charge group boundaries for neighbor searching */
6654 set_cg_boundaries(&comm->zones);
6656 if (fr->cutoff_scheme == ecutsVERLET)
6658 /* When bSortCG=true, we have already set the size for zone 0 */
6659 set_zones_size(dd, state_local->box, &ddbox,
6660 bSortCG ? 1 : 0, comm->zones.n,
6664 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
6667 write_dd_pdb("dd_home",step,"dump",top_global,cr,
6668 -1,as_rvec_array(state_local->x.data()),state_local->box);
6671 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
6673 /* Extract a local topology from the global topology */
6674 for (int i = 0; i < dd->ndim; i++)
6676 np[dd->dim[i]] = comm->cd[i].numPulses();
6678 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
6679 comm->cellsize_min, np,
6681 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : as_rvec_array(state_local->x.data()),
6682 vsite, top_global, top_local);
6684 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
6686 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
6688 /* Set up the special atom communication */
6689 int n = comm->atomRanges.end(DDAtomRanges::Type::Zones);
6690 for (int i = static_cast<int>(DDAtomRanges::Type::Zones) + 1; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6692 auto range = static_cast<DDAtomRanges::Type>(i);
6695 case DDAtomRanges::Type::Vsites:
6696 if (vsite && vsite->n_intercg_vsite)
6698 n = dd_make_local_vsites(dd, n, top_local->idef.il);
6701 case DDAtomRanges::Type::Constraints:
6702 if (dd->bInterCGcons || dd->bInterCGsettles)
6704 /* Only for inter-cg constraints we need special code */
6705 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
6706 constr, ir->nProjOrder,
6707 top_local->idef.il);
6711 gmx_incons("Unknown special atom type setup");
6713 comm->atomRanges.setEnd(range, n);
6716 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
6718 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
6720 /* Make space for the extra coordinates for virtual site
6721 * or constraint communication.
6723 state_local->natoms = comm->atomRanges.numAtomsTotal();
6725 dd_resize_state(state_local, f, state_local->natoms);
6727 if (fr->haveDirectVirialContributions)
6729 if (vsite && vsite->n_intercg_vsite)
6731 nat_f_novirsum = comm->atomRanges.end(DDAtomRanges::Type::Vsites);
6735 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
6737 nat_f_novirsum = comm->atomRanges.end(DDAtomRanges::Type::Zones);
6741 nat_f_novirsum = comm->atomRanges.numHomeAtoms();
6750 /* Set the number of atoms required for the force calculation.
6751 * Forces need to be constrained when doing energy
6752 * minimization. For simple simulations we could avoid some
6753 * allocation, zeroing and copying, but this is probably not worth
6754 * the complications and checking.
6756 forcerec_set_ranges(fr, dd->ncg_home, dd->globalAtomGroupIndices.size(),
6757 comm->atomRanges.end(DDAtomRanges::Type::Zones),
6758 comm->atomRanges.end(DDAtomRanges::Type::Constraints),
6761 /* Update atom data for mdatoms and several algorithms */
6762 mdAlgorithmsSetupAtomData(cr, ir, top_global, top_local, fr,
6763 nullptr, mdAtoms, constr, vsite, nullptr);
6765 auto mdatoms = mdAtoms->mdatoms();
6766 if (!thisRankHasDuty(cr, DUTY_PME))
6768 /* Send the charges and/or c6/sigmas to our PME only node */
6769 gmx_pme_send_parameters(cr,
6771 mdatoms->nChargePerturbed != 0, mdatoms->nTypePerturbed != 0,
6772 mdatoms->chargeA, mdatoms->chargeB,
6773 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
6774 mdatoms->sigmaA, mdatoms->sigmaB,
6775 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
6780 /* Update the local pull groups */
6781 dd_make_local_pull_groups(cr, ir->pull_work);
6784 if (dd->atomSets != nullptr)
6786 /* Update the local atom sets */
6787 dd->atomSets->setIndicesInDomainDecomposition(*(dd->ga2la));
6790 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
6791 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
6793 add_dd_statistics(dd);
6795 /* Make sure we only count the cycles for this DD partitioning */
6796 clear_dd_cycle_counts(dd);
6798 /* Because the order of the atoms might have changed since
6799 * the last vsite construction, we need to communicate the constructing
6800 * atom coordinates again (for spreading the forces this MD step).
6802 dd_move_x_vsites(dd, state_local->box, as_rvec_array(state_local->x.data()));
6804 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
6806 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
6808 dd_move_x(dd, state_local->box, state_local->x, nullWallcycle);
6809 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
6810 -1, as_rvec_array(state_local->x.data()), state_local->box);
6813 /* Store the partitioning step */
6814 comm->partition_step = step;
6816 /* Increase the DD partitioning counter */
6818 /* The state currently matches this DD partitioning count, store it */
6819 state_local->ddp_count = dd->ddp_count;
6822 /* The DD master node knows the complete cg distribution,
6823 * store the count so we can possibly skip the cg info communication.
6825 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
6828 if (comm->DD_debug > 0)
6830 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
6831 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
6832 "after partitioning");
6835 wallcycle_stop(wcycle, ewcDOMDEC);
6838 /*! \brief Check whether bonded interactions are missing, if appropriate */
6839 void checkNumberOfBondedInteractions(const gmx::MDLogger &mdlog,
6841 int totalNumberOfBondedInteractions,
6842 const gmx_mtop_t *top_global,
6843 const gmx_localtop_t *top_local,
6844 const t_state *state,
6845 bool *shouldCheckNumberOfBondedInteractions)
6847 if (*shouldCheckNumberOfBondedInteractions)
6849 if (totalNumberOfBondedInteractions != cr->dd->nbonded_global)
6851 dd_print_missing_interactions(mdlog, cr, totalNumberOfBondedInteractions, top_global, top_local, state); // Does not return
6853 *shouldCheckNumberOfBondedInteractions = false;