2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
51 #include "gromacs/compat/make_unique.h"
52 #include "gromacs/domdec/collect.h"
53 #include "gromacs/domdec/dlbtiming.h"
54 #include "gromacs/domdec/domdec_network.h"
55 #include "gromacs/domdec/ga2la.h"
56 #include "gromacs/domdec/localatomsetmanager.h"
57 #include "gromacs/ewald/pme.h"
58 #include "gromacs/fileio/gmxfio.h"
59 #include "gromacs/fileio/pdbio.h"
60 #include "gromacs/gmxlib/chargegroup.h"
61 #include "gromacs/gmxlib/network.h"
62 #include "gromacs/gmxlib/nrnb.h"
63 #include "gromacs/gpu_utils/gpu_utils.h"
64 #include "gromacs/hardware/hw_info.h"
65 #include "gromacs/imd/imd.h"
66 #include "gromacs/listed-forces/manage-threading.h"
67 #include "gromacs/math/functions.h"
68 #include "gromacs/math/vec.h"
69 #include "gromacs/math/vectypes.h"
70 #include "gromacs/mdlib/calc_verletbuf.h"
71 #include "gromacs/mdlib/constr.h"
72 #include "gromacs/mdlib/constraintrange.h"
73 #include "gromacs/mdlib/forcerec.h"
74 #include "gromacs/mdlib/gmx_omp_nthreads.h"
75 #include "gromacs/mdlib/lincs.h"
76 #include "gromacs/mdlib/mdatoms.h"
77 #include "gromacs/mdlib/mdrun.h"
78 #include "gromacs/mdlib/mdsetup.h"
79 #include "gromacs/mdlib/nb_verlet.h"
80 #include "gromacs/mdlib/nbnxn_grid.h"
81 #include "gromacs/mdlib/nsgrid.h"
82 #include "gromacs/mdlib/vsite.h"
83 #include "gromacs/mdtypes/commrec.h"
84 #include "gromacs/mdtypes/df_history.h"
85 #include "gromacs/mdtypes/forcerec.h"
86 #include "gromacs/mdtypes/inputrec.h"
87 #include "gromacs/mdtypes/md_enums.h"
88 #include "gromacs/mdtypes/mdatom.h"
89 #include "gromacs/mdtypes/nblist.h"
90 #include "gromacs/mdtypes/state.h"
91 #include "gromacs/pbcutil/ishift.h"
92 #include "gromacs/pbcutil/pbc.h"
93 #include "gromacs/pulling/pull.h"
94 #include "gromacs/timing/wallcycle.h"
95 #include "gromacs/topology/block.h"
96 #include "gromacs/topology/idef.h"
97 #include "gromacs/topology/ifunc.h"
98 #include "gromacs/topology/mtop_lookup.h"
99 #include "gromacs/topology/mtop_util.h"
100 #include "gromacs/topology/topology.h"
101 #include "gromacs/utility/basedefinitions.h"
102 #include "gromacs/utility/basenetwork.h"
103 #include "gromacs/utility/cstringutil.h"
104 #include "gromacs/utility/exceptions.h"
105 #include "gromacs/utility/fatalerror.h"
106 #include "gromacs/utility/gmxmpi.h"
107 #include "gromacs/utility/real.h"
108 #include "gromacs/utility/smalloc.h"
109 #include "gromacs/utility/strconvert.h"
110 #include "gromacs/utility/stringutil.h"
112 #include "atomdistribution.h"
113 #include "cellsizes.h"
114 #include "distribute.h"
115 #include "domdec_constraints.h"
116 #include "domdec_internal.h"
117 #include "domdec_vsite.h"
118 #include "redistribute.h"
121 #define DD_NLOAD_MAX 9
123 static const char *edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
125 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
128 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
129 #define DD_FLAG_NRCG 65535
130 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
131 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
133 /* The DD zone order */
134 static const ivec dd_zo[DD_MAXZONE] =
135 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
137 /* The non-bonded zone-pair setup for domain decomposition
138 * The first number is the i-zone, the second number the first j-zone seen by
139 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
140 * As is, this is for 3D decomposition, where there are 4 i-zones.
141 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
142 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
145 ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
150 /* Turn on DLB when the load imbalance causes this amount of total loss.
151 * There is a bit of overhead with DLB and it's difficult to achieve
152 * a load imbalance of less than 2% with DLB.
154 #define DD_PERF_LOSS_DLB_ON 0.02
156 /* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
157 #define DD_PERF_LOSS_WARN 0.05
160 /* We check if to turn on DLB at the first and every 100 DD partitionings.
161 * With large imbalance DLB will turn on at the first step, so we can
162 * make the interval so large that the MPI overhead of the check is negligible.
164 static const int c_checkTurnDlbOnInterval = 100;
165 /* We need to check if DLB results in worse performance and then turn it off.
166 * We check this more often then for turning DLB on, because the DLB can scale
167 * the domains very rapidly, so if unlucky the load imbalance can go up quickly
168 * and furthermore, we are already synchronizing often with DLB, so
169 * the overhead of the MPI Bcast is not that high.
171 static const int c_checkTurnDlbOffInterval = 20;
173 /* Forward declaration */
174 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue);
178 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
180 static void index2xyz(ivec nc,int ind,ivec xyz)
182 xyz[XX] = ind % nc[XX];
183 xyz[YY] = (ind / nc[XX]) % nc[YY];
184 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
188 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
190 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
191 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
192 xyz[ZZ] = ind % nc[ZZ];
195 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
200 ddindex = dd_index(dd->nc, c);
201 if (dd->comm->bCartesianPP_PME)
203 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
205 else if (dd->comm->bCartesianPP)
208 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
219 static gmx_bool dynamic_dd_box(const gmx_ddbox_t *ddbox, const t_inputrec *ir)
221 return (ddbox->nboundeddim < DIM || inputrecDynamicBox(ir));
224 int ddglatnr(const gmx_domdec_t *dd, int i)
234 if (i >= dd->comm->atomRanges.numAtomsTotal())
236 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
238 atnr = dd->globalAtomIndices[i] + 1;
244 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
246 return &dd->comm->cgs_gl;
249 void dd_store_state(gmx_domdec_t *dd, t_state *state)
253 if (state->ddp_count != dd->ddp_count)
255 gmx_incons("The MD state does not match the domain decomposition state");
258 state->cg_gl.resize(dd->ncg_home);
259 for (i = 0; i < dd->ncg_home; i++)
261 state->cg_gl[i] = dd->globalAtomGroupIndices[i];
264 state->ddp_count_cg_gl = dd->ddp_count;
267 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
269 return &dd->comm->zones;
272 void dd_get_ns_ranges(const gmx_domdec_t *dd, int icg,
273 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
275 gmx_domdec_zones_t *zones;
278 zones = &dd->comm->zones;
281 while (icg >= zones->izone[izone].cg1)
290 else if (izone < zones->nizone)
292 *jcg0 = zones->izone[izone].jcg0;
296 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
297 icg, izone, zones->nizone);
300 *jcg1 = zones->izone[izone].jcg1;
302 for (d = 0; d < dd->ndim; d++)
305 shift0[dim] = zones->izone[izone].shift0[dim];
306 shift1[dim] = zones->izone[izone].shift1[dim];
307 if (dd->comm->tric_dir[dim] || (isDlbOn(dd->comm) && d > 0))
309 /* A conservative approach, this can be optimized */
316 int dd_numHomeAtoms(const gmx_domdec_t &dd)
318 return dd.comm->atomRanges.numHomeAtoms();
321 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
323 /* We currently set mdatoms entries for all atoms:
324 * local + non-local + communicated for vsite + constraints
327 return dd->comm->atomRanges.numAtomsTotal();
330 int dd_natoms_vsite(const gmx_domdec_t *dd)
332 return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
335 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
337 *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
338 *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
341 void dd_move_x(gmx_domdec_t *dd,
343 gmx::ArrayRef<gmx::RVec> x,
344 gmx_wallcycle *wcycle)
346 wallcycle_start(wcycle, ewcMOVEX);
349 gmx_domdec_comm_t *comm;
350 gmx_domdec_comm_dim_t *cd;
351 rvec shift = {0, 0, 0};
352 gmx_bool bPBC, bScrew;
356 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
359 nat_tot = comm->atomRanges.numHomeAtoms();
360 for (int d = 0; d < dd->ndim; d++)
362 bPBC = (dd->ci[dd->dim[d]] == 0);
363 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
366 copy_rvec(box[dd->dim[d]], shift);
369 for (const gmx_domdec_ind_t &ind : cd->ind)
371 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
372 gmx::ArrayRef<gmx::RVec> &sendBuffer = sendBufferAccess.buffer;
376 for (int g : ind.index)
378 for (int j : atomGrouping.block(g))
380 sendBuffer[n] = x[j];
387 for (int g : ind.index)
389 for (int j : atomGrouping.block(g))
391 /* We need to shift the coordinates */
392 for (int d = 0; d < DIM; d++)
394 sendBuffer[n][d] = x[j][d] + shift[d];
402 for (int g : ind.index)
404 for (int j : atomGrouping.block(g))
407 sendBuffer[n][XX] = x[j][XX] + shift[XX];
409 * This operation requires a special shift force
410 * treatment, which is performed in calc_vir.
412 sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
413 sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
419 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
421 gmx::ArrayRef<gmx::RVec> receiveBuffer;
422 if (cd->receiveInPlace)
424 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
428 receiveBuffer = receiveBufferAccess.buffer;
430 /* Send and receive the coordinates */
431 ddSendrecv(dd, d, dddirBackward,
432 sendBuffer, receiveBuffer);
434 if (!cd->receiveInPlace)
437 for (int zone = 0; zone < nzone; zone++)
439 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
441 x[i] = receiveBuffer[j++];
445 nat_tot += ind.nrecv[nzone+1];
450 wallcycle_stop(wcycle, ewcMOVEX);
453 void dd_move_f(gmx_domdec_t *dd,
454 gmx::ArrayRef<gmx::RVec> f,
456 gmx_wallcycle *wcycle)
458 wallcycle_start(wcycle, ewcMOVEF);
461 gmx_domdec_comm_t *comm;
462 gmx_domdec_comm_dim_t *cd;
465 gmx_bool bShiftForcesNeedPbc, bScrew;
469 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
471 nzone = comm->zones.n/2;
472 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
473 for (int d = dd->ndim-1; d >= 0; d--)
475 /* Only forces in domains near the PBC boundaries need to
476 consider PBC in the treatment of fshift */
477 bShiftForcesNeedPbc = (dd->ci[dd->dim[d]] == 0);
478 bScrew = (bShiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX);
479 if (fshift == nullptr && !bScrew)
481 bShiftForcesNeedPbc = FALSE;
483 /* Determine which shift vector we need */
489 for (int p = cd->numPulses() - 1; p >= 0; p--)
491 const gmx_domdec_ind_t &ind = cd->ind[p];
492 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
493 gmx::ArrayRef<gmx::RVec> &receiveBuffer = receiveBufferAccess.buffer;
495 nat_tot -= ind.nrecv[nzone+1];
497 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
499 gmx::ArrayRef<gmx::RVec> sendBuffer;
500 if (cd->receiveInPlace)
502 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
506 sendBuffer = sendBufferAccess.buffer;
508 for (int zone = 0; zone < nzone; zone++)
510 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
512 sendBuffer[j++] = f[i];
516 /* Communicate the forces */
517 ddSendrecv(dd, d, dddirForward,
518 sendBuffer, receiveBuffer);
519 /* Add the received forces */
521 if (!bShiftForcesNeedPbc)
523 for (int g : ind.index)
525 for (int j : atomGrouping.block(g))
527 for (int d = 0; d < DIM; d++)
529 f[j][d] += receiveBuffer[n][d];
537 /* fshift should always be defined if this function is
538 * called when bShiftForcesNeedPbc is true */
539 assert(nullptr != fshift);
540 for (int g : ind.index)
542 for (int j : atomGrouping.block(g))
544 for (int d = 0; d < DIM; d++)
546 f[j][d] += receiveBuffer[n][d];
548 /* Add this force to the shift force */
549 for (int d = 0; d < DIM; d++)
551 fshift[is][d] += receiveBuffer[n][d];
559 for (int g : ind.index)
561 for (int j : atomGrouping.block(g))
563 /* Rotate the force */
564 f[j][XX] += receiveBuffer[n][XX];
565 f[j][YY] -= receiveBuffer[n][YY];
566 f[j][ZZ] -= receiveBuffer[n][ZZ];
569 /* Add this force to the shift force */
570 for (int d = 0; d < DIM; d++)
572 fshift[is][d] += receiveBuffer[n][d];
582 wallcycle_stop(wcycle, ewcMOVEF);
585 /* Convenience function for extracting a real buffer from an rvec buffer
587 * To reduce the number of temporary communication buffers and avoid
588 * cache polution, we reuse gmx::RVec buffers for storing reals.
589 * This functions return a real buffer reference with the same number
590 * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
592 static gmx::ArrayRef<real>
593 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
595 return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
599 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
602 gmx_domdec_comm_t *comm;
603 gmx_domdec_comm_dim_t *cd;
607 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
610 nat_tot = comm->atomRanges.numHomeAtoms();
611 for (int d = 0; d < dd->ndim; d++)
614 for (const gmx_domdec_ind_t &ind : cd->ind)
616 /* Note: We provision for RVec instead of real, so a factor of 3
617 * more than needed. The buffer actually already has this size
618 * and we pass a plain pointer below, so this does not matter.
620 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
621 gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
623 for (int g : ind.index)
625 for (int j : atomGrouping.block(g))
627 sendBuffer[n++] = v[j];
631 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
633 gmx::ArrayRef<real> receiveBuffer;
634 if (cd->receiveInPlace)
636 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
640 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
642 /* Send and receive the data */
643 ddSendrecv(dd, d, dddirBackward,
644 sendBuffer, receiveBuffer);
645 if (!cd->receiveInPlace)
648 for (int zone = 0; zone < nzone; zone++)
650 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
652 v[i] = receiveBuffer[j++];
656 nat_tot += ind.nrecv[nzone+1];
662 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
665 gmx_domdec_comm_t *comm;
666 gmx_domdec_comm_dim_t *cd;
670 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
672 nzone = comm->zones.n/2;
673 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
674 for (int d = dd->ndim-1; d >= 0; d--)
677 for (int p = cd->numPulses() - 1; p >= 0; p--)
679 const gmx_domdec_ind_t &ind = cd->ind[p];
681 /* Note: We provision for RVec instead of real, so a factor of 3
682 * more than needed. The buffer actually already has this size
683 * and we typecast, so this works as intended.
685 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
686 gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
687 nat_tot -= ind.nrecv[nzone + 1];
689 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
691 gmx::ArrayRef<real> sendBuffer;
692 if (cd->receiveInPlace)
694 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
698 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
700 for (int zone = 0; zone < nzone; zone++)
702 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
704 sendBuffer[j++] = v[i];
708 /* Communicate the forces */
709 ddSendrecv(dd, d, dddirForward,
710 sendBuffer, receiveBuffer);
711 /* Add the received forces */
713 for (int g : ind.index)
715 for (int j : atomGrouping.block(g))
717 v[j] += receiveBuffer[n];
726 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
728 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
730 zone->min0, zone->max1,
731 zone->mch0, zone->mch0,
732 zone->p1_0, zone->p1_1);
735 /* Using the home grid size as input in cell_ns_x0 and cell_ns_x1
736 * takes the extremes over all home and remote zones in the halo
737 * and returns the results in cell_ns_x0 and cell_ns_x1.
738 * Note: only used with the group cut-off scheme.
740 static void dd_move_cellx(gmx_domdec_t *dd,
741 const gmx_ddbox_t *ddbox,
745 constexpr int c_ddZoneCommMaxNumZones = 5;
746 gmx_ddzone_t buf_s[c_ddZoneCommMaxNumZones];
747 gmx_ddzone_t buf_r[c_ddZoneCommMaxNumZones];
748 gmx_ddzone_t buf_e[c_ddZoneCommMaxNumZones];
749 gmx_domdec_comm_t *comm = dd->comm;
753 for (int d = 1; d < dd->ndim; d++)
755 int dim = dd->dim[d];
756 gmx_ddzone_t &zp = (d == 1) ? comm->zone_d1[0] : comm->zone_d2[0][0];
758 /* Copy the base sizes of the home zone */
759 zp.min0 = cell_ns_x0[dim];
760 zp.max1 = cell_ns_x1[dim];
761 zp.min1 = cell_ns_x1[dim];
762 zp.mch0 = cell_ns_x0[dim];
763 zp.mch1 = cell_ns_x1[dim];
764 zp.p1_0 = cell_ns_x0[dim];
765 zp.p1_1 = cell_ns_x1[dim];
768 gmx::ArrayRef<DDCellsizesWithDlb> cellsizes = comm->cellsizesWithDlb;
770 /* Loop backward over the dimensions and aggregate the extremes
773 for (int d = dd->ndim - 2; d >= 0; d--)
775 const int dim = dd->dim[d];
776 const bool applyPbc = (dim < ddbox->npbcdim);
778 /* Use an rvec to store two reals */
779 extr_s[d][0] = cellsizes[d + 1].fracLower;
780 extr_s[d][1] = cellsizes[d + 1].fracUpper;
781 extr_s[d][2] = cellsizes[d + 1].fracUpper;
784 GMX_ASSERT(pos < c_ddZoneCommMaxNumZones, "The buffers should be sufficiently large");
785 /* Store the extremes in the backward sending buffer,
786 * so they get updated separately from the forward communication.
788 for (int d1 = d; d1 < dd->ndim-1; d1++)
790 /* We invert the order to be able to use the same loop for buf_e */
791 buf_s[pos].min0 = extr_s[d1][1];
792 buf_s[pos].max1 = extr_s[d1][0];
793 buf_s[pos].min1 = extr_s[d1][2];
796 /* Store the cell corner of the dimension we communicate along */
797 buf_s[pos].p1_0 = comm->cell_x0[dim];
802 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
805 if (dd->ndim == 3 && d == 0)
807 buf_s[pos] = comm->zone_d2[0][1];
809 buf_s[pos] = comm->zone_d1[0];
813 /* We only need to communicate the extremes
814 * in the forward direction
816 int numPulses = comm->cd[d].numPulses();
820 /* Take the minimum to avoid double communication */
821 numPulsesMin = std::min(numPulses, dd->nc[dim] - 1 - numPulses);
825 /* Without PBC we should really not communicate over
826 * the boundaries, but implementing that complicates
827 * the communication setup and therefore we simply
828 * do all communication, but ignore some data.
830 numPulsesMin = numPulses;
832 for (int pulse = 0; pulse < numPulsesMin; pulse++)
834 /* Communicate the extremes forward */
835 bool receiveValidData = (applyPbc || dd->ci[dim] > 0);
837 int numElements = dd->ndim - d - 1;
838 ddSendrecv(dd, d, dddirForward,
839 extr_s + d, numElements,
840 extr_r + d, numElements);
842 if (receiveValidData)
844 for (int d1 = d; d1 < dd->ndim - 1; d1++)
846 extr_s[d1][0] = std::max(extr_s[d1][0], extr_r[d1][0]);
847 extr_s[d1][1] = std::min(extr_s[d1][1], extr_r[d1][1]);
848 extr_s[d1][2] = std::min(extr_s[d1][2], extr_r[d1][2]);
853 const int numElementsInBuffer = pos;
854 for (int pulse = 0; pulse < numPulses; pulse++)
856 /* Communicate all the zone information backward */
857 bool receiveValidData = (applyPbc || dd->ci[dim] < dd->nc[dim] - 1);
859 static_assert(sizeof(gmx_ddzone_t) == c_ddzoneNumReals*sizeof(real), "Here we expect gmx_ddzone_t to consist of c_ddzoneNumReals reals (only)");
861 int numReals = numElementsInBuffer*c_ddzoneNumReals;
862 ddSendrecv(dd, d, dddirBackward,
863 gmx::arrayRefFromArray(&buf_s[0].min0, numReals),
864 gmx::arrayRefFromArray(&buf_r[0].min0, numReals));
869 for (int d1 = d + 1; d1 < dd->ndim; d1++)
871 /* Determine the decrease of maximum required
872 * communication height along d1 due to the distance along d,
873 * this avoids a lot of useless atom communication.
875 real dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
878 if (ddbox->tric_dir[dim])
880 /* c is the off-diagonal coupling between the cell planes
881 * along directions d and d1.
883 c = ddbox->v[dim][dd->dim[d1]][dim];
889 real det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
892 dh[d1] = comm->cutoff - (c*dist_d + std::sqrt(det))/(1 + c*c);
896 /* A negative value signals out of range */
902 /* Accumulate the extremes over all pulses */
903 for (int i = 0; i < numElementsInBuffer; i++)
911 if (receiveValidData)
913 buf_e[i].min0 = std::min(buf_e[i].min0, buf_r[i].min0);
914 buf_e[i].max1 = std::max(buf_e[i].max1, buf_r[i].max1);
915 buf_e[i].min1 = std::min(buf_e[i].min1, buf_r[i].min1);
919 if (dd->ndim == 3 && d == 0 && i == numElementsInBuffer - 1)
927 if (receiveValidData && dh[d1] >= 0)
929 buf_e[i].mch0 = std::max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
930 buf_e[i].mch1 = std::max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
933 /* Copy the received buffer to the send buffer,
934 * to pass the data through with the next pulse.
938 if (((applyPbc || dd->ci[dim] + numPulses < dd->nc[dim]) && pulse == numPulses - 1) ||
939 (!applyPbc && dd->ci[dim] + 1 + pulse == dd->nc[dim] - 1))
941 /* Store the extremes */
944 for (int d1 = d; d1 < dd->ndim-1; d1++)
946 extr_s[d1][1] = std::min(extr_s[d1][1], buf_e[pos].min0);
947 extr_s[d1][0] = std::max(extr_s[d1][0], buf_e[pos].max1);
948 extr_s[d1][2] = std::min(extr_s[d1][2], buf_e[pos].min1);
952 if (d == 1 || (d == 0 && dd->ndim == 3))
954 for (int i = d; i < 2; i++)
956 comm->zone_d2[1-d][i] = buf_e[pos];
962 comm->zone_d1[1] = buf_e[pos];
971 int dim = dd->dim[1];
972 for (int i = 0; i < 2; i++)
976 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
978 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d1[i].min0);
979 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d1[i].max1);
984 int dim = dd->dim[2];
985 for (int i = 0; i < 2; i++)
987 for (int j = 0; j < 2; j++)
991 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
993 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
994 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
998 for (int d = 1; d < dd->ndim; d++)
1000 cellsizes[d].fracLowerMax = extr_s[d-1][0];
1001 cellsizes[d].fracUpperMin = extr_s[d-1][1];
1004 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1005 d, cellsizes[d].fracLowerMax, cellsizes[d].fracUpperMin);
1010 static void write_dd_grid_pdb(const char *fn, int64_t step,
1011 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1013 rvec grid_s[2], *grid_r = nullptr, cx, r;
1014 char fname[STRLEN], buf[22];
1016 int a, i, d, z, y, x;
1020 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1021 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1025 snew(grid_r, 2*dd->nnodes);
1028 dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : nullptr);
1032 for (d = 0; d < DIM; d++)
1034 for (i = 0; i < DIM; i++)
1042 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1044 tric[d][i] = box[i][d]/box[i][i];
1053 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1054 out = gmx_fio_fopen(fname, "w");
1055 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1057 for (i = 0; i < dd->nnodes; i++)
1059 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1060 for (d = 0; d < DIM; d++)
1062 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
1064 for (z = 0; z < 2; z++)
1066 for (y = 0; y < 2; y++)
1068 for (x = 0; x < 2; x++)
1070 cx[XX] = grid_r[i*2+x][XX];
1071 cx[YY] = grid_r[i*2+y][YY];
1072 cx[ZZ] = grid_r[i*2+z][ZZ];
1074 gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
1075 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
1079 for (d = 0; d < DIM; d++)
1081 for (x = 0; x < 4; x++)
1085 case 0: y = 1 + i*8 + 2*x; break;
1086 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
1087 case 2: y = 1 + i*8 + x; break;
1089 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
1093 gmx_fio_fclose(out);
1098 void write_dd_pdb(const char *fn, int64_t step, const char *title,
1099 const gmx_mtop_t *mtop, const t_commrec *cr,
1100 int natoms, const rvec x[], const matrix box)
1102 char fname[STRLEN], buf[22];
1105 const char *atomname, *resname;
1111 natoms = dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
1114 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
1116 out = gmx_fio_fopen(fname, "w");
1118 fprintf(out, "TITLE %s\n", title);
1119 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1121 for (int i = 0; i < natoms; i++)
1123 int ii = dd->globalAtomIndices[i];
1124 mtopGetAtomAndResidueName(mtop, ii, &molb, &atomname, &resnr, &resname, nullptr);
1127 if (i < dd->comm->atomRanges.end(DDAtomRanges::Type::Zones))
1130 while (i >= dd->atomGrouping().subRange(0, dd->comm->zones.cg_range[c + 1]).end())
1136 else if (i < dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites))
1138 b = dd->comm->zones.n;
1142 b = dd->comm->zones.n + 1;
1144 gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
1145 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
1147 fprintf(out, "TER\n");
1149 gmx_fio_fclose(out);
1152 real dd_cutoff_multibody(const gmx_domdec_t *dd)
1154 gmx_domdec_comm_t *comm;
1161 if (comm->bInterCGBondeds)
1163 if (comm->cutoff_mbody > 0)
1165 r = comm->cutoff_mbody;
1169 /* cutoff_mbody=0 means we do not have DLB */
1170 r = comm->cellsize_min[dd->dim[0]];
1171 for (di = 1; di < dd->ndim; di++)
1173 r = std::min(r, comm->cellsize_min[dd->dim[di]]);
1175 if (comm->bBondComm)
1177 r = std::max(r, comm->cutoff_mbody);
1181 r = std::min(r, comm->cutoff);
1189 real dd_cutoff_twobody(const gmx_domdec_t *dd)
1193 r_mb = dd_cutoff_multibody(dd);
1195 return std::max(dd->comm->cutoff, r_mb);
1199 static void dd_cart_coord2pmecoord(const gmx_domdec_t *dd, const ivec coord,
1204 nc = dd->nc[dd->comm->cartpmedim];
1205 ntot = dd->comm->ntot[dd->comm->cartpmedim];
1206 copy_ivec(coord, coord_pme);
1207 coord_pme[dd->comm->cartpmedim] =
1208 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
1211 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
1216 npme = dd->comm->npmenodes;
1218 /* Here we assign a PME node to communicate with this DD node
1219 * by assuming that the major index of both is x.
1220 * We add cr->npmenodes/2 to obtain an even distribution.
1222 return (ddindex*npme + npme/2)/npp;
1225 static int *dd_interleaved_pme_ranks(const gmx_domdec_t *dd)
1230 snew(pme_rank, dd->comm->npmenodes);
1232 for (i = 0; i < dd->nnodes; i++)
1234 p0 = ddindex2pmeindex(dd, i);
1235 p1 = ddindex2pmeindex(dd, i+1);
1236 if (i+1 == dd->nnodes || p1 > p0)
1240 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
1242 pme_rank[n] = i + 1 + n;
1250 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
1258 if (dd->comm->bCartesian) {
1259 gmx_ddindex2xyz(dd->nc,ddindex,coords);
1260 dd_coords2pmecoords(dd,coords,coords_pme);
1261 copy_ivec(dd->ntot,nc);
1262 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1263 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1265 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
1267 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
1273 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
1278 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
1280 gmx_domdec_comm_t *comm;
1282 int ddindex, nodeid = -1;
1284 comm = cr->dd->comm;
1289 if (comm->bCartesianPP_PME)
1292 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
1297 ddindex = dd_index(cr->dd->nc, coords);
1298 if (comm->bCartesianPP)
1300 nodeid = comm->ddindex2simnodeid[ddindex];
1306 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
1318 static int dd_simnode2pmenode(const gmx_domdec_t *dd,
1319 const t_commrec gmx_unused *cr,
1324 const gmx_domdec_comm_t *comm = dd->comm;
1326 /* This assumes a uniform x domain decomposition grid cell size */
1327 if (comm->bCartesianPP_PME)
1330 ivec coord, coord_pme;
1331 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
1332 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1334 /* This is a PP node */
1335 dd_cart_coord2pmecoord(dd, coord, coord_pme);
1336 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
1340 else if (comm->bCartesianPP)
1342 if (sim_nodeid < dd->nnodes)
1344 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
1349 /* This assumes DD cells with identical x coordinates
1350 * are numbered sequentially.
1352 if (dd->comm->pmenodes == nullptr)
1354 if (sim_nodeid < dd->nnodes)
1356 /* The DD index equals the nodeid */
1357 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
1363 while (sim_nodeid > dd->comm->pmenodes[i])
1367 if (sim_nodeid < dd->comm->pmenodes[i])
1369 pmenode = dd->comm->pmenodes[i];
1377 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
1382 dd->comm->npmenodes_x, dd->comm->npmenodes_y
1393 std::vector<int> get_pme_ddranks(const t_commrec *cr, int pmenodeid)
1397 ivec coord, coord_pme;
1401 std::vector<int> ddranks;
1402 ddranks.reserve((dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
1404 for (x = 0; x < dd->nc[XX]; x++)
1406 for (y = 0; y < dd->nc[YY]; y++)
1408 for (z = 0; z < dd->nc[ZZ]; z++)
1410 if (dd->comm->bCartesianPP_PME)
1415 dd_cart_coord2pmecoord(dd, coord, coord_pme);
1416 if (dd->ci[XX] == coord_pme[XX] &&
1417 dd->ci[YY] == coord_pme[YY] &&
1418 dd->ci[ZZ] == coord_pme[ZZ])
1420 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
1425 /* The slab corresponds to the nodeid in the PME group */
1426 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
1428 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
1437 static gmx_bool receive_vir_ener(const gmx_domdec_t *dd, const t_commrec *cr)
1439 gmx_bool bReceive = TRUE;
1441 if (cr->npmenodes < dd->nnodes)
1443 gmx_domdec_comm_t *comm = dd->comm;
1444 if (comm->bCartesianPP_PME)
1447 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
1449 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
1450 coords[comm->cartpmedim]++;
1451 if (coords[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1454 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
1455 if (dd_simnode2pmenode(dd, cr, rank) == pmenode)
1457 /* This is not the last PP node for pmenode */
1462 GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
1467 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
1468 if (cr->sim_nodeid+1 < cr->nnodes &&
1469 dd_simnode2pmenode(dd, cr, cr->sim_nodeid+1) == pmenode)
1471 /* This is not the last PP node for pmenode */
1480 static void set_zones_ncg_home(gmx_domdec_t *dd)
1482 gmx_domdec_zones_t *zones;
1485 zones = &dd->comm->zones;
1487 zones->cg_range[0] = 0;
1488 for (i = 1; i < zones->n+1; i++)
1490 zones->cg_range[i] = dd->ncg_home;
1492 /* zone_ncg1[0] should always be equal to ncg_home */
1493 dd->comm->zone_ncg1[0] = dd->ncg_home;
1496 static void restoreAtomGroups(gmx_domdec_t *dd,
1497 const int *gcgs_index, const t_state *state)
1499 gmx::ArrayRef<const int> atomGroupsState = state->cg_gl;
1501 std::vector<int> &globalAtomGroupIndices = dd->globalAtomGroupIndices;
1502 gmx::RangePartitioning &atomGrouping = dd->atomGrouping_;
1504 globalAtomGroupIndices.resize(atomGroupsState.size());
1505 atomGrouping.clear();
1507 /* Copy back the global charge group indices from state
1508 * and rebuild the local charge group to atom index.
1510 for (gmx::index i = 0; i < atomGroupsState.size(); i++)
1512 const int atomGroupGlobal = atomGroupsState[i];
1513 const int groupSize = gcgs_index[atomGroupGlobal + 1] - gcgs_index[atomGroupGlobal];
1514 globalAtomGroupIndices[i] = atomGroupGlobal;
1515 atomGrouping.appendBlock(groupSize);
1518 dd->ncg_home = atomGroupsState.size();
1519 dd->comm->atomRanges.setEnd(DDAtomRanges::Type::Home, atomGrouping.fullRange().end());
1521 set_zones_ncg_home(dd);
1524 static void dd_set_cginfo(gmx::ArrayRef<const int> index_gl, int cg0, int cg1,
1525 t_forcerec *fr, char *bLocalCG)
1527 cginfo_mb_t *cginfo_mb;
1533 cginfo_mb = fr->cginfo_mb;
1534 cginfo = fr->cginfo;
1536 for (cg = cg0; cg < cg1; cg++)
1538 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
1542 if (bLocalCG != nullptr)
1544 for (cg = cg0; cg < cg1; cg++)
1546 bLocalCG[index_gl[cg]] = TRUE;
1551 static void make_dd_indices(gmx_domdec_t *dd,
1552 const int *gcgs_index, int cg_start)
1554 const int numZones = dd->comm->zones.n;
1555 const int *zone2cg = dd->comm->zones.cg_range;
1556 const int *zone_ncg1 = dd->comm->zone_ncg1;
1557 gmx::ArrayRef<const int> globalAtomGroupIndices = dd->globalAtomGroupIndices;
1558 const gmx_bool bCGs = dd->comm->bCGs;
1560 std::vector<int> &globalAtomIndices = dd->globalAtomIndices;
1561 gmx_ga2la_t &ga2la = *dd->ga2la;
1563 if (zone2cg[1] != dd->ncg_home)
1565 gmx_incons("dd->ncg_zone is not up to date");
1568 /* Make the local to global and global to local atom index */
1569 int a = dd->atomGrouping().subRange(cg_start, cg_start).begin();
1570 globalAtomIndices.resize(a);
1571 for (int zone = 0; zone < numZones; zone++)
1580 cg0 = zone2cg[zone];
1582 int cg1 = zone2cg[zone+1];
1583 int cg1_p1 = cg0 + zone_ncg1[zone];
1585 for (int cg = cg0; cg < cg1; cg++)
1590 /* Signal that this cg is from more than one pulse away */
1593 int cg_gl = globalAtomGroupIndices[cg];
1596 for (int a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
1598 globalAtomIndices.push_back(a_gl);
1599 ga2la.insert(a_gl, {a, zone1});
1605 globalAtomIndices.push_back(cg_gl);
1606 ga2la.insert(cg_gl, {a, zone1});
1613 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
1617 if (bLocalCG == nullptr)
1621 for (size_t i = 0; i < dd->globalAtomGroupIndices.size(); i++)
1623 if (!bLocalCG[dd->globalAtomGroupIndices[i]])
1626 "DD rank %d, %s: atom group %zu, global atom group %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i + 1, dd->globalAtomGroupIndices[i] + 1, dd->ncg_home);
1631 for (int i = 0; i < ncg_sys; i++)
1638 if (ngl != dd->globalAtomGroupIndices.size())
1640 fprintf(stderr, "DD rank %d, %s: In bLocalCG %zu atom groups are marked as local, whereas there are %zu\n", dd->rank, where, ngl, dd->globalAtomGroupIndices.size());
1647 static void check_index_consistency(gmx_domdec_t *dd,
1648 int natoms_sys, int ncg_sys,
1653 const int numAtomsInZones = dd->comm->atomRanges.end(DDAtomRanges::Type::Zones);
1655 if (dd->comm->DD_debug > 1)
1657 std::vector<int> have(natoms_sys);
1658 for (int a = 0; a < numAtomsInZones; a++)
1660 int globalAtomIndex = dd->globalAtomIndices[a];
1661 if (have[globalAtomIndex] > 0)
1663 fprintf(stderr, "DD rank %d: global atom %d occurs twice: index %d and %d\n", dd->rank, globalAtomIndex + 1, have[globalAtomIndex], a+1);
1667 have[globalAtomIndex] = a + 1;
1672 std::vector<int> have(numAtomsInZones);
1675 for (int i = 0; i < natoms_sys; i++)
1677 if (const auto entry = dd->ga2la->find(i))
1679 const int a = entry->la;
1680 if (a >= numAtomsInZones)
1682 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, numAtomsInZones);
1688 if (dd->globalAtomIndices[a] != i)
1690 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->globalAtomIndices[a]+1);
1697 if (ngl != numAtomsInZones)
1700 "DD rank %d, %s: %d global atom indices, %d local atoms\n",
1701 dd->rank, where, ngl, numAtomsInZones);
1703 for (int a = 0; a < numAtomsInZones; a++)
1708 "DD rank %d, %s: local atom %d, global %d has no global index\n",
1709 dd->rank, where, a + 1, dd->globalAtomIndices[a] + 1);
1713 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
1717 gmx_fatal(FARGS, "DD rank %d, %s: %d atom(group) index inconsistencies",
1718 dd->rank, where, nerr);
1722 /* Clear all DD global state indices, starting from \p atomGroupStart and \p atomStart */
1723 static void clearDDStateIndices(gmx_domdec_t *dd,
1727 gmx_ga2la_t &ga2la = *dd->ga2la;
1731 /* Clear the whole list without the overhead of searching */
1736 const int numAtomsInZones = dd->comm->atomRanges.end(DDAtomRanges::Type::Zones);
1737 for (int i = 0; i < numAtomsInZones; i++)
1739 ga2la.erase(dd->globalAtomIndices[i]);
1743 char *bLocalCG = dd->comm->bLocalCG;
1746 for (size_t atomGroup = atomGroupStart; atomGroup < dd->globalAtomGroupIndices.size(); atomGroup++)
1748 bLocalCG[dd->globalAtomGroupIndices[atomGroup]] = FALSE;
1752 dd_clear_local_vsite_indices(dd);
1754 if (dd->constraints)
1756 dd_clear_local_constraint_indices(dd);
1760 static bool check_grid_jump(int64_t step,
1761 const gmx_domdec_t *dd,
1763 const gmx_ddbox_t *ddbox,
1766 gmx_domdec_comm_t *comm = dd->comm;
1767 bool invalid = false;
1769 for (int d = 1; d < dd->ndim; d++)
1771 const DDCellsizesWithDlb &cellsizes = comm->cellsizesWithDlb[d];
1772 const int dim = dd->dim[d];
1773 const real limit = grid_jump_limit(comm, cutoff, d);
1774 real bfac = ddbox->box_size[dim];
1775 if (ddbox->tric_dir[dim])
1777 bfac *= ddbox->skew_fac[dim];
1779 if ((cellsizes.fracUpper - cellsizes.fracLowerMax)*bfac < limit ||
1780 (cellsizes.fracLower - cellsizes.fracUpperMin)*bfac > -limit)
1788 /* This error should never be triggered under normal
1789 * circumstances, but you never know ...
1791 gmx_fatal(FARGS, "step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
1792 gmx_step_str(step, buf),
1793 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1801 static float dd_force_load(gmx_domdec_comm_t *comm)
1808 if (comm->eFlop > 1)
1810 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
1815 load = comm->cycl[ddCyclF];
1816 if (comm->cycl_n[ddCyclF] > 1)
1818 /* Subtract the maximum of the last n cycle counts
1819 * to get rid of possible high counts due to other sources,
1820 * for instance system activity, that would otherwise
1821 * affect the dynamic load balancing.
1823 load -= comm->cycl_max[ddCyclF];
1827 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
1829 float gpu_wait, gpu_wait_sum;
1831 gpu_wait = comm->cycl[ddCyclWaitGPU];
1832 if (comm->cycl_n[ddCyclF] > 1)
1834 /* We should remove the WaitGPU time of the same MD step
1835 * as the one with the maximum F time, since the F time
1836 * and the wait time are not independent.
1837 * Furthermore, the step for the max F time should be chosen
1838 * the same on all ranks that share the same GPU.
1839 * But to keep the code simple, we remove the average instead.
1840 * The main reason for artificially long times at some steps
1841 * is spurious CPU activity or MPI time, so we don't expect
1842 * that changes in the GPU wait time matter a lot here.
1844 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/static_cast<float>(comm->cycl_n[ddCyclF]);
1846 /* Sum the wait times over the ranks that share the same GPU */
1847 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
1848 comm->mpi_comm_gpu_shared);
1849 /* Replace the wait time by the average over the ranks */
1850 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
1858 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
1860 gmx_domdec_comm_t *comm;
1865 snew(*dim_f, dd->nc[dim]+1);
1867 for (i = 1; i < dd->nc[dim]; i++)
1869 if (comm->slb_frac[dim])
1871 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
1875 (*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
1878 (*dim_f)[dd->nc[dim]] = 1;
1881 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
1883 int pmeindex, slab, nso, i;
1886 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
1892 ddpme->dim = dimind;
1894 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
1896 ddpme->nslab = (ddpme->dim == 0 ?
1897 dd->comm->npmenodes_x :
1898 dd->comm->npmenodes_y);
1900 if (ddpme->nslab <= 1)
1905 nso = dd->comm->npmenodes/ddpme->nslab;
1906 /* Determine for each PME slab the PP location range for dimension dim */
1907 snew(ddpme->pp_min, ddpme->nslab);
1908 snew(ddpme->pp_max, ddpme->nslab);
1909 for (slab = 0; slab < ddpme->nslab; slab++)
1911 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
1912 ddpme->pp_max[slab] = 0;
1914 for (i = 0; i < dd->nnodes; i++)
1916 ddindex2xyz(dd->nc, i, xyz);
1917 /* For y only use our y/z slab.
1918 * This assumes that the PME x grid size matches the DD grid size.
1920 if (dimind == 0 || xyz[XX] == dd->ci[XX])
1922 pmeindex = ddindex2pmeindex(dd, i);
1925 slab = pmeindex/nso;
1929 slab = pmeindex % ddpme->nslab;
1931 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
1932 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1936 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1939 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1941 if (dd->comm->ddpme[0].dim == XX)
1943 return dd->comm->ddpme[0].maxshift;
1951 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1953 if (dd->comm->ddpme[0].dim == YY)
1955 return dd->comm->ddpme[0].maxshift;
1957 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
1959 return dd->comm->ddpme[1].maxshift;
1967 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
1969 rvec cell_ns_x0, rvec cell_ns_x1,
1972 gmx_domdec_comm_t *comm;
1977 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
1979 dim = dd->dim[dim_ind];
1981 /* Without PBC we don't have restrictions on the outer cells */
1982 if (!(dim >= ddbox->npbcdim &&
1983 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
1985 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
1986 comm->cellsize_min[dim])
1989 gmx_fatal(FARGS, "step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
1990 gmx_step_str(step, buf), dim2char(dim),
1991 comm->cell_x1[dim] - comm->cell_x0[dim],
1992 ddbox->skew_fac[dim],
1993 dd->comm->cellsize_min[dim],
1994 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1998 if ((isDlbOn(dd->comm) && dd->ndim > 1) || ddbox->nboundeddim < DIM)
2000 /* Communicate the boundaries and update cell_ns_x0/1 */
2001 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
2002 if (isDlbOn(dd->comm) && dd->ndim > 1)
2004 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
2009 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
2011 /* Note that the cycles value can be incorrect, either 0 or some
2012 * extremely large value, when our thread migrated to another core
2013 * with an unsynchronized cycle counter. If this happens less often
2014 * that once per nstlist steps, this will not cause issues, since
2015 * we later subtract the maximum value from the sum over nstlist steps.
2016 * A zero count will slightly lower the total, but that's a small effect.
2017 * Note that the main purpose of the subtraction of the maximum value
2018 * is to avoid throwing off the load balancing when stalls occur due
2019 * e.g. system activity or network congestion.
2021 dd->comm->cycl[ddCycl] += cycles;
2022 dd->comm->cycl_n[ddCycl]++;
2023 if (cycles > dd->comm->cycl_max[ddCycl])
2025 dd->comm->cycl_max[ddCycl] = cycles;
2029 static double force_flop_count(t_nrnb *nrnb)
2036 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
2038 /* To get closer to the real timings, we half the count
2039 * for the normal loops and again half it for water loops.
2042 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
2044 sum += nrnb->n[i]*0.25*cost_nrnb(i);
2048 sum += nrnb->n[i]*0.50*cost_nrnb(i);
2051 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
2054 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
2056 sum += nrnb->n[i]*cost_nrnb(i);
2059 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
2061 sum += nrnb->n[i]*cost_nrnb(i);
2067 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
2069 if (dd->comm->eFlop)
2071 dd->comm->flop -= force_flop_count(nrnb);
2074 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
2076 if (dd->comm->eFlop)
2078 dd->comm->flop += force_flop_count(nrnb);
2083 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
2087 for (i = 0; i < ddCyclNr; i++)
2089 dd->comm->cycl[i] = 0;
2090 dd->comm->cycl_n[i] = 0;
2091 dd->comm->cycl_max[i] = 0;
2094 dd->comm->flop_n = 0;
2097 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
2099 gmx_domdec_comm_t *comm;
2100 domdec_load_t *load;
2101 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
2106 fprintf(debug, "get_load_distribution start\n");
2109 wallcycle_start(wcycle, ewcDDCOMMLOAD);
2113 bSepPME = (dd->pme_nodeid >= 0);
2115 if (dd->ndim == 0 && bSepPME)
2117 /* Without decomposition, but with PME nodes, we need the load */
2118 comm->load[0].mdf = comm->cycl[ddCyclPPduringPME];
2119 comm->load[0].pme = comm->cycl[ddCyclPME];
2122 for (int d = dd->ndim - 1; d >= 0; d--)
2124 const DDCellsizesWithDlb &cellsizes = comm->cellsizesWithDlb[d];
2125 const int dim = dd->dim[d];
2126 /* Check if we participate in the communication in this dimension */
2127 if (d == dd->ndim-1 ||
2128 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
2130 load = &comm->load[d];
2131 if (isDlbOn(dd->comm))
2133 cell_frac = cellsizes.fracUpper - cellsizes.fracLower;
2136 if (d == dd->ndim-1)
2138 sbuf[pos++] = dd_force_load(comm);
2139 sbuf[pos++] = sbuf[0];
2140 if (isDlbOn(dd->comm))
2142 sbuf[pos++] = sbuf[0];
2143 sbuf[pos++] = cell_frac;
2146 sbuf[pos++] = cellsizes.fracLowerMax;
2147 sbuf[pos++] = cellsizes.fracUpperMin;
2152 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
2153 sbuf[pos++] = comm->cycl[ddCyclPME];
2158 sbuf[pos++] = comm->load[d+1].sum;
2159 sbuf[pos++] = comm->load[d+1].max;
2160 if (isDlbOn(dd->comm))
2162 sbuf[pos++] = comm->load[d+1].sum_m;
2163 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
2164 sbuf[pos++] = comm->load[d+1].flags;
2167 sbuf[pos++] = cellsizes.fracLowerMax;
2168 sbuf[pos++] = cellsizes.fracUpperMin;
2173 sbuf[pos++] = comm->load[d+1].mdf;
2174 sbuf[pos++] = comm->load[d+1].pme;
2178 /* Communicate a row in DD direction d.
2179 * The communicators are setup such that the root always has rank 0.
2182 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
2183 load->load, load->nload*sizeof(float), MPI_BYTE,
2184 0, comm->mpi_comm_load[d]);
2186 if (dd->ci[dim] == dd->master_ci[dim])
2188 /* We are the master along this row, process this row */
2189 RowMaster *rowMaster = nullptr;
2193 rowMaster = cellsizes.rowMaster.get();
2203 for (int i = 0; i < dd->nc[dim]; i++)
2205 load->sum += load->load[pos++];
2206 load->max = std::max(load->max, load->load[pos]);
2208 if (isDlbOn(dd->comm))
2210 if (rowMaster->dlbIsLimited)
2212 /* This direction could not be load balanced properly,
2213 * therefore we need to use the maximum iso the average load.
2215 load->sum_m = std::max(load->sum_m, load->load[pos]);
2219 load->sum_m += load->load[pos];
2222 load->cvol_min = std::min(load->cvol_min, load->load[pos]);
2226 load->flags = gmx::roundToInt(load->load[pos++]);
2230 rowMaster->bounds[i].cellFracLowerMax = load->load[pos++];
2231 rowMaster->bounds[i].cellFracUpperMin = load->load[pos++];
2236 load->mdf = std::max(load->mdf, load->load[pos]);
2238 load->pme = std::max(load->pme, load->load[pos]);
2242 if (isDlbOn(comm) && rowMaster->dlbIsLimited)
2244 load->sum_m *= dd->nc[dim];
2245 load->flags |= (1<<d);
2253 comm->nload += dd_load_count(comm);
2254 comm->load_step += comm->cycl[ddCyclStep];
2255 comm->load_sum += comm->load[0].sum;
2256 comm->load_max += comm->load[0].max;
2259 for (int d = 0; d < dd->ndim; d++)
2261 if (comm->load[0].flags & (1<<d))
2263 comm->load_lim[d]++;
2269 comm->load_mdf += comm->load[0].mdf;
2270 comm->load_pme += comm->load[0].pme;
2274 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
2278 fprintf(debug, "get_load_distribution finished\n");
2282 static float dd_force_load_fraction(gmx_domdec_t *dd)
2284 /* Return the relative performance loss on the total run time
2285 * due to the force calculation load imbalance.
2287 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
2289 return dd->comm->load_sum/(dd->comm->load_step*dd->nnodes);
2297 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
2299 /* Return the relative performance loss on the total run time
2300 * due to the force calculation load imbalance.
2302 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
2305 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
2306 (dd->comm->load_step*dd->nnodes);
2314 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
2316 gmx_domdec_comm_t *comm = dd->comm;
2318 /* Only the master rank prints loads and only if we measured loads */
2319 if (!DDMASTER(dd) || comm->nload == 0)
2325 int numPpRanks = dd->nnodes;
2326 int numPmeRanks = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
2327 int numRanks = numPpRanks + numPmeRanks;
2328 float lossFraction = 0;
2330 /* Print the average load imbalance and performance loss */
2331 if (dd->nnodes > 1 && comm->load_sum > 0)
2333 float imbalance = comm->load_max*numPpRanks/comm->load_sum - 1;
2334 lossFraction = dd_force_imb_perf_loss(dd);
2336 std::string msg = "\n Dynamic load balancing report:\n";
2337 std::string dlbStateStr;
2339 switch (dd->comm->dlbState)
2341 case DlbState::offUser:
2342 dlbStateStr = "DLB was off during the run per user request.";
2344 case DlbState::offForever:
2345 /* Currectly this can happen due to performance loss observed, cell size
2346 * limitations or incompatibility with other settings observed during
2347 * determineInitialDlbState(). */
2348 dlbStateStr = "DLB got disabled because it was unsuitable to use.";
2350 case DlbState::offCanTurnOn:
2351 dlbStateStr = "DLB was off during the run due to low measured imbalance.";
2353 case DlbState::offTemporarilyLocked:
2354 dlbStateStr = "DLB was locked at the end of the run due to unfinished PP-PME balancing.";
2356 case DlbState::onCanTurnOff:
2357 dlbStateStr = "DLB was turned on during the run due to measured imbalance.";
2359 case DlbState::onUser:
2360 dlbStateStr = "DLB was permanently on during the run per user request.";
2363 GMX_ASSERT(false, "Undocumented DLB state");
2366 msg += " " + dlbStateStr + "\n";
2367 msg += gmx::formatString(" Average load imbalance: %.1f%%.\n", imbalance*100);
2368 msg += gmx::formatString(" The balanceable part of the MD step is %d%%, load imbalance is computed from this.\n",
2369 gmx::roundToInt(dd_force_load_fraction(dd)*100));
2370 msg += gmx::formatString(" Part of the total run time spent waiting due to load imbalance: %.1f%%.\n",
2372 fprintf(fplog, "%s", msg.c_str());
2373 fprintf(stderr, "%s", msg.c_str());
2376 /* Print during what percentage of steps the load balancing was limited */
2377 bool dlbWasLimited = false;
2380 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
2381 for (int d = 0; d < dd->ndim; d++)
2383 int limitPercentage = (200*comm->load_lim[d] + 1)/(2*comm->nload);
2384 sprintf(buf+strlen(buf), " %c %d %%",
2385 dim2char(dd->dim[d]), limitPercentage);
2386 if (limitPercentage >= 50)
2388 dlbWasLimited = true;
2391 sprintf(buf + strlen(buf), "\n");
2392 fprintf(fplog, "%s", buf);
2393 fprintf(stderr, "%s", buf);
2396 /* Print the performance loss due to separate PME - PP rank imbalance */
2397 float lossFractionPme = 0;
2398 if (numPmeRanks > 0 && comm->load_mdf > 0 && comm->load_step > 0)
2400 float pmeForceRatio = comm->load_pme/comm->load_mdf;
2401 lossFractionPme = (comm->load_pme - comm->load_mdf)/comm->load_step;
2402 if (lossFractionPme <= 0)
2404 lossFractionPme *= numPmeRanks/static_cast<float>(numRanks);
2408 lossFractionPme *= numPpRanks/static_cast<float>(numRanks);
2410 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pmeForceRatio);
2411 fprintf(fplog, "%s", buf);
2412 fprintf(stderr, "%s", buf);
2413 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", std::fabs(lossFractionPme)*100);
2414 fprintf(fplog, "%s", buf);
2415 fprintf(stderr, "%s", buf);
2417 fprintf(fplog, "\n");
2418 fprintf(stderr, "\n");
2420 if (lossFraction >= DD_PERF_LOSS_WARN)
2423 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
2424 " in the domain decomposition.\n", lossFraction*100);
2427 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
2429 else if (dlbWasLimited)
2431 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
2433 fprintf(fplog, "%s\n", buf);
2434 fprintf(stderr, "%s\n", buf);
2436 if (numPmeRanks > 0 && std::fabs(lossFractionPme) >= DD_PERF_LOSS_WARN)
2439 "NOTE: %.1f %% performance was lost because the PME ranks\n"
2440 " had %s work to do than the PP ranks.\n"
2441 " You might want to %s the number of PME ranks\n"
2442 " or %s the cut-off and the grid spacing.\n",
2443 std::fabs(lossFractionPme*100),
2444 (lossFractionPme < 0) ? "less" : "more",
2445 (lossFractionPme < 0) ? "decrease" : "increase",
2446 (lossFractionPme < 0) ? "decrease" : "increase");
2447 fprintf(fplog, "%s\n", buf);
2448 fprintf(stderr, "%s\n", buf);
2452 static float dd_vol_min(gmx_domdec_t *dd)
2454 return dd->comm->load[0].cvol_min*dd->nnodes;
2457 static int dd_load_flags(gmx_domdec_t *dd)
2459 return dd->comm->load[0].flags;
2462 static float dd_f_imbal(gmx_domdec_t *dd)
2464 if (dd->comm->load[0].sum > 0)
2466 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1.0f;
2470 /* Something is wrong in the cycle counting, report no load imbalance */
2475 float dd_pme_f_ratio(gmx_domdec_t *dd)
2477 /* Should only be called on the DD master rank */
2478 assert(DDMASTER(dd));
2480 if (dd->comm->load[0].mdf > 0 && dd->comm->cycl_n[ddCyclPME] > 0)
2482 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
2490 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, int64_t step)
2495 flags = dd_load_flags(dd);
2499 "DD load balancing is limited by minimum cell size in dimension");
2500 for (d = 0; d < dd->ndim; d++)
2504 fprintf(fplog, " %c", dim2char(dd->dim[d]));
2507 fprintf(fplog, "\n");
2509 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
2510 if (isDlbOn(dd->comm))
2512 fprintf(fplog, " vol min/aver %5.3f%c",
2513 dd_vol_min(dd), flags ? '!' : ' ');
2517 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
2519 if (dd->comm->cycl_n[ddCyclPME])
2521 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
2523 fprintf(fplog, "\n\n");
2526 static void dd_print_load_verbose(gmx_domdec_t *dd)
2528 if (isDlbOn(dd->comm))
2530 fprintf(stderr, "vol %4.2f%c ",
2531 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
2535 fprintf(stderr, "imb F %2d%% ", gmx::roundToInt(dd_f_imbal(dd)*100));
2537 if (dd->comm->cycl_n[ddCyclPME])
2539 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
2544 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
2549 gmx_bool bPartOfGroup = FALSE;
2551 dim = dd->dim[dim_ind];
2552 copy_ivec(loc, loc_c);
2553 for (i = 0; i < dd->nc[dim]; i++)
2556 rank = dd_index(dd->nc, loc_c);
2557 if (rank == dd->rank)
2559 /* This process is part of the group */
2560 bPartOfGroup = TRUE;
2563 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
2567 dd->comm->mpi_comm_load[dim_ind] = c_row;
2568 if (!isDlbDisabled(dd->comm))
2570 DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
2572 if (dd->ci[dim] == dd->master_ci[dim])
2574 /* This is the root process of this row */
2575 cellsizes.rowMaster = gmx::compat::make_unique<RowMaster>();
2577 RowMaster &rowMaster = *cellsizes.rowMaster;
2578 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
2579 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
2580 rowMaster.isCellMin.resize(dd->nc[dim]);
2583 rowMaster.bounds.resize(dd->nc[dim]);
2585 rowMaster.buf_ncd.resize(dd->nc[dim]);
2589 /* This is not a root process, we only need to receive cell_f */
2590 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
2593 if (dd->ci[dim] == dd->master_ci[dim])
2595 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
2601 void dd_setup_dlb_resource_sharing(t_commrec *cr,
2605 int physicalnode_id_hash;
2607 MPI_Comm mpi_comm_pp_physicalnode;
2609 if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
2611 /* Only ranks with short-ranged tasks (currently) use GPUs.
2612 * If we don't have GPUs assigned, there are no resources to share.
2617 physicalnode_id_hash = gmx_physicalnode_id_hash();
2623 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
2624 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
2625 dd->rank, physicalnode_id_hash, gpu_id);
2627 /* Split the PP communicator over the physical nodes */
2628 /* TODO: See if we should store this (before), as it's also used for
2629 * for the nodecomm summation.
2631 // TODO PhysicalNodeCommunicator could be extended/used to handle
2632 // the need for per-node per-group communicators.
2633 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
2634 &mpi_comm_pp_physicalnode);
2635 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
2636 &dd->comm->mpi_comm_gpu_shared);
2637 MPI_Comm_free(&mpi_comm_pp_physicalnode);
2638 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
2642 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
2645 /* Note that some ranks could share a GPU, while others don't */
2647 if (dd->comm->nrank_gpu_shared == 1)
2649 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
2652 GMX_UNUSED_VALUE(cr);
2653 GMX_UNUSED_VALUE(gpu_id);
2657 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
2660 int dim0, dim1, i, j;
2665 fprintf(debug, "Making load communicators\n");
2668 snew(dd->comm->load, std::max(dd->ndim, 1));
2669 snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
2677 make_load_communicator(dd, 0, loc);
2681 for (i = 0; i < dd->nc[dim0]; i++)
2684 make_load_communicator(dd, 1, loc);
2690 for (i = 0; i < dd->nc[dim0]; i++)
2694 for (j = 0; j < dd->nc[dim1]; j++)
2697 make_load_communicator(dd, 2, loc);
2704 fprintf(debug, "Finished making load communicators\n");
2709 /*! \brief Sets up the relation between neighboring domains and zones */
2710 static void setup_neighbor_relations(gmx_domdec_t *dd)
2712 int d, dim, i, j, m;
2714 gmx_domdec_zones_t *zones;
2715 gmx_domdec_ns_ranges_t *izone;
2717 for (d = 0; d < dd->ndim; d++)
2720 copy_ivec(dd->ci, tmp);
2721 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
2722 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
2723 copy_ivec(dd->ci, tmp);
2724 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
2725 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
2728 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
2731 dd->neighbor[d][1]);
2735 int nzone = (1 << dd->ndim);
2736 int nizone = (1 << std::max(dd->ndim - 1, 0));
2737 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
2739 zones = &dd->comm->zones;
2741 for (i = 0; i < nzone; i++)
2744 clear_ivec(zones->shift[i]);
2745 for (d = 0; d < dd->ndim; d++)
2747 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
2752 for (i = 0; i < nzone; i++)
2754 for (d = 0; d < DIM; d++)
2756 s[d] = dd->ci[d] - zones->shift[i][d];
2761 else if (s[d] >= dd->nc[d])
2767 zones->nizone = nizone;
2768 for (i = 0; i < zones->nizone; i++)
2770 assert(ddNonbondedZonePairRanges[i][0] == i);
2772 izone = &zones->izone[i];
2773 /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
2774 * j-zones up to nzone.
2776 izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
2777 izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
2778 for (dim = 0; dim < DIM; dim++)
2780 if (dd->nc[dim] == 1)
2782 /* All shifts should be allowed */
2783 izone->shift0[dim] = -1;
2784 izone->shift1[dim] = 1;
2788 /* Determine the min/max j-zone shift wrt the i-zone */
2789 izone->shift0[dim] = 1;
2790 izone->shift1[dim] = -1;
2791 for (j = izone->j0; j < izone->j1; j++)
2793 int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
2794 if (shift_diff < izone->shift0[dim])
2796 izone->shift0[dim] = shift_diff;
2798 if (shift_diff > izone->shift1[dim])
2800 izone->shift1[dim] = shift_diff;
2807 if (!isDlbDisabled(dd->comm))
2809 dd->comm->cellsizesWithDlb.resize(dd->ndim);
2812 if (dd->comm->bRecordLoad)
2814 make_load_communicators(dd);
2818 static void make_pp_communicator(FILE *fplog,
2820 t_commrec gmx_unused *cr,
2821 bool gmx_unused reorder)
2824 gmx_domdec_comm_t *comm;
2831 if (comm->bCartesianPP)
2833 /* Set up cartesian communication for the particle-particle part */
2836 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
2837 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
2840 for (int i = 0; i < DIM; i++)
2844 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
2846 /* We overwrite the old communicator with the new cartesian one */
2847 cr->mpi_comm_mygroup = comm_cart;
2850 dd->mpi_comm_all = cr->mpi_comm_mygroup;
2851 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
2853 if (comm->bCartesianPP_PME)
2855 /* Since we want to use the original cartesian setup for sim,
2856 * and not the one after split, we need to make an index.
2858 snew(comm->ddindex2ddnodeid, dd->nnodes);
2859 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
2860 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
2861 /* Get the rank of the DD master,
2862 * above we made sure that the master node is a PP node.
2872 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
2874 else if (comm->bCartesianPP)
2876 if (cr->npmenodes == 0)
2878 /* The PP communicator is also
2879 * the communicator for this simulation
2881 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
2883 cr->nodeid = dd->rank;
2885 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
2887 /* We need to make an index to go from the coordinates
2888 * to the nodeid of this simulation.
2890 snew(comm->ddindex2simnodeid, dd->nnodes);
2891 snew(buf, dd->nnodes);
2892 if (thisRankHasDuty(cr, DUTY_PP))
2894 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
2896 /* Communicate the ddindex to simulation nodeid index */
2897 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
2898 cr->mpi_comm_mysim);
2901 /* Determine the master coordinates and rank.
2902 * The DD master should be the same node as the master of this sim.
2904 for (int i = 0; i < dd->nnodes; i++)
2906 if (comm->ddindex2simnodeid[i] == 0)
2908 ddindex2xyz(dd->nc, i, dd->master_ci);
2909 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
2914 fprintf(debug, "The master rank is %d\n", dd->masterrank);
2919 /* No Cartesian communicators */
2920 /* We use the rank in dd->comm->all as DD index */
2921 ddindex2xyz(dd->nc, dd->rank, dd->ci);
2922 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
2924 clear_ivec(dd->master_ci);
2931 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
2932 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2937 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
2938 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2942 static void receive_ddindex2simnodeid(gmx_domdec_t *dd,
2946 gmx_domdec_comm_t *comm = dd->comm;
2948 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
2951 snew(comm->ddindex2simnodeid, dd->nnodes);
2952 snew(buf, dd->nnodes);
2953 if (thisRankHasDuty(cr, DUTY_PP))
2955 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
2957 /* Communicate the ddindex to simulation nodeid index */
2958 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
2959 cr->mpi_comm_mysim);
2963 GMX_UNUSED_VALUE(dd);
2964 GMX_UNUSED_VALUE(cr);
2968 static void split_communicator(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
2969 DdRankOrder gmx_unused rankOrder,
2970 bool gmx_unused reorder)
2972 gmx_domdec_comm_t *comm;
2981 if (comm->bCartesianPP)
2983 for (i = 1; i < DIM; i++)
2985 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
2987 if (bDiv[YY] || bDiv[ZZ])
2989 comm->bCartesianPP_PME = TRUE;
2990 /* If we have 2D PME decomposition, which is always in x+y,
2991 * we stack the PME only nodes in z.
2992 * Otherwise we choose the direction that provides the thinnest slab
2993 * of PME only nodes as this will have the least effect
2994 * on the PP communication.
2995 * But for the PME communication the opposite might be better.
2997 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
2999 dd->nc[YY] > dd->nc[ZZ]))
3001 comm->cartpmedim = ZZ;
3005 comm->cartpmedim = YY;
3007 comm->ntot[comm->cartpmedim]
3008 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
3012 fprintf(fplog, "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
3014 "Will not use a Cartesian communicator for PP <-> PME\n\n");
3018 if (comm->bCartesianPP_PME)
3026 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
3029 for (i = 0; i < DIM; i++)
3033 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, static_cast<int>(reorder),
3035 MPI_Comm_rank(comm_cart, &rank);
3036 if (MASTER(cr) && rank != 0)
3038 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
3041 /* With this assigment we loose the link to the original communicator
3042 * which will usually be MPI_COMM_WORLD, unless have multisim.
3044 cr->mpi_comm_mysim = comm_cart;
3045 cr->sim_nodeid = rank;
3047 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
3051 fprintf(fplog, "Cartesian rank %d, coordinates %d %d %d\n\n",
3052 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3055 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
3059 if (cr->npmenodes == 0 ||
3060 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
3062 cr->duty = DUTY_PME;
3065 /* Split the sim communicator into PP and PME only nodes */
3066 MPI_Comm_split(cr->mpi_comm_mysim,
3067 getThisRankDuties(cr),
3068 dd_index(comm->ntot, dd->ci),
3069 &cr->mpi_comm_mygroup);
3076 case DdRankOrder::pp_pme:
3079 fprintf(fplog, "Order of the ranks: PP first, PME last\n");
3082 case DdRankOrder::interleave:
3083 /* Interleave the PP-only and PME-only ranks */
3086 fprintf(fplog, "Interleaving PP and PME ranks\n");
3088 comm->pmenodes = dd_interleaved_pme_ranks(dd);
3090 case DdRankOrder::cartesian:
3093 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(rankOrder));
3096 if (dd_simnode2pmenode(dd, cr, cr->sim_nodeid) == -1)
3098 cr->duty = DUTY_PME;
3105 /* Split the sim communicator into PP and PME only nodes */
3106 MPI_Comm_split(cr->mpi_comm_mysim,
3107 getThisRankDuties(cr),
3109 &cr->mpi_comm_mygroup);
3110 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
3116 fprintf(fplog, "This rank does only %s work.\n\n",
3117 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
3121 /*! \brief Generates the MPI communicators for domain decomposition */
3122 static void make_dd_communicators(FILE *fplog, t_commrec *cr,
3123 gmx_domdec_t *dd, DdRankOrder ddRankOrder)
3125 gmx_domdec_comm_t *comm;
3130 copy_ivec(dd->nc, comm->ntot);
3132 comm->bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
3133 comm->bCartesianPP_PME = FALSE;
3135 /* Reorder the nodes by default. This might change the MPI ranks.
3136 * Real reordering is only supported on very few architectures,
3137 * Blue Gene is one of them.
3139 CartReorder = getenv("GMX_NO_CART_REORDER") == nullptr;
3141 if (cr->npmenodes > 0)
3143 /* Split the communicator into a PP and PME part */
3144 split_communicator(fplog, cr, dd, ddRankOrder, CartReorder);
3145 if (comm->bCartesianPP_PME)
3147 /* We (possibly) reordered the nodes in split_communicator,
3148 * so it is no longer required in make_pp_communicator.
3150 CartReorder = FALSE;
3155 /* All nodes do PP and PME */
3157 /* We do not require separate communicators */
3158 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
3162 if (thisRankHasDuty(cr, DUTY_PP))
3164 /* Copy or make a new PP communicator */
3165 make_pp_communicator(fplog, dd, cr, CartReorder);
3169 receive_ddindex2simnodeid(dd, cr);
3172 if (!thisRankHasDuty(cr, DUTY_PME))
3174 /* Set up the commnuication to our PME node */
3175 dd->pme_nodeid = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
3176 dd->pme_receive_vir_ener = receive_vir_ener(dd, cr);
3179 fprintf(debug, "My pme_nodeid %d receive ener %s\n",
3180 dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
3185 dd->pme_nodeid = -1;
3190 dd->ma = gmx::compat::make_unique<AtomDistribution>(dd->nc,
3192 comm->cgs_gl.index[comm->cgs_gl.nr]);
3196 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
3198 real *slb_frac, tot;
3203 if (nc > 1 && size_string != nullptr)
3207 fprintf(fplog, "Using static load balancing for the %s direction\n",
3212 for (i = 0; i < nc; i++)
3215 sscanf(size_string, "%20lf%n", &dbl, &n);
3218 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
3227 fprintf(fplog, "Relative cell sizes:");
3229 for (i = 0; i < nc; i++)
3234 fprintf(fplog, " %5.3f", slb_frac[i]);
3239 fprintf(fplog, "\n");
3246 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
3249 gmx_mtop_ilistloop_t iloop;
3253 iloop = gmx_mtop_ilistloop_init(mtop);
3254 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
3256 for (ftype = 0; ftype < F_NRE; ftype++)
3258 if ((interaction_function[ftype].flags & IF_BOND) &&
3261 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
3269 static int dd_getenv(FILE *fplog, const char *env_var, int def)
3275 val = getenv(env_var);
3278 if (sscanf(val, "%20d", &nst) <= 0)
3284 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
3292 static void dd_warning(const t_commrec *cr, FILE *fplog, const char *warn_string)
3296 fprintf(stderr, "\n%s\n", warn_string);
3300 fprintf(fplog, "\n%s\n", warn_string);
3304 static void check_dd_restrictions(t_commrec *cr, const gmx_domdec_t *dd,
3305 const t_inputrec *ir, FILE *fplog)
3307 if (ir->ePBC == epbcSCREW &&
3308 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
3310 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
3313 if (ir->ns_type == ensSIMPLE)
3315 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
3318 if (ir->nstlist == 0)
3320 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
3323 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
3325 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
3329 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3334 r = ddbox->box_size[XX];
3335 for (di = 0; di < dd->ndim; di++)
3338 /* Check using the initial average cell size */
3339 r = std::min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
3345 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
3347 static DlbState forceDlbOffOrBail(DlbState cmdlineDlbState,
3348 const std::string &reasonStr,
3352 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
3353 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
3355 if (cmdlineDlbState == DlbState::onUser)
3357 gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
3359 else if (cmdlineDlbState == DlbState::offCanTurnOn)
3361 dd_warning(cr, fplog, (dlbDisableNote + reasonStr + "\n").c_str());
3363 return DlbState::offForever;
3366 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
3368 * This function parses the parameters of "-dlb" command line option setting
3369 * corresponding state values. Then it checks the consistency of the determined
3370 * state with other run parameters and settings. As a result, the initial state
3371 * may be altered or an error may be thrown if incompatibility of options is detected.
3373 * \param [in] fplog Pointer to mdrun log file.
3374 * \param [in] cr Pointer to MPI communication object.
3375 * \param [in] dlbOption Enum value for the DLB option.
3376 * \param [in] bRecordLoad True if the load balancer is recording load information.
3377 * \param [in] mdrunOptions Options for mdrun.
3378 * \param [in] ir Pointer mdrun to input parameters.
3379 * \returns DLB initial/startup state.
3381 static DlbState determineInitialDlbState(FILE *fplog, t_commrec *cr,
3382 DlbOption dlbOption, gmx_bool bRecordLoad,
3383 const MdrunOptions &mdrunOptions,
3384 const t_inputrec *ir)
3386 DlbState dlbState = DlbState::offCanTurnOn;
3390 case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
3391 case DlbOption::no: dlbState = DlbState::offUser; break;
3392 case DlbOption::yes: dlbState = DlbState::onUser; break;
3393 default: gmx_incons("Invalid dlbOption enum value");
3396 /* Reruns don't support DLB: bail or override auto mode */
3397 if (mdrunOptions.rerun)
3399 std::string reasonStr = "it is not supported in reruns.";
3400 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3403 /* Unsupported integrators */
3404 if (!EI_DYNAMICS(ir->eI))
3406 auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
3407 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3410 /* Without cycle counters we can't time work to balance on */
3413 std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
3414 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3417 if (mdrunOptions.reproducible)
3419 std::string reasonStr = "you started a reproducible run.";
3422 case DlbState::offUser:
3424 case DlbState::offForever:
3425 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
3427 case DlbState::offCanTurnOn:
3428 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3429 case DlbState::onCanTurnOff:
3430 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
3432 case DlbState::onUser:
3433 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", cr, fplog);
3435 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", dlbState);
3442 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
3445 if (getenv("GMX_DD_ORDER_ZYX") != nullptr)
3447 /* Decomposition order z,y,x */
3450 fprintf(fplog, "Using domain decomposition order z, y, x\n");
3452 for (int dim = DIM-1; dim >= 0; dim--)
3454 if (dd->nc[dim] > 1)
3456 dd->dim[dd->ndim++] = dim;
3462 /* Decomposition order x,y,z */
3463 for (int dim = 0; dim < DIM; dim++)
3465 if (dd->nc[dim] > 1)
3467 dd->dim[dd->ndim++] = dim;
3474 /* Set dim[0] to avoid extra checks on ndim in several places */
3479 static gmx_domdec_comm_t *init_dd_comm()
3481 gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
3483 comm->n_load_have = 0;
3484 comm->n_load_collect = 0;
3486 comm->haveTurnedOffDlb = false;
3488 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
3490 comm->sum_nat[i] = 0;
3494 comm->load_step = 0;
3497 clear_ivec(comm->load_lim);
3501 /* This should be replaced by a unique pointer */
3502 comm->balanceRegion = ddBalanceRegionAllocate();
3507 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
3508 static void set_dd_limits_and_grid(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
3509 const DomdecOptions &options,
3510 const MdrunOptions &mdrunOptions,
3511 const gmx_mtop_t *mtop,
3512 const t_inputrec *ir,
3514 gmx::ArrayRef<const gmx::RVec> xGlobal,
3518 real r_bonded_limit = -1;
3519 const real tenPercentMargin = 1.1;
3520 gmx_domdec_comm_t *comm = dd->comm;
3522 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
3523 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
3525 dd->pme_recv_f_alloc = 0;
3526 dd->pme_recv_f_buf = nullptr;
3528 /* Initialize to GPU share count to 0, might change later */
3529 comm->nrank_gpu_shared = 0;
3531 comm->dlbState = determineInitialDlbState(fplog, cr, options.dlbOption, comm->bRecordLoad, mdrunOptions, ir);
3532 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
3533 /* To consider turning DLB on after 2*nstlist steps we need to check
3534 * at partitioning count 3. Thus we need to increase the first count by 2.
3536 comm->ddPartioningCountFirstDlbOff += 2;
3540 fprintf(fplog, "Dynamic load balancing: %s\n",
3541 edlbs_names[int(comm->dlbState)]);
3543 comm->bPMELoadBalDLBLimits = FALSE;
3545 /* Allocate the charge group/atom sorting struct */
3546 comm->sort = gmx::compat::make_unique<gmx_domdec_sort_t>();
3548 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
3550 comm->bInterCGBondeds = ((ncg_mtop(mtop) > gmx_mtop_num_molecules(*mtop)) ||
3551 mtop->bIntermolecularInteractions);
3552 if (comm->bInterCGBondeds)
3554 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
3558 comm->bInterCGMultiBody = FALSE;
3561 dd->bInterCGcons = gmx::inter_charge_group_constraints(*mtop);
3562 dd->bInterCGsettles = gmx::inter_charge_group_settles(*mtop);
3566 /* Set the cut-off to some very large value,
3567 * so we don't need if statements everywhere in the code.
3568 * We use sqrt, since the cut-off is squared in some places.
3570 comm->cutoff = GMX_CUTOFF_INF;
3574 comm->cutoff = ir->rlist;
3576 comm->cutoff_mbody = 0;
3578 /* Determine the minimum cell size limit, affected by many factors */
3579 comm->cellsize_limit = 0;
3580 comm->bBondComm = FALSE;
3582 /* We do not allow home atoms to move beyond the neighboring domain
3583 * between domain decomposition steps, which limits the cell size.
3584 * Get an estimate of cell size limit due to atom displacement.
3585 * In most cases this is a large overestimate, because it assumes
3586 * non-interaction atoms.
3587 * We set the chance to 1 in a trillion steps.
3589 constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
3590 const real limitForAtomDisplacement =
3591 minCellSizeForAtomDisplacement(*mtop, *ir, c_chanceThatAtomMovesBeyondDomain);
3595 "Minimum cell size due to atom displacement: %.3f nm\n",
3596 limitForAtomDisplacement);
3598 comm->cellsize_limit = std::max(comm->cellsize_limit,
3599 limitForAtomDisplacement);
3601 /* TODO: PME decomposition currently requires atoms not to be more than
3602 * 2/3 of comm->cutoff, which is >=rlist, outside of their domain.
3603 * In nearly all cases, limitForAtomDisplacement will be smaller
3604 * than 2/3*rlist, so the PME requirement is satisfied.
3605 * But it would be better for both correctness and performance
3606 * to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
3607 * Note that we would need to improve the pairlist buffer case.
3610 if (comm->bInterCGBondeds)
3612 if (options.minimumCommunicationRange > 0)
3614 comm->cutoff_mbody = options.minimumCommunicationRange;
3615 if (options.useBondedCommunication)
3617 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
3621 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
3623 r_bonded_limit = comm->cutoff_mbody;
3625 else if (ir->bPeriodicMols)
3627 /* Can not easily determine the required cut-off */
3628 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
3629 comm->cutoff_mbody = comm->cutoff/2;
3630 r_bonded_limit = comm->cutoff_mbody;
3638 dd_bonded_cg_distance(fplog, mtop, ir, as_rvec_array(xGlobal.data()), box,
3639 options.checkBondedInteractions,
3642 gmx_bcast(sizeof(r_2b), &r_2b, cr);
3643 gmx_bcast(sizeof(r_mb), &r_mb, cr);
3645 /* We use an initial margin of 10% for the minimum cell size,
3646 * except when we are just below the non-bonded cut-off.
3648 if (options.useBondedCommunication)
3650 if (std::max(r_2b, r_mb) > comm->cutoff)
3652 r_bonded = std::max(r_2b, r_mb);
3653 r_bonded_limit = tenPercentMargin*r_bonded;
3654 comm->bBondComm = TRUE;
3659 r_bonded_limit = std::min(tenPercentMargin*r_bonded, comm->cutoff);
3661 /* We determine cutoff_mbody later */
3665 /* No special bonded communication,
3666 * simply increase the DD cut-off.
3668 r_bonded_limit = tenPercentMargin*std::max(r_2b, r_mb);
3669 comm->cutoff_mbody = r_bonded_limit;
3670 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
3676 "Minimum cell size due to bonded interactions: %.3f nm\n",
3679 comm->cellsize_limit = std::max(comm->cellsize_limit, r_bonded_limit);
3683 if (dd->bInterCGcons && options.constraintCommunicationRange <= 0)
3685 /* There is a cell size limit due to the constraints (P-LINCS) */
3686 rconstr = gmx::constr_r_max(fplog, mtop, ir);
3690 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
3692 if (rconstr > comm->cellsize_limit)
3694 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
3698 else if (options.constraintCommunicationRange > 0 && fplog)
3700 /* Here we do not check for dd->bInterCGcons,
3701 * because one can also set a cell size limit for virtual sites only
3702 * and at this point we don't know yet if there are intercg v-sites.
3705 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
3706 options.constraintCommunicationRange);
3707 rconstr = options.constraintCommunicationRange;
3709 comm->cellsize_limit = std::max(comm->cellsize_limit, rconstr);
3711 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
3713 if (options.numCells[XX] > 0)
3715 copy_ivec(options.numCells, dd->nc);
3716 set_dd_dim(fplog, dd);
3717 set_ddbox_cr(cr, &dd->nc, ir, box, xGlobal, ddbox);
3719 if (options.numPmeRanks >= 0)
3721 cr->npmenodes = options.numPmeRanks;
3725 /* When the DD grid is set explicitly and -npme is set to auto,
3726 * don't use PME ranks. We check later if the DD grid is
3727 * compatible with the total number of ranks.
3732 real acs = average_cellsize_min(dd, ddbox);
3733 if (acs < comm->cellsize_limit)
3737 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
3739 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3740 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
3741 acs, comm->cellsize_limit);
3746 set_ddbox_cr(cr, nullptr, ir, box, xGlobal, ddbox);
3748 /* We need to choose the optimal DD grid and possibly PME nodes */
3750 dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
3751 options.numPmeRanks,
3752 !isDlbDisabled(comm),
3754 comm->cellsize_limit, comm->cutoff,
3755 comm->bInterCGBondeds);
3757 if (dd->nc[XX] == 0)
3760 gmx_bool bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
3761 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
3762 !bC ? "-rdd" : "-rcon",
3763 comm->dlbState != DlbState::offUser ? " or -dds" : "",
3764 bC ? " or your LINCS settings" : "");
3766 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3767 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
3769 "Look in the log file for details on the domain decomposition",
3770 cr->nnodes-cr->npmenodes, limit, buf);
3772 set_dd_dim(fplog, dd);
3778 "Domain decomposition grid %d x %d x %d, separate PME ranks %d\n",
3779 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
3782 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
3783 if (cr->nnodes - dd->nnodes != cr->npmenodes)
3785 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3786 "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
3787 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
3789 if (cr->npmenodes > dd->nnodes)
3791 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3792 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
3794 if (cr->npmenodes > 0)
3796 comm->npmenodes = cr->npmenodes;
3800 comm->npmenodes = dd->nnodes;
3803 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
3805 /* The following choices should match those
3806 * in comm_cost_est in domdec_setup.c.
3807 * Note that here the checks have to take into account
3808 * that the decomposition might occur in a different order than xyz
3809 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
3810 * in which case they will not match those in comm_cost_est,
3811 * but since that is mainly for testing purposes that's fine.
3813 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
3814 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
3815 getenv("GMX_PMEONEDD") == nullptr)
3817 comm->npmedecompdim = 2;
3818 comm->npmenodes_x = dd->nc[XX];
3819 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
3823 /* In case nc is 1 in both x and y we could still choose to
3824 * decompose pme in y instead of x, but we use x for simplicity.
3826 comm->npmedecompdim = 1;
3827 if (dd->dim[0] == YY)
3829 comm->npmenodes_x = 1;
3830 comm->npmenodes_y = comm->npmenodes;
3834 comm->npmenodes_x = comm->npmenodes;
3835 comm->npmenodes_y = 1;
3840 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
3841 comm->npmenodes_x, comm->npmenodes_y, 1);
3846 comm->npmedecompdim = 0;
3847 comm->npmenodes_x = 0;
3848 comm->npmenodes_y = 0;
3851 snew(comm->slb_frac, DIM);
3852 if (isDlbDisabled(comm))
3854 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], options.cellSizeX);
3855 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], options.cellSizeY);
3856 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], options.cellSizeZ);
3859 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
3861 if (comm->bBondComm || !isDlbDisabled(comm))
3863 /* Set the bonded communication distance to halfway
3864 * the minimum and the maximum,
3865 * since the extra communication cost is nearly zero.
3867 real acs = average_cellsize_min(dd, ddbox);
3868 comm->cutoff_mbody = 0.5*(r_bonded + acs);
3869 if (!isDlbDisabled(comm))
3871 /* Check if this does not limit the scaling */
3872 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
3873 options.dlbScaling*acs);
3875 if (!comm->bBondComm)
3877 /* Without bBondComm do not go beyond the n.b. cut-off */
3878 comm->cutoff_mbody = std::min(comm->cutoff_mbody, comm->cutoff);
3879 if (comm->cellsize_limit >= comm->cutoff)
3881 /* We don't loose a lot of efficieny
3882 * when increasing it to the n.b. cut-off.
3883 * It can even be slightly faster, because we need
3884 * less checks for the communication setup.
3886 comm->cutoff_mbody = comm->cutoff;
3889 /* Check if we did not end up below our original limit */
3890 comm->cutoff_mbody = std::max(comm->cutoff_mbody, r_bonded_limit);
3892 if (comm->cutoff_mbody > comm->cellsize_limit)
3894 comm->cellsize_limit = comm->cutoff_mbody;
3897 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
3902 fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
3903 "cellsize limit %f\n",
3904 gmx::boolToString(comm->bBondComm), comm->cellsize_limit);
3909 check_dd_restrictions(cr, dd, ir, fplog);
3913 static void set_dlb_limits(gmx_domdec_t *dd)
3916 for (int d = 0; d < dd->ndim; d++)
3918 /* Set the number of pulses to the value for DLB */
3919 dd->comm->cd[d].ind.resize(dd->comm->cd[d].np_dlb);
3921 dd->comm->cellsize_min[dd->dim[d]] =
3922 dd->comm->cellsize_min_dlb[dd->dim[d]];
3927 static void turn_on_dlb(FILE *fplog, const t_commrec *cr, int64_t step)
3929 gmx_domdec_t *dd = cr->dd;
3930 gmx_domdec_comm_t *comm = dd->comm;
3932 real cellsize_min = comm->cellsize_min[dd->dim[0]];
3933 for (int d = 1; d < dd->ndim; d++)
3935 cellsize_min = std::min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
3938 /* Turn off DLB if we're too close to the cell size limit. */
3939 if (cellsize_min < comm->cellsize_limit*1.05)
3941 auto str = gmx::formatString("step %" PRId64 " Measured %.1f %% performance loss due to load imbalance, "
3942 "but the minimum cell size is smaller than 1.05 times the cell size limit."
3943 "Will no longer try dynamic load balancing.\n", step, dd_force_imb_perf_loss(dd)*100);
3944 dd_warning(cr, fplog, str.c_str());
3946 comm->dlbState = DlbState::offForever;
3951 sprintf(buf, "step %" PRId64 " Turning on dynamic load balancing, because the performance loss due to load imbalance is %.1f %%.\n", step, dd_force_imb_perf_loss(dd)*100);
3952 dd_warning(cr, fplog, buf);
3953 comm->dlbState = DlbState::onCanTurnOff;
3955 /* Store the non-DLB performance, so we can check if DLB actually
3956 * improves performance.
3958 GMX_RELEASE_ASSERT(comm->cycl_n[ddCyclStep] > 0, "When we turned on DLB, we should have measured cycles");
3959 comm->cyclesPerStepBeforeDLB = comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
3963 /* We can set the required cell size info here,
3964 * so we do not need to communicate this.
3965 * The grid is completely uniform.
3967 for (int d = 0; d < dd->ndim; d++)
3969 RowMaster *rowMaster = comm->cellsizesWithDlb[d].rowMaster.get();
3973 comm->load[d].sum_m = comm->load[d].sum;
3975 int nc = dd->nc[dd->dim[d]];
3976 for (int i = 0; i < nc; i++)
3978 rowMaster->cellFrac[i] = i/static_cast<real>(nc);
3981 rowMaster->bounds[i].cellFracLowerMax = i /static_cast<real>(nc);
3982 rowMaster->bounds[i].cellFracUpperMin = (i + 1)/static_cast<real>(nc);
3985 rowMaster->cellFrac[nc] = 1.0;
3990 static void turn_off_dlb(FILE *fplog, const t_commrec *cr, int64_t step)
3992 gmx_domdec_t *dd = cr->dd;
3995 sprintf(buf, "step %" PRId64 " Turning off dynamic load balancing, because it is degrading performance.\n", step);
3996 dd_warning(cr, fplog, buf);
3997 dd->comm->dlbState = DlbState::offCanTurnOn;
3998 dd->comm->haveTurnedOffDlb = true;
3999 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
4002 static void turn_off_dlb_forever(FILE *fplog, const t_commrec *cr, int64_t step)
4004 GMX_RELEASE_ASSERT(cr->dd->comm->dlbState == DlbState::offCanTurnOn, "Can only turn off DLB forever when it was in the can-turn-on state");
4006 sprintf(buf, "step %" PRId64 " Will no longer try dynamic load balancing, as it degraded performance.\n", step);
4007 dd_warning(cr, fplog, buf);
4008 cr->dd->comm->dlbState = DlbState::offForever;
4011 static char *init_bLocalCG(const gmx_mtop_t *mtop)
4016 ncg = ncg_mtop(mtop);
4017 snew(bLocalCG, ncg);
4018 for (cg = 0; cg < ncg; cg++)
4020 bLocalCG[cg] = FALSE;
4026 void dd_init_bondeds(FILE *fplog,
4028 const gmx_mtop_t *mtop,
4029 const gmx_vsite_t *vsite,
4030 const t_inputrec *ir,
4031 gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
4033 gmx_domdec_comm_t *comm;
4035 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
4039 if (comm->bBondComm)
4041 /* Communicate atoms beyond the cut-off for bonded interactions */
4044 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
4046 comm->bLocalCG = init_bLocalCG(mtop);
4050 /* Only communicate atoms based on cut-off */
4051 comm->cglink = nullptr;
4052 comm->bLocalCG = nullptr;
4056 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
4057 const gmx_mtop_t *mtop, const t_inputrec *ir,
4058 gmx_bool bDynLoadBal, real dlb_scale,
4059 const gmx_ddbox_t *ddbox)
4061 gmx_domdec_comm_t *comm;
4067 if (fplog == nullptr)
4076 fprintf(fplog, "The maximum number of communication pulses is:");
4077 for (d = 0; d < dd->ndim; d++)
4079 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
4081 fprintf(fplog, "\n");
4082 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
4083 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
4084 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
4085 for (d = 0; d < DIM; d++)
4089 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
4096 comm->cellsize_min_dlb[d]/
4097 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
4099 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
4102 fprintf(fplog, "\n");
4106 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
4107 fprintf(fplog, "The initial number of communication pulses is:");
4108 for (d = 0; d < dd->ndim; d++)
4110 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
4112 fprintf(fplog, "\n");
4113 fprintf(fplog, "The initial domain decomposition cell size is:");
4114 for (d = 0; d < DIM; d++)
4118 fprintf(fplog, " %c %.2f nm",
4119 dim2char(d), dd->comm->cellsize_min[d]);
4122 fprintf(fplog, "\n\n");
4125 gmx_bool bInterCGVsites = count_intercg_vsites(mtop) != 0;
4127 if (comm->bInterCGBondeds ||
4129 dd->bInterCGcons || dd->bInterCGsettles)
4131 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
4132 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4133 "non-bonded interactions", "", comm->cutoff);
4137 limit = dd->comm->cellsize_limit;
4141 if (dynamic_dd_box(ddbox, ir))
4143 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
4145 limit = dd->comm->cellsize_min[XX];
4146 for (d = 1; d < DIM; d++)
4148 limit = std::min(limit, dd->comm->cellsize_min[d]);
4152 if (comm->bInterCGBondeds)
4154 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4155 "two-body bonded interactions", "(-rdd)",
4156 std::max(comm->cutoff, comm->cutoff_mbody));
4157 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4158 "multi-body bonded interactions", "(-rdd)",
4159 (comm->bBondComm || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
4163 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4164 "virtual site constructions", "(-rcon)", limit);
4166 if (dd->bInterCGcons || dd->bInterCGsettles)
4168 sprintf(buf, "atoms separated by up to %d constraints",
4170 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4171 buf, "(-rcon)", limit);
4173 fprintf(fplog, "\n");
4179 static void set_cell_limits_dlb(gmx_domdec_t *dd,
4181 const t_inputrec *ir,
4182 const gmx_ddbox_t *ddbox)
4184 gmx_domdec_comm_t *comm;
4185 int d, dim, npulse, npulse_d_max, npulse_d;
4190 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
4192 /* Determine the maximum number of comm. pulses in one dimension */
4194 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
4196 /* Determine the maximum required number of grid pulses */
4197 if (comm->cellsize_limit >= comm->cutoff)
4199 /* Only a single pulse is required */
4202 else if (!bNoCutOff && comm->cellsize_limit > 0)
4204 /* We round down slightly here to avoid overhead due to the latency
4205 * of extra communication calls when the cut-off
4206 * would be only slightly longer than the cell size.
4207 * Later cellsize_limit is redetermined,
4208 * so we can not miss interactions due to this rounding.
4210 npulse = static_cast<int>(0.96 + comm->cutoff/comm->cellsize_limit);
4214 /* There is no cell size limit */
4215 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
4218 if (!bNoCutOff && npulse > 1)
4220 /* See if we can do with less pulses, based on dlb_scale */
4222 for (d = 0; d < dd->ndim; d++)
4225 npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->cutoff
4226 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
4227 npulse_d_max = std::max(npulse_d_max, npulse_d);
4229 npulse = std::min(npulse, npulse_d_max);
4232 /* This env var can override npulse */
4233 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
4240 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
4241 for (d = 0; d < dd->ndim; d++)
4243 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
4244 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
4245 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
4247 comm->bVacDLBNoLimit = FALSE;
4251 /* cellsize_limit is set for LINCS in init_domain_decomposition */
4252 if (!comm->bVacDLBNoLimit)
4254 comm->cellsize_limit = std::max(comm->cellsize_limit,
4255 comm->cutoff/comm->maxpulse);
4257 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
4258 /* Set the minimum cell size for each DD dimension */
4259 for (d = 0; d < dd->ndim; d++)
4261 if (comm->bVacDLBNoLimit ||
4262 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
4264 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
4268 comm->cellsize_min_dlb[dd->dim[d]] =
4269 comm->cutoff/comm->cd[d].np_dlb;
4272 if (comm->cutoff_mbody <= 0)
4274 comm->cutoff_mbody = std::min(comm->cutoff, comm->cellsize_limit);
4282 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
4284 /* If each molecule is a single charge group
4285 * or we use domain decomposition for each periodic dimension,
4286 * we do not need to take pbc into account for the bonded interactions.
4288 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
4291 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
4294 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
4295 static void set_ddgrid_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
4296 const gmx_mtop_t *mtop, const t_inputrec *ir,
4297 const gmx_ddbox_t *ddbox)
4299 gmx_domdec_comm_t *comm;
4305 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
4307 init_ddpme(dd, &comm->ddpme[0], 0);
4308 if (comm->npmedecompdim >= 2)
4310 init_ddpme(dd, &comm->ddpme[1], 1);
4315 comm->npmenodes = 0;
4316 if (dd->pme_nodeid >= 0)
4318 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
4319 "Can not have separate PME ranks without PME electrostatics");
4325 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
4327 if (!isDlbDisabled(comm))
4329 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
4332 print_dd_settings(fplog, dd, mtop, ir, isDlbOn(comm), dlb_scale, ddbox);
4333 if (comm->dlbState == DlbState::offCanTurnOn)
4337 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
4339 print_dd_settings(fplog, dd, mtop, ir, TRUE, dlb_scale, ddbox);
4342 if (ir->ePBC == epbcNONE)
4344 vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
4349 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/static_cast<double>(dd->nnodes);
4353 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
4355 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
4357 dd->ga2la = new gmx_ga2la_t(natoms_tot,
4358 static_cast<int>(vol_frac*natoms_tot));
4361 /*! \brief Set some important DD parameters that can be modified by env.vars */
4362 static void set_dd_envvar_options(FILE *fplog, gmx_domdec_t *dd, int rank_mysim)
4364 gmx_domdec_comm_t *comm = dd->comm;
4366 dd->bSendRecv2 = (dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0) != 0);
4367 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
4368 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
4369 int recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
4370 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
4371 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
4372 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
4374 if (dd->bSendRecv2 && fplog)
4376 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
4383 fprintf(fplog, "Will load balance based on FLOP count\n");
4385 if (comm->eFlop > 1)
4387 srand(1 + rank_mysim);
4389 comm->bRecordLoad = TRUE;
4393 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
4397 DomdecOptions::DomdecOptions() :
4398 checkBondedInteractions(TRUE),
4399 useBondedCommunication(TRUE),
4401 rankOrder(DdRankOrder::pp_pme),
4402 minimumCommunicationRange(0),
4403 constraintCommunicationRange(0),
4404 dlbOption(DlbOption::turnOnWhenUseful),
4410 clear_ivec(numCells);
4413 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
4414 const DomdecOptions &options,
4415 const MdrunOptions &mdrunOptions,
4416 const gmx_mtop_t *mtop,
4417 const t_inputrec *ir,
4419 gmx::ArrayRef<const gmx::RVec> xGlobal,
4420 gmx::LocalAtomSetManager *atomSets)
4427 "\nInitializing Domain Decomposition on %d ranks\n", cr->nnodes);
4430 dd = new gmx_domdec_t;
4432 dd->comm = init_dd_comm();
4434 /* Initialize DD paritioning counters */
4435 dd->comm->partition_step = INT_MIN;
4438 set_dd_envvar_options(fplog, dd, cr->nodeid);
4440 gmx_ddbox_t ddbox = {0};
4441 set_dd_limits_and_grid(fplog, cr, dd, options, mdrunOptions,
4446 make_dd_communicators(fplog, cr, dd, options.rankOrder);
4448 if (thisRankHasDuty(cr, DUTY_PP))
4450 set_ddgrid_parameters(fplog, dd, options.dlbScaling, mtop, ir, &ddbox);
4452 setup_neighbor_relations(dd);
4455 /* Set overallocation to avoid frequent reallocation of arrays */
4456 set_over_alloc_dd(TRUE);
4458 clear_dd_cycle_counts(dd);
4460 dd->atomSets = atomSets;
4465 static gmx_bool test_dd_cutoff(t_commrec *cr,
4466 t_state *state, const t_inputrec *ir,
4477 set_ddbox(dd, false, ir, state->box, true, state->x, &ddbox);
4481 for (d = 0; d < dd->ndim; d++)
4485 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
4486 if (dynamic_dd_box(&ddbox, ir))
4488 inv_cell_size *= DD_PRES_SCALE_MARGIN;
4491 np = 1 + static_cast<int>(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
4493 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
4495 if (np > dd->comm->cd[d].np_dlb)
4500 /* If a current local cell size is smaller than the requested
4501 * cut-off, we could still fix it, but this gets very complicated.
4502 * Without fixing here, we might actually need more checks.
4504 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
4511 if (!isDlbDisabled(dd->comm))
4513 /* If DLB is not active yet, we don't need to check the grid jumps.
4514 * Actually we shouldn't, because then the grid jump data is not set.
4516 if (isDlbOn(dd->comm) &&
4517 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
4522 gmx_sumi(1, &LocallyLimited, cr);
4524 if (LocallyLimited > 0)
4533 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, const t_inputrec *ir,
4536 gmx_bool bCutoffAllowed;
4538 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
4542 cr->dd->comm->cutoff = cutoff_req;
4545 return bCutoffAllowed;
4548 void set_dd_dlb_max_cutoff(t_commrec *cr, real cutoff)
4550 gmx_domdec_comm_t *comm;
4552 comm = cr->dd->comm;
4554 /* Turn on the DLB limiting (might have been on already) */
4555 comm->bPMELoadBalDLBLimits = TRUE;
4557 /* Change the cut-off limit */
4558 comm->PMELoadBal_max_cutoff = cutoff;
4562 fprintf(debug, "PME load balancing set a limit to the DLB staggering such that a %f cut-off will continue to fit\n",
4563 comm->PMELoadBal_max_cutoff);
4567 /* Sets whether we should later check the load imbalance data, so that
4568 * we can trigger dynamic load balancing if enough imbalance has
4571 * Used after PME load balancing unlocks DLB, so that the check
4572 * whether DLB will be useful can happen immediately.
4574 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue)
4576 if (dd->comm->dlbState == DlbState::offCanTurnOn)
4578 dd->comm->bCheckWhetherToTurnDlbOn = bValue;
4582 /* Store the DD partitioning count, so we can ignore cycle counts
4583 * over the next nstlist steps, which are often slower.
4585 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
4590 /* Returns if we should check whether there has been enough load
4591 * imbalance to trigger dynamic load balancing.
4593 static gmx_bool dd_dlb_get_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd)
4595 if (dd->comm->dlbState != DlbState::offCanTurnOn)
4600 if (dd->ddp_count <= dd->comm->ddPartioningCountFirstDlbOff)
4602 /* We ignore the first nstlist steps at the start of the run
4603 * or after PME load balancing or after turning DLB off, since
4604 * these often have extra allocation or cache miss overhead.
4609 if (dd->comm->cycl_n[ddCyclStep] == 0)
4611 /* We can have zero timed steps when dd_partition_system is called
4612 * more than once at the same step, e.g. with replica exchange.
4613 * Turning on DLB would trigger an assertion failure later, but is
4614 * also useless right after exchanging replicas.
4619 /* We should check whether we should use DLB directly after
4621 if (dd->comm->bCheckWhetherToTurnDlbOn)
4623 /* This flag was set when the PME load-balancing routines
4624 unlocked DLB, and should now be cleared. */
4625 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, FALSE);
4628 /* We check whether we should use DLB every c_checkTurnDlbOnInterval
4629 * partitionings (we do not do this every partioning, so that we
4630 * avoid excessive communication). */
4631 return dd->comm->n_load_have % c_checkTurnDlbOnInterval == c_checkTurnDlbOnInterval - 1;
4634 gmx_bool dd_dlb_is_on(const gmx_domdec_t *dd)
4636 return isDlbOn(dd->comm);
4639 gmx_bool dd_dlb_is_locked(const gmx_domdec_t *dd)
4641 return (dd->comm->dlbState == DlbState::offTemporarilyLocked);
4644 void dd_dlb_lock(gmx_domdec_t *dd)
4646 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
4647 if (dd->comm->dlbState == DlbState::offCanTurnOn)
4649 dd->comm->dlbState = DlbState::offTemporarilyLocked;
4653 void dd_dlb_unlock(gmx_domdec_t *dd)
4655 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
4656 if (dd->comm->dlbState == DlbState::offTemporarilyLocked)
4658 dd->comm->dlbState = DlbState::offCanTurnOn;
4659 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
4663 static void merge_cg_buffers(int ncell,
4664 gmx_domdec_comm_dim_t *cd, int pulse,
4666 gmx::ArrayRef<int> index_gl,
4668 rvec *cg_cm, rvec *recv_vr,
4669 gmx::ArrayRef<int> cgindex,
4670 cginfo_mb_t *cginfo_mb, int *cginfo)
4672 gmx_domdec_ind_t *ind, *ind_p;
4673 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
4674 int shift, shift_at;
4676 ind = &cd->ind[pulse];
4678 /* First correct the already stored data */
4679 shift = ind->nrecv[ncell];
4680 for (cell = ncell-1; cell >= 0; cell--)
4682 shift -= ind->nrecv[cell];
4685 /* Move the cg's present from previous grid pulses */
4686 cg0 = ncg_cell[ncell+cell];
4687 cg1 = ncg_cell[ncell+cell+1];
4688 cgindex[cg1+shift] = cgindex[cg1];
4689 for (cg = cg1-1; cg >= cg0; cg--)
4691 index_gl[cg+shift] = index_gl[cg];
4692 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
4693 cgindex[cg+shift] = cgindex[cg];
4694 cginfo[cg+shift] = cginfo[cg];
4696 /* Correct the already stored send indices for the shift */
4697 for (p = 1; p <= pulse; p++)
4699 ind_p = &cd->ind[p];
4701 for (c = 0; c < cell; c++)
4703 cg0 += ind_p->nsend[c];
4705 cg1 = cg0 + ind_p->nsend[cell];
4706 for (cg = cg0; cg < cg1; cg++)
4708 ind_p->index[cg] += shift;
4714 /* Merge in the communicated buffers */
4718 for (cell = 0; cell < ncell; cell++)
4720 cg1 = ncg_cell[ncell+cell+1] + shift;
4723 /* Correct the old cg indices */
4724 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
4726 cgindex[cg+1] += shift_at;
4729 for (cg = 0; cg < ind->nrecv[cell]; cg++)
4731 /* Copy this charge group from the buffer */
4732 index_gl[cg1] = recv_i[cg0];
4733 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
4734 /* Add it to the cgindex */
4735 cg_gl = index_gl[cg1];
4736 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
4737 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
4738 cgindex[cg1+1] = cgindex[cg1] + nat;
4743 shift += ind->nrecv[cell];
4744 ncg_cell[ncell+cell+1] = cg1;
4748 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
4751 const gmx::RangePartitioning &atomGroups)
4753 /* Store the atom block boundaries for easy copying of communication buffers
4755 int g = atomGroupStart;
4756 for (int zone = 0; zone < nzone; zone++)
4758 for (gmx_domdec_ind_t &ind : cd->ind)
4760 const auto range = atomGroups.subRange(g, g + ind.nrecv[zone]);
4761 ind.cell2at0[zone] = range.begin();
4762 ind.cell2at1[zone] = range.end();
4763 g += ind.nrecv[zone];
4768 static gmx_bool missing_link(t_blocka *link, int cg_gl, const char *bLocalCG)
4774 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
4776 if (!bLocalCG[link->a[i]])
4785 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
4787 real c[DIM][4]; /* the corners for the non-bonded communication */
4788 real cr0; /* corner for rounding */
4789 real cr1[4]; /* corners for rounding */
4790 real bc[DIM]; /* corners for bounded communication */
4791 real bcr1; /* corner for rounding for bonded communication */
4794 /* Determine the corners of the domain(s) we are communicating with */
4796 set_dd_corners(const gmx_domdec_t *dd,
4797 int dim0, int dim1, int dim2,
4801 const gmx_domdec_comm_t *comm;
4802 const gmx_domdec_zones_t *zones;
4807 zones = &comm->zones;
4809 /* Keep the compiler happy */
4813 /* The first dimension is equal for all cells */
4814 c->c[0][0] = comm->cell_x0[dim0];
4817 c->bc[0] = c->c[0][0];
4822 /* This cell row is only seen from the first row */
4823 c->c[1][0] = comm->cell_x0[dim1];
4824 /* All rows can see this row */
4825 c->c[1][1] = comm->cell_x0[dim1];
4826 if (isDlbOn(dd->comm))
4828 c->c[1][1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
4831 /* For the multi-body distance we need the maximum */
4832 c->bc[1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
4835 /* Set the upper-right corner for rounding */
4836 c->cr0 = comm->cell_x1[dim0];
4841 for (j = 0; j < 4; j++)
4843 c->c[2][j] = comm->cell_x0[dim2];
4845 if (isDlbOn(dd->comm))
4847 /* Use the maximum of the i-cells that see a j-cell */
4848 for (i = 0; i < zones->nizone; i++)
4850 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
4855 std::max(c->c[2][j-4],
4856 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
4862 /* For the multi-body distance we need the maximum */
4863 c->bc[2] = comm->cell_x0[dim2];
4864 for (i = 0; i < 2; i++)
4866 for (j = 0; j < 2; j++)
4868 c->bc[2] = std::max(c->bc[2], comm->zone_d2[i][j].p1_0);
4874 /* Set the upper-right corner for rounding */
4875 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
4876 * Only cell (0,0,0) can see cell 7 (1,1,1)
4878 c->cr1[0] = comm->cell_x1[dim1];
4879 c->cr1[3] = comm->cell_x1[dim1];
4880 if (isDlbOn(dd->comm))
4882 c->cr1[0] = std::max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
4885 /* For the multi-body distance we need the maximum */
4886 c->bcr1 = std::max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
4893 /* Add the atom groups we need to send in this pulse from this zone to
4894 * \p localAtomGroups and \p work
4897 get_zone_pulse_cgs(gmx_domdec_t *dd,
4898 int zonei, int zone,
4900 gmx::ArrayRef<const int> globalAtomGroupIndices,
4901 const gmx::RangePartitioning &atomGroups,
4902 int dim, int dim_ind,
4903 int dim0, int dim1, int dim2,
4904 real r_comm2, real r_bcomm2,
4906 bool distanceIsTriclinic,
4908 real skew_fac2_d, real skew_fac_01,
4909 rvec *v_d, rvec *v_0, rvec *v_1,
4910 const dd_corners_t *c,
4911 const rvec sf2_round,
4912 gmx_bool bDistBonded,
4918 std::vector<int> *localAtomGroups,
4919 dd_comm_setup_work_t *work)
4921 gmx_domdec_comm_t *comm;
4923 gmx_bool bDistMB_pulse;
4925 real r2, rb2, r, tric_sh;
4932 bScrew = (dd->bScrewPBC && dim == XX);
4934 bDistMB_pulse = (bDistMB && bDistBonded);
4936 /* Unpack the work data */
4937 std::vector<int> &ibuf = work->atomGroupBuffer;
4938 std::vector<gmx::RVec> &vbuf = work->positionBuffer;
4942 for (cg = cg0; cg < cg1; cg++)
4946 if (!distanceIsTriclinic)
4948 /* Rectangular direction, easy */
4949 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
4956 r = cg_cm[cg][dim] - c->bc[dim_ind];
4962 /* Rounding gives at most a 16% reduction
4963 * in communicated atoms
4965 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
4967 r = cg_cm[cg][dim0] - c->cr0;
4968 /* This is the first dimension, so always r >= 0 */
4975 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
4977 r = cg_cm[cg][dim1] - c->cr1[zone];
4984 r = cg_cm[cg][dim1] - c->bcr1;
4994 /* Triclinic direction, more complicated */
4997 /* Rounding, conservative as the skew_fac multiplication
4998 * will slightly underestimate the distance.
5000 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
5002 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
5003 for (i = dim0+1; i < DIM; i++)
5005 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
5007 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
5010 rb[dim0] = rn[dim0];
5013 /* Take care that the cell planes along dim0 might not
5014 * be orthogonal to those along dim1 and dim2.
5016 for (i = 1; i <= dim_ind; i++)
5019 if (normal[dim0][dimd] > 0)
5021 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
5024 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
5029 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
5031 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
5033 for (i = dim1+1; i < DIM; i++)
5035 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
5037 rn[dim1] += tric_sh;
5040 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
5041 /* Take care of coupling of the distances
5042 * to the planes along dim0 and dim1 through dim2.
5044 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
5045 /* Take care that the cell planes along dim1
5046 * might not be orthogonal to that along dim2.
5048 if (normal[dim1][dim2] > 0)
5050 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
5056 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
5059 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
5060 /* Take care of coupling of the distances
5061 * to the planes along dim0 and dim1 through dim2.
5063 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
5064 /* Take care that the cell planes along dim1
5065 * might not be orthogonal to that along dim2.
5067 if (normal[dim1][dim2] > 0)
5069 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
5074 /* The distance along the communication direction */
5075 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
5077 for (i = dim+1; i < DIM; i++)
5079 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
5084 r2 += rn[dim]*rn[dim]*skew_fac2_d;
5085 /* Take care of coupling of the distances
5086 * to the planes along dim0 and dim1 through dim2.
5088 if (dim_ind == 1 && zonei == 1)
5090 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
5096 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
5099 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
5100 /* Take care of coupling of the distances
5101 * to the planes along dim0 and dim1 through dim2.
5103 if (dim_ind == 1 && zonei == 1)
5105 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
5113 ((bDistMB && rb2 < r_bcomm2) ||
5114 (bDist2B && r2 < r_bcomm2)) &&
5116 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
5117 missing_link(comm->cglink, globalAtomGroupIndices[cg],
5120 /* Store the local and global atom group indices and position */
5121 localAtomGroups->push_back(cg);
5122 ibuf.push_back(globalAtomGroupIndices[cg]);
5126 if (dd->ci[dim] == 0)
5128 /* Correct cg_cm for pbc */
5129 rvec_add(cg_cm[cg], box[dim], posPbc);
5132 posPbc[YY] = box[YY][YY] - posPbc[YY];
5133 posPbc[ZZ] = box[ZZ][ZZ] - posPbc[ZZ];
5138 copy_rvec(cg_cm[cg], posPbc);
5140 vbuf.emplace_back(posPbc[XX], posPbc[YY], posPbc[ZZ]);
5142 nat += atomGroups.block(cg).size();
5147 work->nsend_zone = nsend_z;
5150 static void clearCommSetupData(dd_comm_setup_work_t *work)
5152 work->localAtomGroupBuffer.clear();
5153 work->atomGroupBuffer.clear();
5154 work->positionBuffer.clear();
5156 work->nsend_zone = 0;
5159 static void setup_dd_communication(gmx_domdec_t *dd,
5160 matrix box, gmx_ddbox_t *ddbox,
5162 t_state *state, PaddedRVecVector *f)
5164 int dim_ind, dim, dim0, dim1, dim2, dimd, nat_tot;
5165 int nzone, nzone_send, zone, zonei, cg0, cg1;
5166 int c, i, cg, cg_gl, nrcg;
5167 int *zone_cg_range, pos_cg;
5168 gmx_domdec_comm_t *comm;
5169 gmx_domdec_zones_t *zones;
5170 gmx_domdec_comm_dim_t *cd;
5171 cginfo_mb_t *cginfo_mb;
5172 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
5173 real r_comm2, r_bcomm2;
5174 dd_corners_t corners;
5175 rvec *cg_cm, *normal, *v_d, *v_0 = nullptr, *v_1 = nullptr;
5176 real skew_fac2_d, skew_fac_01;
5181 fprintf(debug, "Setting up DD communication\n");
5186 if (comm->dth.empty())
5188 /* Initialize the thread data.
5189 * This can not be done in init_domain_decomposition,
5190 * as the numbers of threads is determined later.
5192 int numThreads = gmx_omp_nthreads_get(emntDomdec);
5193 comm->dth.resize(numThreads);
5196 switch (fr->cutoff_scheme)
5202 cg_cm = as_rvec_array(state->x.data());
5205 gmx_incons("unimplemented");
5208 bBondComm = comm->bBondComm;
5210 /* Do we need to determine extra distances for multi-body bondeds? */
5211 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
5213 /* Do we need to determine extra distances for only two-body bondeds? */
5214 bDist2B = (bBondComm && !bDistMB);
5216 r_comm2 = gmx::square(comm->cutoff);
5217 r_bcomm2 = gmx::square(comm->cutoff_mbody);
5221 fprintf(debug, "bBondComm %s, r_bc %f\n", gmx::boolToString(bBondComm), std::sqrt(r_bcomm2));
5224 zones = &comm->zones;
5227 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
5228 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
5230 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
5232 /* Triclinic stuff */
5233 normal = ddbox->normal;
5237 v_0 = ddbox->v[dim0];
5238 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
5240 /* Determine the coupling coefficient for the distances
5241 * to the cell planes along dim0 and dim1 through dim2.
5242 * This is required for correct rounding.
5245 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
5248 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
5254 v_1 = ddbox->v[dim1];
5257 zone_cg_range = zones->cg_range;
5258 cginfo_mb = fr->cginfo_mb;
5260 zone_cg_range[0] = 0;
5261 zone_cg_range[1] = dd->ncg_home;
5262 comm->zone_ncg1[0] = dd->ncg_home;
5263 pos_cg = dd->ncg_home;
5265 nat_tot = comm->atomRanges.numHomeAtoms();
5267 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
5269 dim = dd->dim[dim_ind];
5270 cd = &comm->cd[dim_ind];
5272 /* Check if we need to compute triclinic distances along this dim */
5273 bool distanceIsTriclinic = false;
5274 for (i = 0; i <= dim_ind; i++)
5276 if (ddbox->tric_dir[dd->dim[i]])
5278 distanceIsTriclinic = true;
5282 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
5284 /* No pbc in this dimension, the first node should not comm. */
5292 v_d = ddbox->v[dim];
5293 skew_fac2_d = gmx::square(ddbox->skew_fac[dim]);
5295 cd->receiveInPlace = true;
5296 for (int p = 0; p < cd->numPulses(); p++)
5298 /* Only atoms communicated in the first pulse are used
5299 * for multi-body bonded interactions or for bBondComm.
5301 bDistBonded = ((bDistMB || bDist2B) && p == 0);
5303 gmx_domdec_ind_t *ind = &cd->ind[p];
5305 /* Thread 0 writes in the global index array */
5307 clearCommSetupData(&comm->dth[0]);
5309 for (zone = 0; zone < nzone_send; zone++)
5311 if (dim_ind > 0 && distanceIsTriclinic)
5313 /* Determine slightly more optimized skew_fac's
5315 * This reduces the number of communicated atoms
5316 * by about 10% for 3D DD of rhombic dodecahedra.
5318 for (dimd = 0; dimd < dim; dimd++)
5320 sf2_round[dimd] = 1;
5321 if (ddbox->tric_dir[dimd])
5323 for (i = dd->dim[dimd]+1; i < DIM; i++)
5325 /* If we are shifted in dimension i
5326 * and the cell plane is tilted forward
5327 * in dimension i, skip this coupling.
5329 if (!(zones->shift[nzone+zone][i] &&
5330 ddbox->v[dimd][i][dimd] >= 0))
5333 gmx::square(ddbox->v[dimd][i][dimd]);
5336 sf2_round[dimd] = 1/sf2_round[dimd];
5341 zonei = zone_perm[dim_ind][zone];
5344 /* Here we permutate the zones to obtain a convenient order
5345 * for neighbor searching
5347 cg0 = zone_cg_range[zonei];
5348 cg1 = zone_cg_range[zonei+1];
5352 /* Look only at the cg's received in the previous grid pulse
5354 cg1 = zone_cg_range[nzone+zone+1];
5355 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
5358 const int numThreads = static_cast<int>(comm->dth.size());
5359 #pragma omp parallel for num_threads(numThreads) schedule(static)
5360 for (int th = 0; th < numThreads; th++)
5364 dd_comm_setup_work_t &work = comm->dth[th];
5366 /* Retain data accumulated into buffers of thread 0 */
5369 clearCommSetupData(&work);
5372 int cg0_th = cg0 + ((cg1 - cg0)* th )/numThreads;
5373 int cg1_th = cg0 + ((cg1 - cg0)*(th+1))/numThreads;
5375 /* Get the cg's for this pulse in this zone */
5376 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
5377 dd->globalAtomGroupIndices,
5379 dim, dim_ind, dim0, dim1, dim2,
5381 box, distanceIsTriclinic,
5382 normal, skew_fac2_d, skew_fac_01,
5383 v_d, v_0, v_1, &corners, sf2_round,
5384 bDistBonded, bBondComm,
5387 th == 0 ? &ind->index : &work.localAtomGroupBuffer,
5390 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
5393 std::vector<int> &atomGroups = comm->dth[0].atomGroupBuffer;
5394 std::vector<gmx::RVec> &positions = comm->dth[0].positionBuffer;
5395 ind->nsend[zone] = comm->dth[0].nsend_zone;
5396 /* Append data of threads>=1 to the communication buffers */
5397 for (int th = 1; th < numThreads; th++)
5399 const dd_comm_setup_work_t &dth = comm->dth[th];
5401 ind->index.insert(ind->index.end(), dth.localAtomGroupBuffer.begin(), dth.localAtomGroupBuffer.end());
5402 atomGroups.insert(atomGroups.end(), dth.atomGroupBuffer.begin(), dth.atomGroupBuffer.end());
5403 positions.insert(positions.end(), dth.positionBuffer.begin(), dth.positionBuffer.end());
5404 comm->dth[0].nat += dth.nat;
5405 ind->nsend[zone] += dth.nsend_zone;
5408 /* Clear the counts in case we do not have pbc */
5409 for (zone = nzone_send; zone < nzone; zone++)
5411 ind->nsend[zone] = 0;
5413 ind->nsend[nzone] = ind->index.size();
5414 ind->nsend[nzone + 1] = comm->dth[0].nat;
5415 /* Communicate the number of cg's and atoms to receive */
5416 ddSendrecv(dd, dim_ind, dddirBackward,
5417 ind->nsend, nzone+2,
5418 ind->nrecv, nzone+2);
5422 /* We can receive in place if only the last zone is not empty */
5423 for (zone = 0; zone < nzone-1; zone++)
5425 if (ind->nrecv[zone] > 0)
5427 cd->receiveInPlace = false;
5432 int receiveBufferSize = 0;
5433 if (!cd->receiveInPlace)
5435 receiveBufferSize = ind->nrecv[nzone];
5437 /* These buffer are actually only needed with in-place */
5438 DDBufferAccess<int> globalAtomGroupBuffer(comm->intBuffer, receiveBufferSize);
5439 DDBufferAccess<gmx::RVec> rvecBuffer(comm->rvecBuffer, receiveBufferSize);
5441 dd_comm_setup_work_t &work = comm->dth[0];
5443 /* Make space for the global cg indices */
5444 int numAtomGroupsNew = pos_cg + ind->nrecv[nzone];
5445 dd->globalAtomGroupIndices.resize(numAtomGroupsNew);
5446 /* Communicate the global cg indices */
5447 gmx::ArrayRef<int> integerBufferRef;
5448 if (cd->receiveInPlace)
5450 integerBufferRef = gmx::arrayRefFromArray(dd->globalAtomGroupIndices.data() + pos_cg, ind->nrecv[nzone]);
5454 integerBufferRef = globalAtomGroupBuffer.buffer;
5456 ddSendrecv<int>(dd, dim_ind, dddirBackward,
5457 work.atomGroupBuffer, integerBufferRef);
5459 /* Make space for cg_cm */
5460 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
5461 if (fr->cutoff_scheme == ecutsGROUP)
5467 cg_cm = as_rvec_array(state->x.data());
5469 /* Communicate cg_cm */
5470 gmx::ArrayRef<gmx::RVec> rvecBufferRef;
5471 if (cd->receiveInPlace)
5473 rvecBufferRef = gmx::arrayRefFromArray(reinterpret_cast<gmx::RVec *>(cg_cm + pos_cg), ind->nrecv[nzone]);
5477 rvecBufferRef = rvecBuffer.buffer;
5479 ddSendrecv<gmx::RVec>(dd, dim_ind, dddirBackward,
5480 work.positionBuffer, rvecBufferRef);
5482 /* Make the charge group index */
5483 if (cd->receiveInPlace)
5485 zone = (p == 0 ? 0 : nzone - 1);
5486 while (zone < nzone)
5488 for (cg = 0; cg < ind->nrecv[zone]; cg++)
5490 cg_gl = dd->globalAtomGroupIndices[pos_cg];
5491 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
5492 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
5493 dd->atomGrouping_.appendBlock(nrcg);
5496 /* Update the charge group presence,
5497 * so we can use it in the next pass of the loop.
5499 comm->bLocalCG[cg_gl] = TRUE;
5505 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
5508 zone_cg_range[nzone+zone] = pos_cg;
5513 /* This part of the code is never executed with bBondComm. */
5514 std::vector<int> &atomGroupsIndex = dd->atomGrouping_.rawIndex();
5515 atomGroupsIndex.resize(numAtomGroupsNew + 1);
5517 merge_cg_buffers(nzone, cd, p, zone_cg_range,
5518 dd->globalAtomGroupIndices, integerBufferRef.data(),
5519 cg_cm, as_rvec_array(rvecBufferRef.data()),
5521 fr->cginfo_mb, fr->cginfo);
5522 pos_cg += ind->nrecv[nzone];
5524 nat_tot += ind->nrecv[nzone+1];
5526 if (!cd->receiveInPlace)
5528 /* Store the atom block for easy copying of communication buffers */
5529 make_cell2at_index(cd, nzone, zone_cg_range[nzone], dd->atomGrouping());
5534 comm->atomRanges.setEnd(DDAtomRanges::Type::Zones, nat_tot);
5538 /* We don't need to update cginfo, since that was alrady done above.
5539 * So we pass NULL for the forcerec.
5541 dd_set_cginfo(dd->globalAtomGroupIndices,
5542 dd->ncg_home, dd->globalAtomGroupIndices.size(),
5543 nullptr, comm->bLocalCG);
5548 fprintf(debug, "Finished setting up DD communication, zones:");
5549 for (c = 0; c < zones->n; c++)
5551 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
5553 fprintf(debug, "\n");
5557 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
5561 for (c = 0; c < zones->nizone; c++)
5563 zones->izone[c].cg1 = zones->cg_range[c+1];
5564 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
5565 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
5569 /* \brief Set zone dimensions for zones \p zone_start to \p zone_end-1
5571 * Also sets the atom density for the home zone when \p zone_start=0.
5572 * For this \p numMovedChargeGroupsInHomeZone needs to be passed to tell
5573 * how many charge groups will move but are still part of the current range.
5574 * \todo When converting domdec to use proper classes, all these variables
5575 * should be private and a method should return the correct count
5576 * depending on an internal state.
5578 * \param[in,out] dd The domain decomposition struct
5579 * \param[in] box The box
5580 * \param[in] ddbox The domain decomposition box struct
5581 * \param[in] zone_start The start of the zone range to set sizes for
5582 * \param[in] zone_end The end of the zone range to set sizes for
5583 * \param[in] numMovedChargeGroupsInHomeZone The number of charge groups in the home zone that should moved but are still present in dd->comm->zones.cg_range
5585 static void set_zones_size(gmx_domdec_t *dd,
5586 matrix box, const gmx_ddbox_t *ddbox,
5587 int zone_start, int zone_end,
5588 int numMovedChargeGroupsInHomeZone)
5590 gmx_domdec_comm_t *comm;
5591 gmx_domdec_zones_t *zones;
5600 zones = &comm->zones;
5602 /* Do we need to determine extra distances for multi-body bondeds? */
5603 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
5605 for (z = zone_start; z < zone_end; z++)
5607 /* Copy cell limits to zone limits.
5608 * Valid for non-DD dims and non-shifted dims.
5610 copy_rvec(comm->cell_x0, zones->size[z].x0);
5611 copy_rvec(comm->cell_x1, zones->size[z].x1);
5614 for (d = 0; d < dd->ndim; d++)
5618 for (z = 0; z < zones->n; z++)
5620 /* With a staggered grid we have different sizes
5621 * for non-shifted dimensions.
5623 if (isDlbOn(dd->comm) && zones->shift[z][dim] == 0)
5627 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
5628 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
5632 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
5633 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
5639 rcmbs = comm->cutoff_mbody;
5640 if (ddbox->tric_dir[dim])
5642 rcs /= ddbox->skew_fac[dim];
5643 rcmbs /= ddbox->skew_fac[dim];
5646 /* Set the lower limit for the shifted zone dimensions */
5647 for (z = zone_start; z < zone_end; z++)
5649 if (zones->shift[z][dim] > 0)
5652 if (!isDlbOn(dd->comm) || d == 0)
5654 zones->size[z].x0[dim] = comm->cell_x1[dim];
5655 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
5659 /* Here we take the lower limit of the zone from
5660 * the lowest domain of the zone below.
5664 zones->size[z].x0[dim] =
5665 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
5671 zones->size[z].x0[dim] =
5672 zones->size[zone_perm[2][z-4]].x0[dim];
5676 zones->size[z].x0[dim] =
5677 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
5680 /* A temporary limit, is updated below */
5681 zones->size[z].x1[dim] = zones->size[z].x0[dim];
5685 for (zi = 0; zi < zones->nizone; zi++)
5687 if (zones->shift[zi][dim] == 0)
5689 /* This takes the whole zone into account.
5690 * With multiple pulses this will lead
5691 * to a larger zone then strictly necessary.
5693 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
5694 zones->size[zi].x1[dim]+rcmbs);
5702 /* Loop over the i-zones to set the upper limit of each
5705 for (zi = 0; zi < zones->nizone; zi++)
5707 if (zones->shift[zi][dim] == 0)
5709 /* We should only use zones up to zone_end */
5710 int jZoneEnd = std::min(zones->izone[zi].j1, zone_end);
5711 for (z = zones->izone[zi].j0; z < jZoneEnd; z++)
5713 if (zones->shift[z][dim] > 0)
5715 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
5716 zones->size[zi].x1[dim]+rcs);
5723 for (z = zone_start; z < zone_end; z++)
5725 /* Initialization only required to keep the compiler happy */
5726 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
5729 /* To determine the bounding box for a zone we need to find
5730 * the extreme corners of 4, 2 or 1 corners.
5732 nc = 1 << (ddbox->nboundeddim - 1);
5734 for (c = 0; c < nc; c++)
5736 /* Set up a zone corner at x=0, ignoring trilinic couplings */
5740 corner[YY] = zones->size[z].x0[YY];
5744 corner[YY] = zones->size[z].x1[YY];
5748 corner[ZZ] = zones->size[z].x0[ZZ];
5752 corner[ZZ] = zones->size[z].x1[ZZ];
5754 if (dd->ndim == 1 && dd->dim[0] < ZZ && ZZ < dd->npbcdim &&
5755 box[ZZ][1 - dd->dim[0]] != 0)
5757 /* With 1D domain decomposition the cg's are not in
5758 * the triclinic box, but triclinic x-y and rectangular y/x-z.
5759 * Shift the corner of the z-vector back to along the box
5760 * vector of dimension d, so it will later end up at 0 along d.
5761 * This can affect the location of this corner along dd->dim[0]
5762 * through the matrix operation below if box[d][dd->dim[0]]!=0.
5764 int d = 1 - dd->dim[0];
5766 corner[d] -= corner[ZZ]*box[ZZ][d]/box[ZZ][ZZ];
5768 /* Apply the triclinic couplings */
5769 assert(ddbox->npbcdim <= DIM);
5770 for (i = YY; i < ddbox->npbcdim; i++)
5772 for (j = XX; j < i; j++)
5774 corner[j] += corner[i]*box[i][j]/box[i][i];
5779 copy_rvec(corner, corner_min);
5780 copy_rvec(corner, corner_max);
5784 for (i = 0; i < DIM; i++)
5786 corner_min[i] = std::min(corner_min[i], corner[i]);
5787 corner_max[i] = std::max(corner_max[i], corner[i]);
5791 /* Copy the extreme cornes without offset along x */
5792 for (i = 0; i < DIM; i++)
5794 zones->size[z].bb_x0[i] = corner_min[i];
5795 zones->size[z].bb_x1[i] = corner_max[i];
5797 /* Add the offset along x */
5798 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
5799 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
5802 if (zone_start == 0)
5805 for (dim = 0; dim < DIM; dim++)
5807 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
5809 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0] - numMovedChargeGroupsInHomeZone)/vol;
5814 for (z = zone_start; z < zone_end; z++)
5816 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
5818 zones->size[z].x0[XX], zones->size[z].x1[XX],
5819 zones->size[z].x0[YY], zones->size[z].x1[YY],
5820 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
5821 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
5823 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
5824 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
5825 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
5830 static bool comp_cgsort(const gmx_cgsort_t &a, const gmx_cgsort_t &b)
5835 return a.ind_gl < b.ind_gl;
5837 return a.nsc < b.nsc;
5840 /* Order data in \p dataToSort according to \p sort
5842 * Note: both buffers should have at least \p sort.size() elements.
5844 template <typename T>
5846 orderVector(gmx::ArrayRef<const gmx_cgsort_t> sort,
5847 gmx::ArrayRef<T> dataToSort,
5848 gmx::ArrayRef<T> sortBuffer)
5850 GMX_ASSERT(dataToSort.size() >= sort.size(), "The vector needs to be sufficiently large");
5851 GMX_ASSERT(sortBuffer.size() >= sort.size(), "The sorting buffer needs to be sufficiently large");
5853 /* Order the data into the temporary buffer */
5855 for (const gmx_cgsort_t &entry : sort)
5857 sortBuffer[i++] = dataToSort[entry.ind];
5860 /* Copy back to the original array */
5861 std::copy(sortBuffer.begin(), sortBuffer.begin() + sort.size(),
5862 dataToSort.begin());
5865 /* Order data in \p dataToSort according to \p sort
5867 * Note: \p vectorToSort should have at least \p sort.size() elements,
5868 * \p workVector is resized when it is too small.
5870 template <typename T>
5872 orderVector(gmx::ArrayRef<const gmx_cgsort_t> sort,
5873 gmx::ArrayRef<T> vectorToSort,
5874 std::vector<T> *workVector)
5876 if (gmx::index(workVector->size()) < sort.size())
5878 workVector->resize(sort.size());
5880 orderVector<T>(sort, vectorToSort, *workVector);
5883 static void order_vec_atom(const gmx::RangePartitioning *atomGroups,
5884 gmx::ArrayRef<const gmx_cgsort_t> sort,
5885 gmx::ArrayRef<gmx::RVec> v,
5886 gmx::ArrayRef<gmx::RVec> buf)
5888 if (atomGroups == nullptr)
5890 /* Avoid the useless loop of the atoms within a cg */
5891 orderVector(sort, v, buf);
5896 /* Order the data */
5898 for (const gmx_cgsort_t &entry : sort)
5900 for (int i : atomGroups->block(entry.ind))
5902 copy_rvec(v[i], buf[a]);
5908 /* Copy back to the original array */
5909 for (int a = 0; a < atot; a++)
5911 copy_rvec(buf[a], v[a]);
5915 /* Returns whether a < b */
5916 static bool compareCgsort(const gmx_cgsort_t &a,
5917 const gmx_cgsort_t &b)
5919 return (a.nsc < b.nsc ||
5920 (a.nsc == b.nsc && a.ind_gl < b.ind_gl));
5923 static void orderedSort(gmx::ArrayRef<const gmx_cgsort_t> stationary,
5924 gmx::ArrayRef<gmx_cgsort_t> moved,
5925 std::vector<gmx_cgsort_t> *sort1)
5927 /* The new indices are not very ordered, so we qsort them */
5928 std::sort(moved.begin(), moved.end(), comp_cgsort);
5930 /* stationary is already ordered, so now we can merge the two arrays */
5931 sort1->resize(stationary.size() + moved.size());
5932 std::merge(stationary.begin(), stationary.end(),
5933 moved.begin(), moved.end(),
5938 /* Set the sorting order for systems with charge groups, returned in sort->sort.
5939 * The order is according to the global charge group index.
5940 * This adds and removes charge groups that moved between domains.
5942 static void dd_sort_order(const gmx_domdec_t *dd,
5943 const t_forcerec *fr,
5945 gmx_domdec_sort_t *sort)
5947 const int *a = fr->ns->grid->cell_index;
5949 const int movedValue = NSGRID_SIGNAL_MOVED_FAC*fr->ns->grid->ncells;
5951 if (ncg_home_old >= 0)
5953 std::vector<gmx_cgsort_t> &stationary = sort->stationary;
5954 std::vector<gmx_cgsort_t> &moved = sort->moved;
5956 /* The charge groups that remained in the same ns grid cell
5957 * are completely ordered. So we can sort efficiently by sorting
5958 * the charge groups that did move into the stationary list.
5959 * Note: push_back() seems to be slightly slower than direct access.
5963 for (int i = 0; i < dd->ncg_home; i++)
5965 /* Check if this cg did not move to another node */
5966 if (a[i] < movedValue)
5970 entry.ind_gl = dd->globalAtomGroupIndices[i];
5973 if (i >= ncg_home_old || a[i] != sort->sorted[i].nsc)
5975 /* This cg is new on this node or moved ns grid cell */
5976 moved.push_back(entry);
5980 /* This cg did not move */
5981 stationary.push_back(entry);
5988 fprintf(debug, "ordered sort cgs: stationary %zu moved %zu\n",
5989 stationary.size(), moved.size());
5991 /* Sort efficiently */
5992 orderedSort(stationary, moved, &sort->sorted);
5996 std::vector<gmx_cgsort_t> &cgsort = sort->sorted;
5998 cgsort.reserve(dd->ncg_home);
6000 for (int i = 0; i < dd->ncg_home; i++)
6002 /* Sort on the ns grid cell indices
6003 * and the global topology index
6007 entry.ind_gl = dd->globalAtomGroupIndices[i];
6009 cgsort.push_back(entry);
6010 if (cgsort[i].nsc < movedValue)
6017 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, numCGNew);
6019 /* Determine the order of the charge groups using qsort */
6020 std::sort(cgsort.begin(), cgsort.end(), comp_cgsort);
6022 /* Remove the charge groups which are no longer at home here */
6023 cgsort.resize(numCGNew);
6027 /* Returns the sorting order for atoms based on the nbnxn grid order in sort */
6028 static void dd_sort_order_nbnxn(const t_forcerec *fr,
6029 std::vector<gmx_cgsort_t> *sort)
6031 gmx::ArrayRef<const int> atomOrder = nbnxn_get_atomorder(fr->nbv->nbs.get());
6033 /* Using push_back() instead of this resize results in much slower code */
6034 sort->resize(atomOrder.size());
6035 gmx::ArrayRef<gmx_cgsort_t> buffer = *sort;
6036 size_t numSorted = 0;
6037 for (int i : atomOrder)
6041 /* The values of nsc and ind_gl are not used in this case */
6042 buffer[numSorted++].ind = i;
6045 sort->resize(numSorted);
6048 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
6051 gmx_domdec_sort_t *sort = dd->comm->sort.get();
6053 switch (fr->cutoff_scheme)
6056 dd_sort_order(dd, fr, ncg_home_old, sort);
6059 dd_sort_order_nbnxn(fr, &sort->sorted);
6062 gmx_incons("unimplemented");
6065 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
6067 /* We alloc with the old size, since cgindex is still old */
6068 GMX_ASSERT(atomGrouping.numBlocks() == dd->ncg_home, "atomGroups and dd should be consistent");
6069 DDBufferAccess<gmx::RVec> rvecBuffer(dd->comm->rvecBuffer, atomGrouping.fullRange().end());
6071 const gmx::RangePartitioning *atomGroupsPtr = (dd->comm->bCGs ? &atomGrouping : nullptr);
6073 /* Set the new home atom/charge group count */
6074 dd->ncg_home = sort->sorted.size();
6077 fprintf(debug, "Set the new home charge group count to %d\n",
6081 /* Reorder the state */
6082 gmx::ArrayRef<const gmx_cgsort_t> cgsort = sort->sorted;
6083 GMX_RELEASE_ASSERT(cgsort.size() == dd->ncg_home, "We should sort all the home atom groups");
6085 if (state->flags & (1 << estX))
6087 order_vec_atom(atomGroupsPtr, cgsort, state->x, rvecBuffer.buffer);
6089 if (state->flags & (1 << estV))
6091 order_vec_atom(atomGroupsPtr, cgsort, state->v, rvecBuffer.buffer);
6093 if (state->flags & (1 << estCGP))
6095 order_vec_atom(atomGroupsPtr, cgsort, state->cg_p, rvecBuffer.buffer);
6098 if (fr->cutoff_scheme == ecutsGROUP)
6101 gmx::ArrayRef<gmx::RVec> cgcmRef = gmx::arrayRefFromArray(reinterpret_cast<gmx::RVec *>(cgcm[0]), cgsort.size());
6102 orderVector(cgsort, cgcmRef, rvecBuffer.buffer);
6105 /* Reorder the global cg index */
6106 orderVector<int>(cgsort, dd->globalAtomGroupIndices, &sort->intBuffer);
6107 /* Reorder the cginfo */
6108 orderVector<int>(cgsort, gmx::arrayRefFromArray(fr->cginfo, cgsort.size()), &sort->intBuffer);
6109 /* Rebuild the local cg index */
6112 /* We make a new, ordered atomGroups object and assign it to
6113 * the old one. This causes some allocation overhead, but saves
6114 * a copy back of the whole index.
6116 gmx::RangePartitioning ordered;
6117 for (const gmx_cgsort_t &entry : cgsort)
6119 ordered.appendBlock(atomGrouping.block(entry.ind).size());
6121 dd->atomGrouping_ = ordered;
6125 dd->atomGrouping_.setAllBlocksSizeOne(dd->ncg_home);
6127 /* Set the home atom number */
6128 dd->comm->atomRanges.setEnd(DDAtomRanges::Type::Home, dd->atomGrouping().fullRange().end());
6130 if (fr->cutoff_scheme == ecutsVERLET)
6132 /* The atoms are now exactly in grid order, update the grid order */
6133 nbnxn_set_atomorder(fr->nbv->nbs.get());
6137 /* Copy the sorted ns cell indices back to the ns grid struct */
6138 for (gmx::index i = 0; i < cgsort.size(); i++)
6140 fr->ns->grid->cell_index[i] = cgsort[i].nsc;
6142 fr->ns->grid->nr = cgsort.size();
6146 static void add_dd_statistics(gmx_domdec_t *dd)
6148 gmx_domdec_comm_t *comm = dd->comm;
6150 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6152 auto range = static_cast<DDAtomRanges::Type>(i);
6154 comm->atomRanges.end(range) - comm->atomRanges.start(range);
6159 void reset_dd_statistics_counters(gmx_domdec_t *dd)
6161 gmx_domdec_comm_t *comm = dd->comm;
6163 /* Reset all the statistics and counters for total run counting */
6164 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6166 comm->sum_nat[i] = 0;
6170 comm->load_step = 0;
6173 clear_ivec(comm->load_lim);
6178 void print_dd_statistics(const t_commrec *cr, const t_inputrec *ir, FILE *fplog)
6180 gmx_domdec_comm_t *comm = cr->dd->comm;
6182 const int numRanges = static_cast<int>(DDAtomRanges::Type::Number);
6183 gmx_sumd(numRanges, comm->sum_nat, cr);
6185 if (fplog == nullptr)
6190 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
6192 for (int i = static_cast<int>(DDAtomRanges::Type::Zones); i < numRanges; i++)
6194 auto range = static_cast<DDAtomRanges::Type>(i);
6195 double av = comm->sum_nat[i]/comm->ndecomp;
6198 case DDAtomRanges::Type::Zones:
6200 " av. #atoms communicated per step for force: %d x %.1f\n",
6203 case DDAtomRanges::Type::Vsites:
6204 if (cr->dd->vsite_comm)
6207 " av. #atoms communicated per step for vsites: %d x %.1f\n",
6208 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
6212 case DDAtomRanges::Type::Constraints:
6213 if (cr->dd->constraint_comm)
6216 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
6217 1 + ir->nLincsIter, av);
6221 gmx_incons(" Unknown type for DD statistics");
6224 fprintf(fplog, "\n");
6226 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
6228 print_dd_load_av(fplog, cr->dd);
6232 void dd_partition_system(FILE *fplog,
6234 const t_commrec *cr,
6235 gmx_bool bMasterState,
6237 t_state *state_global,
6238 const gmx_mtop_t *top_global,
6239 const t_inputrec *ir,
6240 t_state *state_local,
6241 PaddedRVecVector *f,
6242 gmx::MDAtoms *mdAtoms,
6243 gmx_localtop_t *top_local,
6246 gmx::Constraints *constr,
6248 gmx_wallcycle *wcycle,
6252 gmx_domdec_comm_t *comm;
6253 gmx_ddbox_t ddbox = {0};
6255 int64_t step_pcoupl;
6256 rvec cell_ns_x0, cell_ns_x1;
6257 int ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
6258 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckWhetherToTurnDlbOn, bLogLoad;
6259 gmx_bool bRedist, bResortAll;
6260 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
6264 wallcycle_start(wcycle, ewcDOMDEC);
6269 // TODO if the update code becomes accessible here, use
6270 // upd->deform for this logic.
6271 bBoxChanged = (bMasterState || inputrecDeform(ir));
6272 if (ir->epc != epcNO)
6274 /* With nstpcouple > 1 pressure coupling happens.
6275 * one step after calculating the pressure.
6276 * Box scaling happens at the end of the MD step,
6277 * after the DD partitioning.
6278 * We therefore have to do DLB in the first partitioning
6279 * after an MD step where P-coupling occurred.
6280 * We need to determine the last step in which p-coupling occurred.
6281 * MRS -- need to validate this for vv?
6283 int n = ir->nstpcouple;
6286 step_pcoupl = step - 1;
6290 step_pcoupl = ((step - 1)/n)*n + 1;
6292 if (step_pcoupl >= comm->partition_step)
6298 bNStGlobalComm = (step % nstglobalcomm == 0);
6306 /* Should we do dynamic load balacing this step?
6307 * Since it requires (possibly expensive) global communication,
6308 * we might want to do DLB less frequently.
6310 if (bBoxChanged || ir->epc != epcNO)
6312 bDoDLB = bBoxChanged;
6316 bDoDLB = bNStGlobalComm;
6320 /* Check if we have recorded loads on the nodes */
6321 if (comm->bRecordLoad && dd_load_count(comm) > 0)
6323 bCheckWhetherToTurnDlbOn = dd_dlb_get_should_check_whether_to_turn_dlb_on(dd);
6325 /* Print load every nstlog, first and last step to the log file */
6326 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
6327 comm->n_load_collect == 0 ||
6329 (step + ir->nstlist > ir->init_step + ir->nsteps)));
6331 /* Avoid extra communication due to verbose screen output
6332 * when nstglobalcomm is set.
6334 if (bDoDLB || bLogLoad || bCheckWhetherToTurnDlbOn ||
6335 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
6337 get_load_distribution(dd, wcycle);
6342 dd_print_load(fplog, dd, step-1);
6346 dd_print_load_verbose(dd);
6349 comm->n_load_collect++;
6355 /* Add the measured cycles to the running average */
6356 const float averageFactor = 0.1f;
6357 comm->cyclesPerStepDlbExpAverage =
6358 (1 - averageFactor)*comm->cyclesPerStepDlbExpAverage +
6359 averageFactor*comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
6361 if (comm->dlbState == DlbState::onCanTurnOff &&
6362 dd->comm->n_load_have % c_checkTurnDlbOffInterval == c_checkTurnDlbOffInterval - 1)
6364 gmx_bool turnOffDlb;
6367 /* If the running averaged cycles with DLB are more
6368 * than before we turned on DLB, turn off DLB.
6369 * We will again run and check the cycles without DLB
6370 * and we can then decide if to turn off DLB forever.
6372 turnOffDlb = (comm->cyclesPerStepDlbExpAverage >
6373 comm->cyclesPerStepBeforeDLB);
6375 dd_bcast(dd, sizeof(turnOffDlb), &turnOffDlb);
6378 /* To turn off DLB, we need to redistribute the atoms */
6379 dd_collect_state(dd, state_local, state_global);
6380 bMasterState = TRUE;
6381 turn_off_dlb(fplog, cr, step);
6385 else if (bCheckWhetherToTurnDlbOn)
6387 gmx_bool turnOffDlbForever = FALSE;
6388 gmx_bool turnOnDlb = FALSE;
6390 /* Since the timings are node dependent, the master decides */
6393 /* If we recently turned off DLB, we want to check if
6394 * performance is better without DLB. We want to do this
6395 * ASAP to minimize the chance that external factors
6396 * slowed down the DLB step are gone here and we
6397 * incorrectly conclude that DLB was causing the slowdown.
6398 * So we measure one nstlist block, no running average.
6400 if (comm->haveTurnedOffDlb &&
6401 comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep] <
6402 comm->cyclesPerStepDlbExpAverage)
6404 /* After turning off DLB we ran nstlist steps in fewer
6405 * cycles than with DLB. This likely means that DLB
6406 * in not benefical, but this could be due to a one
6407 * time unlucky fluctuation, so we require two such
6408 * observations in close succession to turn off DLB
6411 if (comm->dlbSlowerPartitioningCount > 0 &&
6412 dd->ddp_count < comm->dlbSlowerPartitioningCount + 10*c_checkTurnDlbOnInterval)
6414 turnOffDlbForever = TRUE;
6416 comm->haveTurnedOffDlb = false;
6417 /* Register when we last measured DLB slowdown */
6418 comm->dlbSlowerPartitioningCount = dd->ddp_count;
6422 /* Here we check if the max PME rank load is more than 0.98
6423 * the max PP force load. If so, PP DLB will not help,
6424 * since we are (almost) limited by PME. Furthermore,
6425 * DLB will cause a significant extra x/f redistribution
6426 * cost on the PME ranks, which will then surely result
6427 * in lower total performance.
6429 if (cr->npmenodes > 0 &&
6430 dd_pme_f_ratio(dd) > 1 - DD_PERF_LOSS_DLB_ON)
6436 turnOnDlb = (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
6442 gmx_bool turnOffDlbForever;
6446 turnOffDlbForever, turnOnDlb
6448 dd_bcast(dd, sizeof(bools), &bools);
6449 if (bools.turnOffDlbForever)
6451 turn_off_dlb_forever(fplog, cr, step);
6453 else if (bools.turnOnDlb)
6455 turn_on_dlb(fplog, cr, step);
6460 comm->n_load_have++;
6463 cgs_gl = &comm->cgs_gl;
6468 /* Clear the old state */
6469 clearDDStateIndices(dd, 0, 0);
6472 auto xGlobal = positionsFromStatePointer(state_global);
6474 set_ddbox(dd, true, ir,
6475 DDMASTER(dd) ? state_global->box : nullptr,
6479 distributeState(fplog, dd, state_global, ddbox, state_local, f);
6481 dd_make_local_cgs(dd, &top_local->cgs);
6483 /* Ensure that we have space for the new distribution */
6484 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
6486 if (fr->cutoff_scheme == ecutsGROUP)
6488 calc_cgcm(fplog, 0, dd->ncg_home,
6489 &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
6492 inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());
6494 dd_set_cginfo(dd->globalAtomGroupIndices, 0, dd->ncg_home, fr, comm->bLocalCG);
6496 else if (state_local->ddp_count != dd->ddp_count)
6498 if (state_local->ddp_count > dd->ddp_count)
6500 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%ld)", state_local->ddp_count, dd->ddp_count);
6503 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
6505 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
6508 /* Clear the old state */
6509 clearDDStateIndices(dd, 0, 0);
6511 /* Restore the atom group indices from state_local */
6512 restoreAtomGroups(dd, cgs_gl->index, state_local);
6513 make_dd_indices(dd, cgs_gl->index, 0);
6514 ncgindex_set = dd->ncg_home;
6516 if (fr->cutoff_scheme == ecutsGROUP)
6518 /* Redetermine the cg COMs */
6519 calc_cgcm(fplog, 0, dd->ncg_home,
6520 &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
6523 inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());
6525 dd_set_cginfo(dd->globalAtomGroupIndices, 0, dd->ncg_home, fr, comm->bLocalCG);
6527 set_ddbox(dd, bMasterState, ir, state_local->box,
6528 true, state_local->x, &ddbox);
6530 bRedist = isDlbOn(comm);
6534 /* We have the full state, only redistribute the cgs */
6536 /* Clear the non-home indices */
6537 clearDDStateIndices(dd, dd->ncg_home, comm->atomRanges.numHomeAtoms());
6540 /* Avoid global communication for dim's without pbc and -gcom */
6541 if (!bNStGlobalComm)
6543 copy_rvec(comm->box0, ddbox.box0 );
6544 copy_rvec(comm->box_size, ddbox.box_size);
6546 set_ddbox(dd, bMasterState, ir, state_local->box,
6547 bNStGlobalComm, state_local->x, &ddbox);
6552 /* For dim's without pbc and -gcom */
6553 copy_rvec(ddbox.box0, comm->box0 );
6554 copy_rvec(ddbox.box_size, comm->box_size);
6556 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
6559 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
6561 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
6564 /* Check if we should sort the charge groups */
6565 const bool bSortCG = (bMasterState || bRedist);
6567 ncg_home_old = dd->ncg_home;
6569 /* When repartitioning we mark charge groups that will move to neighboring
6570 * DD cells, but we do not move them right away for performance reasons.
6571 * Thus we need to keep track of how many charge groups will move for
6572 * obtaining correct local charge group / atom counts.
6577 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
6579 ncgindex_set = dd->ncg_home;
6580 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
6584 GMX_RELEASE_ASSERT(bSortCG, "Sorting is required after redistribution");
6586 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
6589 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
6591 &comm->cell_x0, &comm->cell_x1,
6592 dd->ncg_home, fr->cg_cm,
6593 cell_ns_x0, cell_ns_x1, &grid_density);
6597 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
6600 switch (fr->cutoff_scheme)
6603 copy_ivec(fr->ns->grid->n, ncells_old);
6604 grid_first(fplog, fr->ns->grid, dd, &ddbox,
6605 state_local->box, cell_ns_x0, cell_ns_x1,
6606 fr->rlist, grid_density);
6609 nbnxn_get_ncells(fr->nbv->nbs.get(), &ncells_old[XX], &ncells_old[YY]);
6612 gmx_incons("unimplemented");
6614 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
6615 copy_ivec(ddbox.tric_dir, comm->tric_dir);
6619 wallcycle_sub_start(wcycle, ewcsDD_GRID);
6621 /* Sort the state on charge group position.
6622 * This enables exact restarts from this step.
6623 * It also improves performance by about 15% with larger numbers
6624 * of atoms per node.
6627 /* Fill the ns grid with the home cell,
6628 * so we can sort with the indices.
6630 set_zones_ncg_home(dd);
6632 switch (fr->cutoff_scheme)
6635 set_zones_size(dd, state_local->box, &ddbox, 0, 1, ncg_moved);
6637 nbnxn_put_on_grid(fr->nbv->nbs.get(), fr->ePBC, state_local->box,
6639 comm->zones.size[0].bb_x0,
6640 comm->zones.size[0].bb_x1,
6642 comm->zones.dens_zone0,
6644 as_rvec_array(state_local->x.data()),
6645 ncg_moved, bRedist ? comm->movedBuffer.data() : nullptr,
6646 fr->nbv->grp[eintLocal].kernel_type,
6649 nbnxn_get_ncells(fr->nbv->nbs.get(), &ncells_new[XX], &ncells_new[YY]);
6652 fill_grid(&comm->zones, fr->ns->grid, dd->ncg_home,
6653 0, dd->ncg_home, fr->cg_cm);
6655 copy_ivec(fr->ns->grid->n, ncells_new);
6658 gmx_incons("unimplemented");
6661 bResortAll = bMasterState;
6663 /* Check if we can user the old order and ns grid cell indices
6664 * of the charge groups to sort the charge groups efficiently.
6666 if (ncells_new[XX] != ncells_old[XX] ||
6667 ncells_new[YY] != ncells_old[YY] ||
6668 ncells_new[ZZ] != ncells_old[ZZ])
6675 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
6676 gmx_step_str(step, sbuf), dd->ncg_home);
6678 dd_sort_state(dd, fr->cg_cm, fr, state_local,
6679 bResortAll ? -1 : ncg_home_old);
6681 /* After sorting and compacting we set the correct size */
6682 dd_resize_state(state_local, f, comm->atomRanges.numHomeAtoms());
6684 /* Rebuild all the indices */
6688 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
6691 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
6693 /* Setup up the communication and communicate the coordinates */
6694 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
6696 /* Set the indices */
6697 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
6699 /* Set the charge group boundaries for neighbor searching */
6700 set_cg_boundaries(&comm->zones);
6702 if (fr->cutoff_scheme == ecutsVERLET)
6704 /* When bSortCG=true, we have already set the size for zone 0 */
6705 set_zones_size(dd, state_local->box, &ddbox,
6706 bSortCG ? 1 : 0, comm->zones.n,
6710 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
6713 write_dd_pdb("dd_home",step,"dump",top_global,cr,
6714 -1,as_rvec_array(state_local->x.data()),state_local->box);
6717 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
6719 /* Extract a local topology from the global topology */
6720 for (int i = 0; i < dd->ndim; i++)
6722 np[dd->dim[i]] = comm->cd[i].numPulses();
6724 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
6725 comm->cellsize_min, np,
6727 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : as_rvec_array(state_local->x.data()),
6728 vsite, top_global, top_local);
6730 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
6732 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
6734 /* Set up the special atom communication */
6735 int n = comm->atomRanges.end(DDAtomRanges::Type::Zones);
6736 for (int i = static_cast<int>(DDAtomRanges::Type::Zones) + 1; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6738 auto range = static_cast<DDAtomRanges::Type>(i);
6741 case DDAtomRanges::Type::Vsites:
6742 if (vsite && vsite->n_intercg_vsite)
6744 n = dd_make_local_vsites(dd, n, top_local->idef.il);
6747 case DDAtomRanges::Type::Constraints:
6748 if (dd->bInterCGcons || dd->bInterCGsettles)
6750 /* Only for inter-cg constraints we need special code */
6751 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
6752 constr, ir->nProjOrder,
6753 top_local->idef.il);
6757 gmx_incons("Unknown special atom type setup");
6759 comm->atomRanges.setEnd(range, n);
6762 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
6764 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
6766 /* Make space for the extra coordinates for virtual site
6767 * or constraint communication.
6769 state_local->natoms = comm->atomRanges.numAtomsTotal();
6771 dd_resize_state(state_local, f, state_local->natoms);
6773 if (fr->haveDirectVirialContributions)
6775 if (vsite && vsite->n_intercg_vsite)
6777 nat_f_novirsum = comm->atomRanges.end(DDAtomRanges::Type::Vsites);
6781 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
6783 nat_f_novirsum = comm->atomRanges.end(DDAtomRanges::Type::Zones);
6787 nat_f_novirsum = comm->atomRanges.numHomeAtoms();
6796 /* Set the number of atoms required for the force calculation.
6797 * Forces need to be constrained when doing energy
6798 * minimization. For simple simulations we could avoid some
6799 * allocation, zeroing and copying, but this is probably not worth
6800 * the complications and checking.
6802 forcerec_set_ranges(fr, dd->ncg_home, dd->globalAtomGroupIndices.size(),
6803 comm->atomRanges.end(DDAtomRanges::Type::Zones),
6804 comm->atomRanges.end(DDAtomRanges::Type::Constraints),
6807 /* Update atom data for mdatoms and several algorithms */
6808 mdAlgorithmsSetupAtomData(cr, ir, top_global, top_local, fr,
6809 nullptr, mdAtoms, constr, vsite, nullptr);
6811 auto mdatoms = mdAtoms->mdatoms();
6812 if (!thisRankHasDuty(cr, DUTY_PME))
6814 /* Send the charges and/or c6/sigmas to our PME only node */
6815 gmx_pme_send_parameters(cr,
6817 mdatoms->nChargePerturbed != 0, mdatoms->nTypePerturbed != 0,
6818 mdatoms->chargeA, mdatoms->chargeB,
6819 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
6820 mdatoms->sigmaA, mdatoms->sigmaB,
6821 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
6826 /* Update the local pull groups */
6827 dd_make_local_pull_groups(cr, ir->pull_work);
6830 if (dd->atomSets != nullptr)
6832 /* Update the local atom sets */
6833 dd->atomSets->setIndicesInDomainDecomposition(*(dd->ga2la));
6836 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
6837 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
6839 add_dd_statistics(dd);
6841 /* Make sure we only count the cycles for this DD partitioning */
6842 clear_dd_cycle_counts(dd);
6844 /* Because the order of the atoms might have changed since
6845 * the last vsite construction, we need to communicate the constructing
6846 * atom coordinates again (for spreading the forces this MD step).
6848 dd_move_x_vsites(dd, state_local->box, as_rvec_array(state_local->x.data()));
6850 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
6852 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
6854 dd_move_x(dd, state_local->box, state_local->x, nullWallcycle);
6855 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
6856 -1, as_rvec_array(state_local->x.data()), state_local->box);
6859 /* Store the partitioning step */
6860 comm->partition_step = step;
6862 /* Increase the DD partitioning counter */
6864 /* The state currently matches this DD partitioning count, store it */
6865 state_local->ddp_count = dd->ddp_count;
6868 /* The DD master node knows the complete cg distribution,
6869 * store the count so we can possibly skip the cg info communication.
6871 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
6874 if (comm->DD_debug > 0)
6876 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
6877 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
6878 "after partitioning");
6881 wallcycle_stop(wcycle, ewcDOMDEC);
6884 /*! \brief Check whether bonded interactions are missing, if appropriate */
6885 void checkNumberOfBondedInteractions(FILE *fplog,
6887 int totalNumberOfBondedInteractions,
6888 const gmx_mtop_t *top_global,
6889 const gmx_localtop_t *top_local,
6890 const t_state *state,
6891 bool *shouldCheckNumberOfBondedInteractions)
6893 if (*shouldCheckNumberOfBondedInteractions)
6895 if (totalNumberOfBondedInteractions != cr->dd->nbonded_global)
6897 dd_print_missing_interactions(fplog, cr, totalNumberOfBondedInteractions, top_global, top_local, state); // Does not return
6899 *shouldCheckNumberOfBondedInteractions = false;