/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#include <cassert>
#include <cstdio>

#include <algorithm>
#include <cmath>

#include "gromacs/compat/make_unique.h"
#include "gromacs/domdec/collect.h"
#include "gromacs/domdec/dlbtiming.h"
#include "gromacs/domdec/domdec_network.h"
#include "gromacs/domdec/ga2la.h"
#include "gromacs/ewald/pme.h"
#include "gromacs/fileio/gmxfio.h"
#include "gromacs/fileio/pdbio.h"
#include "gromacs/gmxlib/chargegroup.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/gpu_utils/gpu_utils.h"
#include "gromacs/hardware/hw_info.h"
#include "gromacs/imd/imd.h"
#include "gromacs/listed-forces/manage-threading.h"
#include "gromacs/math/functions.h"
#include "gromacs/math/vec.h"
#include "gromacs/math/vectypes.h"
#include "gromacs/mdlib/constr.h"
#include "gromacs/mdlib/constraintrange.h"
#include "gromacs/mdlib/forcerec.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdlib/lincs.h"
#include "gromacs/mdlib/mdatoms.h"
#include "gromacs/mdlib/mdrun.h"
#include "gromacs/mdlib/mdsetup.h"
#include "gromacs/mdlib/nb_verlet.h"
#include "gromacs/mdlib/nbnxn_grid.h"
#include "gromacs/mdlib/nsgrid.h"
#include "gromacs/mdlib/vsite.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/df_history.h"
#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/mdtypes/mdatom.h"
#include "gromacs/mdtypes/nblist.h"
#include "gromacs/mdtypes/state.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/pulling/pull.h"
#include "gromacs/pulling/pull_rotation.h"
#include "gromacs/swap/swapcoords.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/topology/block.h"
#include "gromacs/topology/idef.h"
#include "gromacs/topology/ifunc.h"
#include "gromacs/topology/mtop_lookup.h"
#include "gromacs/topology/mtop_util.h"
#include "gromacs/topology/topology.h"
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/basenetwork.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/qsort_threadsafe.h"
#include "gromacs/utility/real.h"
#include "gromacs/utility/smalloc.h"
#include "gromacs/utility/stringutil.h"

#include "atomdistribution.h"
#include "cellsizes.h"
#include "distribute.h"
#include "domdec_constraints.h"
#include "domdec_internal.h"
#include "domdec_vsite.h"
#include "redistribute.h"
/* File-scope constants and macros for domain decomposition.
 * NOTE(review): every line below carries stray leading line-number residue
 * from a mangled extraction, and some lines are missing (e.g. the type/
 * declaration line and remaining rows of ddNonbondedZonePairRanges, and the
 * terminators of several comments). Restore from the upstream file before
 * compiling.
 */
122 #define DD_NLOAD_MAX 9
/* Names matching the edlbs* enum values; "on" appears twice (presumably
 * for two distinct "on" states in the enum — confirm against edlbsNR). */
124 const char *edlbs_names[edlbsNR] = { "off", "auto", "locked", "on", "on" };
/* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
/* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
130 #define DD_FLAG_NRCG 65535
/* Forward/backward move flags for dimension d, packed above the 16 count bits */
131 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
132 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
/* The DD zone order */
135 static const ivec dd_zo[DD_MAXZONE] =
136 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
/* The non-bonded zone-pair setup for domain decomposition
 * The first number is the i-zone, the second number the first j-zone seen by
 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
 * As is, this is for 3D decomposition, where there are 4 i-zones.
 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
 */
146 ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
/* Turn on DLB when the load imbalance causes this amount of total loss.
 * There is a bit of overhead with DLB and it's difficult to achieve
 * a load imbalance of less than 2% with DLB.
 */
155 #define DD_PERF_LOSS_DLB_ON 0.02
/* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
158 #define DD_PERF_LOSS_WARN 0.05
/* We check if to turn on DLB at the first and every 100 DD partitionings.
 * With large imbalance DLB will turn on at the first step, so we can
 * make the interval so large that the MPI overhead of the check is negligible.
 */
165 static const int c_checkTurnDlbOnInterval = 100;
/* We need to check if DLB results in worse performance and then turn it off.
 * We check this more often then for turning DLB on, because the DLB can scale
 * the domains very rapidly, so if unlucky the load imbalance can go up quickly
 * and furthermore, we are already synchronizing often with DLB, so
 * the overhead of the MPI Bcast is not that high.
 */
172 static const int c_checkTurnDlbOffInterval = 20;
/* Forward declaration */
175 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue);
/* Linearize grid coordinates i over grid dimensions n, z-major (z fastest). */
179 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
181 static void index2xyz(ivec nc,int ind,ivec xyz)
183 xyz[XX] = ind % nc[XX];
184 xyz[YY] = (ind / nc[XX]) % nc[YY];
185 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
189 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
191 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
192 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
193 xyz[ZZ] = ind % nc[ZZ];
/* Map DD grid coordinates c to the DD node id.
 * NOTE(review): truncated extraction — local declarations (ddindex, ddnodeid),
 * braces, the non-Cartesian fallback branch and the return statement are
 * missing, and each line carries stray line-number residue. Restore from
 * upstream before compiling.
 */
196 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
201 ddindex = dd_index(dd->nc, c);
202 if (dd->comm->bCartesianPP_PME)
/* Combined PP+PME Cartesian communicator: use the precomputed lookup table */
204 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
206 else if (dd->comm->bCartesianPP)
/* Pure-PP Cartesian communicator: ask MPI for the rank of these coordinates */
209 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
220 static gmx_bool dynamic_dd_box(const gmx_ddbox_t *ddbox, const t_inputrec *ir)
222 return (ddbox->nboundeddim < DIM || inputrecDynamicBox(ir));
/* Return the 1-based global atom number for local atom index i.
 * NOTE(review): truncated extraction — the dd==nullptr branch, the atnr
 * declaration, braces and the return statement are missing, and each line
 * carries stray line-number residue. Restore from upstream before compiling.
 */
225 int ddglatnr(const gmx_domdec_t *dd, int i)
/* Guard against indices beyond the full local atom range */
235 if (i >= dd->comm->atomRanges.numAtomsTotal())
237 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal())
239 atnr = dd->globalAtomIndices[i] + 1;
245 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
247 return &dd->comm->cgs_gl;
/* Store the current DD charge-group distribution (global group indices and
 * the partition count) in the MD state, so it can be restored later.
 * NOTE(review): truncated extraction — the loop-variable declaration, braces
 * and possibly further statements are missing, and each line carries stray
 * line-number residue. Restore from upstream before compiling.
 */
250 void dd_store_state(gmx_domdec_t *dd, t_state *state)
/* The state must correspond to the same partitioning generation as dd */
254 if (state->ddp_count != dd->ddp_count)
256 gmx_incons("The MD state does not match the domain decomposition state");
259 state->cg_gl.resize(dd->ncg_home);
260 for (i = 0; i < dd->ncg_home; i++)
262 state->cg_gl[i] = dd->globalAtomGroupIndices[i];
/* Record which partitioning generation cg_gl corresponds to */
265 state->ddp_count_cg_gl = dd->ddp_count;
268 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
270 return &dd->comm->zones;
/* For charge group icg, return the j-charge-group range [*jcg0, *jcg1) to be
 * searched by neighbor searching, plus the allowed cell shifts shift0/shift1
 * per dimension.
 * NOTE(review): truncated extraction — izone/d/dim declarations, braces and
 * several statements are missing, and each line carries stray line-number
 * residue. Restore from upstream before compiling.
 */
273 void dd_get_ns_ranges(const gmx_domdec_t *dd, int icg,
274 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
276 gmx_domdec_zones_t *zones;
279 zones = &dd->comm->zones;
/* Find the i-zone containing icg */
282 while (icg >= zones->izone[izone].cg1)
291 else if (izone < zones->nizone)
293 *jcg0 = zones->izone[izone].jcg0;
297 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
298 icg, izone, zones->nizone);
301 *jcg1 = zones->izone[izone].jcg1;
303 for (d = 0; d < dd->ndim; d++)
306 shift0[dim] = zones->izone[izone].shift0[dim];
307 shift1[dim] = zones->izone[izone].shift1[dim];
/* Triclinic or DLB-staggered cells need wider shift ranges */
308 if (dd->comm->tric_dir[dim] || (isDlbOn(dd->comm) && d > 0))
/* A conservative approach, this can be optimized */
317 int dd_numHomeAtoms(const gmx_domdec_t &dd)
319 return dd.comm->atomRanges.numHomeAtoms();
322 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
324 /* We currently set mdatoms entries for all atoms:
325 * local + non-local + communicated for vsite + constraints
328 return dd->comm->atomRanges.numAtomsTotal();
331 int dd_natoms_vsite(const gmx_domdec_t *dd)
333 return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
336 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
338 *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
339 *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
/* Communicate coordinates x of halo atoms from home ranks to neighboring
 * ranks, dimension by dimension, applying PBC shifts (and screw-PBC rotation
 * of y/z) where needed.
 * NOTE(review): truncated extraction — many lines (declarations of nzone/
 * nat_tot/n/j, braces, else keywords, comm/cd assignments) are missing, and
 * each remaining line carries stray line-number residue. Restore from
 * upstream before compiling.
 */
342 void dd_move_x(gmx_domdec_t *dd,
344 gmx::ArrayRef<gmx::RVec> x,
345 gmx_wallcycle *wcycle)
347 wallcycle_start(wcycle, ewcMOVEX);
350 gmx_domdec_comm_t *comm;
351 gmx_domdec_comm_dim_t *cd;
352 rvec shift = {0, 0, 0};
353 gmx_bool bPBC, bScrew;
357 const RangePartitioning &atomGrouping = dd->atomGrouping();
360 nat_tot = comm->atomRanges.numHomeAtoms();
361 for (int d = 0; d < dd->ndim; d++)
/* Ranks at the lower cell boundary apply the PBC shift for this dim */
363 bPBC = (dd->ci[dd->dim[d]] == 0);
364 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
367 copy_rvec(box[dd->dim[d]], shift);
370 for (const gmx_domdec_ind_t &ind : cd->ind)
372 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
373 gmx::ArrayRef<gmx::RVec> &sendBuffer = sendBufferAccess.buffer;
/* No PBC: copy the coordinates unmodified */
377 for (int g : ind.index)
379 for (int j : atomGrouping.block(g))
381 sendBuffer[n] = x[j];
388 for (int g : ind.index)
390 for (int j : atomGrouping.block(g))
/* We need to shift the coordinates */
393 for (int d = 0; d < DIM; d++)
395 sendBuffer[n][d] = x[j][d] + shift[d];
/* Screw PBC branch: shift x and rotate y/z */
403 for (int g : ind.index)
405 for (int j : atomGrouping.block(g))
408 sendBuffer[n][XX] = x[j][XX] + shift[XX];
/* Rotate y and z.
 * This operation requires a special shift force
 * treatment, which is performed in calc_vir.
 */
413 sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
414 sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
420 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
422 gmx::ArrayRef<gmx::RVec> receiveBuffer;
423 if (cd->receiveInPlace)
/* Received coordinates land directly at the end of x */
425 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
429 receiveBuffer = receiveBufferAccess.buffer;
/* Send and receive the coordinates */
432 ddSendrecv(dd, d, dddirBackward,
433 sendBuffer, receiveBuffer);
435 if (!cd->receiveInPlace)
/* Scatter the received data into the per-zone atom slots */
438 for (int zone = 0; zone < nzone; zone++)
440 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
442 x[i] = receiveBuffer[j++];
446 nat_tot += ind.nrecv[nzone+1];
451 wallcycle_stop(wcycle, ewcMOVEX);
/* Communicate forces f of halo atoms back to their home ranks and accumulate
 * them, dimension by dimension in reverse order; optionally accumulates
 * PBC shift forces into fshift, with special handling for screw PBC.
 * NOTE(review): truncated extraction — declarations (nzone, nat_tot, n, j,
 * is, fshift parameter), braces and else branches are missing, and each
 * remaining line carries stray line-number residue. Restore from upstream
 * before compiling.
 */
454 void dd_move_f(gmx_domdec_t *dd,
455 gmx::ArrayRef<gmx::RVec> f,
457 gmx_wallcycle *wcycle)
459 wallcycle_start(wcycle, ewcMOVEF);
462 gmx_domdec_comm_t *comm;
463 gmx_domdec_comm_dim_t *cd;
466 gmx_bool bShiftForcesNeedPbc, bScrew;
470 const RangePartitioning &atomGrouping = dd->atomGrouping();
472 nzone = comm->zones.n/2;
473 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
474 for (int d = dd->ndim-1; d >= 0; d--)
/* Only forces in domains near the PBC boundaries need to
 * consider PBC in the treatment of fshift */
478 bShiftForcesNeedPbc = (dd->ci[dd->dim[d]] == 0);
479 bScrew = (bShiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX);
480 if (fshift == nullptr && !bScrew)
482 bShiftForcesNeedPbc = FALSE;
/* Determine which shift vector we need */
/* Reverse pulse order: last-communicated halo shells come back first */
490 for (int p = cd->numPulses() - 1; p >= 0; p--)
492 const gmx_domdec_ind_t &ind = cd->ind[p];
493 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
494 gmx::ArrayRef<gmx::RVec> &receiveBuffer = receiveBufferAccess.buffer;
496 nat_tot -= ind.nrecv[nzone+1];
498 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
500 gmx::ArrayRef<gmx::RVec> sendBuffer;
501 if (cd->receiveInPlace)
/* The forces to send sit contiguously at the end of f */
503 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
507 sendBuffer = sendBufferAccess.buffer;
509 for (int zone = 0; zone < nzone; zone++)
511 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
513 sendBuffer[j++] = f[i];
/* Communicate the forces */
518 ddSendrecv(dd, d, dddirForward,
519 sendBuffer, receiveBuffer);
/* Add the received forces */
522 if (!bShiftForcesNeedPbc)
524 for (int g : ind.index)
526 for (int j : atomGrouping.block(g))
528 for (int d = 0; d < DIM; d++)
530 f[j][d] += receiveBuffer[n][d];
/* fshift should always be defined if this function is
 * called when bShiftForcesNeedPbc is true */
540 assert(nullptr != fshift);
541 for (int g : ind.index)
543 for (int j : atomGrouping.block(g))
545 for (int d = 0; d < DIM; d++)
547 f[j][d] += receiveBuffer[n][d];
/* Add this force to the shift force */
550 for (int d = 0; d < DIM; d++)
552 fshift[is][d] += receiveBuffer[n][d];
/* Screw PBC branch */
560 for (int g : ind.index)
562 for (int j : atomGrouping.block(g))
/* Rotate the force */
565 f[j][XX] += receiveBuffer[n][XX];
566 f[j][YY] -= receiveBuffer[n][YY];
567 f[j][ZZ] -= receiveBuffer[n][ZZ];
/* Add this force to the shift force */
571 for (int d = 0; d < DIM; d++)
573 fshift[is][d] += receiveBuffer[n][d];
583 wallcycle_stop(wcycle, ewcMOVEF);
/* Convenience function for extracting a real buffer from an rvec buffer.
 *
 * To reduce the number of temporary communication buffers and avoid
 * cache pollution, we reuse gmx::RVec buffers for storing reals.
 * This function returns a real view of the same memory.
 *
 * NOTE(review): truncated extraction — the call below is missing its second
 * argument (the element count; presumably arrayRef.size()*DIM, i.e. one real
 * per RVec component — confirm against upstream) and the closing brace, and
 * each line carries stray line-number residue.
 */
593 static gmx::ArrayRef<real>
594 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
596 return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
/* Spread a per-atom real array v from home atoms to the halo atoms on
 * neighboring ranks (same communication pattern as dd_move_x, but for a
 * single real per atom).
 * NOTE(review): truncated extraction — declarations (nzone, nat_tot, n, j),
 * braces, else keywords and comm/cd assignments are missing, and each line
 * carries stray line-number residue. Restore from upstream before compiling.
 */
600 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
603 gmx_domdec_comm_t *comm;
604 gmx_domdec_comm_dim_t *cd;
608 const RangePartitioning &atomGrouping = dd->atomGrouping();
611 nat_tot = comm->atomRanges.numHomeAtoms();
612 for (int d = 0; d < dd->ndim; d++)
615 for (const gmx_domdec_ind_t &ind : cd->ind)
/* Note: We provision for RVec instead of real, so a factor of 3
 * more than needed. The buffer actually already has this size
 * and we pass a plain pointer below, so this does not matter.
 */
621 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
622 gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
/* Pack the values of the atoms to send */
624 for (int g : ind.index)
626 for (int j : atomGrouping.block(g))
628 sendBuffer[n++] = v[j];
632 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
634 gmx::ArrayRef<real> receiveBuffer;
635 if (cd->receiveInPlace)
/* Receive directly into the tail of v */
637 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
641 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
/* Send and receive the data */
644 ddSendrecv(dd, d, dddirBackward,
645 sendBuffer, receiveBuffer);
646 if (!cd->receiveInPlace)
/* Scatter received values into the per-zone atom slots */
649 for (int zone = 0; zone < nzone; zone++)
651 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
653 v[i] = receiveBuffer[j++];
657 nat_tot += ind.nrecv[nzone+1];
/* Sum a per-atom real array v over the halo: communicate halo values back to
 * their home ranks and accumulate (same pattern as dd_move_f, one real per
 * atom).
 * NOTE(review): truncated extraction — declarations (j, n), braces and cd
 * assignments are missing, and each line carries stray line-number residue.
 * Restore from upstream before compiling.
 */
663 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
666 gmx_domdec_comm_t *comm;
667 gmx_domdec_comm_dim_t *cd;
671 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
673 nzone = comm->zones.n/2;
674 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
675 for (int d = dd->ndim-1; d >= 0; d--)
/* Reverse pulse order, mirroring the spread operation */
678 for (int p = cd->numPulses() - 1; p >= 0; p--)
680 const gmx_domdec_ind_t &ind = cd->ind[p];
/* Note: We provision for RVec instead of real, so a factor of 3
 * more than needed. The buffer actually already has this size
 * and we typecast, so this works as intended.
 */
686 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
687 gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
688 nat_tot -= ind.nrecv[nzone + 1];
690 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
692 gmx::ArrayRef<real> sendBuffer;
693 if (cd->receiveInPlace)
/* Values to send sit contiguously at the tail of v */
695 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
699 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
701 for (int zone = 0; zone < nzone; zone++)
703 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
705 sendBuffer[j++] = v[i];
/* Communicate the values */
710 ddSendrecv(dd, d, dddirForward,
711 sendBuffer, receiveBuffer);
/* Accumulate the received values onto the home atoms */
714 for (int g : ind.index)
716 for (int j : atomGrouping.block(g))
718 v[j] += receiveBuffer[n];
727 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
729 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
731 zone->min0, zone->max1,
732 zone->mch0, zone->mch0,
733 zone->p1_0, zone->p1_1);
/* Using the home grid size as input in cell_ns_x0 and cell_ns_x1
 * takes the extremes over all home and remote zones in the halo
 * and returns the results in cell_ns_x0 and cell_ns_x1.
 * Note: only used with the group cut-off scheme.
 */
/* NOTE(review): truncated extraction — many lines of this function are
 * missing (parameter list tail, declarations of extr_s/extr_r/pos/dh/c/
 * numPulsesMin, braces, else branches, debug-print guards), and each
 * remaining line carries stray line-number residue. Restore from upstream
 * before compiling; the comments below only annotate what is visible.
 */
741 static void dd_move_cellx(gmx_domdec_t *dd,
742 const gmx_ddbox_t *ddbox,
746 constexpr int c_ddZoneCommMaxNumZones = 5;
747 gmx_ddzone_t buf_s[c_ddZoneCommMaxNumZones];
748 gmx_ddzone_t buf_r[c_ddZoneCommMaxNumZones];
749 gmx_ddzone_t buf_e[c_ddZoneCommMaxNumZones];
750 gmx_domdec_comm_t *comm = dd->comm;
/* Initialize the zone records of the higher dimensions with the home sizes */
754 for (int d = 1; d < dd->ndim; d++)
756 int dim = dd->dim[d];
757 gmx_ddzone_t &zp = (d == 1) ? comm->zone_d1[0] : comm->zone_d2[0][0];
/* Copy the base sizes of the home zone */
760 zp.min0 = cell_ns_x0[dim];
761 zp.max1 = cell_ns_x1[dim];
762 zp.min1 = cell_ns_x1[dim];
763 zp.mch0 = cell_ns_x0[dim];
764 zp.mch1 = cell_ns_x1[dim];
765 zp.p1_0 = cell_ns_x0[dim];
766 zp.p1_1 = cell_ns_x1[dim];
/* Loop backward over the dimensions and aggregate the extremes */
772 for (int d = dd->ndim - 2; d >= 0; d--)
774 int dim = dd->dim[d];
775 bool applyPbc = (dim < ddbox->npbcdim);
/* Use an rvec to store two reals */
778 extr_s[d][0] = comm->cell_f0[d+1];
779 extr_s[d][1] = comm->cell_f1[d+1];
780 extr_s[d][2] = comm->cell_f1[d+1];
783 GMX_ASSERT(pos < c_ddZoneCommMaxNumZones, "The buffers should be sufficiently large");
/* Store the extremes in the backward sending buffer,
 * so they get updated separately from the forward communication.
 */
787 for (int d1 = d; d1 < dd->ndim-1; d1++)
/* We invert the order to be able to use the same loop for buf_e */
790 buf_s[pos].min0 = extr_s[d1][1];
791 buf_s[pos].max1 = extr_s[d1][0];
792 buf_s[pos].min1 = extr_s[d1][2];
/* Store the cell corner of the dimension we communicate along */
796 buf_s[pos].p1_0 = comm->cell_x0[dim];
801 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
804 if (dd->ndim == 3 && d == 0)
806 buf_s[pos] = comm->zone_d2[0][1];
808 buf_s[pos] = comm->zone_d1[0];
/* We only need to communicate the extremes
 * in the forward direction
 */
815 int numPulses = comm->cd[d].numPulses();
/* Take the minimum to avoid double communication */
820 numPulsesMin = std::min(numPulses, dd->nc[dim] - 1 - numPulses);
/* Without PBC we should really not communicate over
 * the boundaries, but implementing that complicates
 * the communication setup and therefore we simply
 * do all communication, but ignore some data.
 */
829 numPulsesMin = numPulses;
831 for (int pulse = 0; pulse < numPulsesMin; pulse++)
/* Communicate the extremes forward */
834 bool receiveValidData = (applyPbc || dd->ci[dim] > 0);
836 int numElements = dd->ndim - d - 1;
837 ddSendrecv(dd, d, dddirForward,
838 extr_s + d, numElements,
839 extr_r + d, numElements);
841 if (receiveValidData)
843 for (int d1 = d; d1 < dd->ndim - 1; d1++)
845 extr_s[d1][0] = std::max(extr_s[d1][0], extr_r[d1][0]);
846 extr_s[d1][1] = std::min(extr_s[d1][1], extr_r[d1][1]);
847 extr_s[d1][2] = std::min(extr_s[d1][2], extr_r[d1][2]);
852 const int numElementsInBuffer = pos;
853 for (int pulse = 0; pulse < numPulses; pulse++)
/* Communicate all the zone information backward */
856 bool receiveValidData = (applyPbc || dd->ci[dim] < dd->nc[dim] - 1);
858 static_assert(sizeof(gmx_ddzone_t) == c_ddzoneNumReals*sizeof(real), "Here we expect gmx_ddzone_t to consist of c_ddzoneNumReals reals (only)");
860 int numReals = numElementsInBuffer*c_ddzoneNumReals;
861 ddSendrecv(dd, d, dddirBackward,
862 gmx::arrayRefFromArray(&buf_s[0].min0, numReals),
863 gmx::arrayRefFromArray(&buf_r[0].min0, numReals));
868 for (int d1 = d + 1; d1 < dd->ndim; d1++)
/* Determine the decrease of maximum required
 * communication height along d1 due to the distance along d,
 * this avoids a lot of useless atom communication.
 */
874 real dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
877 if (ddbox->tric_dir[dim])
/* c is the off-diagonal coupling between the cell planes
 * along directions d and d1.
 */
882 c = ddbox->v[dim][dd->dim[d1]][dim];
888 real det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
891 dh[d1] = comm->cutoff - (c*dist_d + std::sqrt(det))/(1 + c*c);
/* A negative value signals out of range */
/* Accumulate the extremes over all pulses */
902 for (int i = 0; i < numElementsInBuffer; i++)
910 if (receiveValidData)
912 buf_e[i].min0 = std::min(buf_e[i].min0, buf_r[i].min0);
913 buf_e[i].max1 = std::max(buf_e[i].max1, buf_r[i].max1);
914 buf_e[i].min1 = std::min(buf_e[i].min1, buf_r[i].min1);
918 if (dd->ndim == 3 && d == 0 && i == numElementsInBuffer - 1)
926 if (receiveValidData && dh[d1] >= 0)
928 buf_e[i].mch0 = std::max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
929 buf_e[i].mch1 = std::max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
/* Copy the received buffer to the send buffer,
 * to pass the data through with the next pulse.
 */
937 if (((applyPbc || dd->ci[dim] + numPulses < dd->nc[dim]) && pulse == numPulses - 1) ||
938 (!applyPbc && dd->ci[dim] + 1 + pulse == dd->nc[dim] - 1))
/* Store the extremes */
943 for (int d1 = d; d1 < dd->ndim-1; d1++)
945 extr_s[d1][1] = std::min(extr_s[d1][1], buf_e[pos].min0);
946 extr_s[d1][0] = std::max(extr_s[d1][0], buf_e[pos].max1);
947 extr_s[d1][2] = std::min(extr_s[d1][2], buf_e[pos].min1);
951 if (d == 1 || (d == 0 && dd->ndim == 3))
953 for (int i = d; i < 2; i++)
955 comm->zone_d2[1-d][i] = buf_e[pos];
961 comm->zone_d1[1] = buf_e[pos];
/* Fold the aggregated zone extents back into cell_ns_x0/x1 */
970 int dim = dd->dim[1];
971 for (int i = 0; i < 2; i++)
975 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
977 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d1[i].min0);
978 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d1[i].max1);
983 int dim = dd->dim[2];
984 for (int i = 0; i < 2; i++)
986 for (int j = 0; j < 2; j++)
990 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
992 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
993 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
/* Publish the aggregated cell-fraction extremes */
997 for (int d = 1; d < dd->ndim; d++)
999 comm->cell_f_max0[d] = extr_s[d-1][0];
1000 comm->cell_f_min1[d] = extr_s[d-1][1];
1003 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1004 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
/* Write the DD cell grid to a PDB file for visualization: gathers all cell
 * corners on the master rank, writes one pseudo-atom per cell corner (with
 * the cell volume in the B-factor column) and CONECT records for the edges.
 * NOTE(review): truncated extraction — master-rank guards, the tric matrix
 * declaration, out/vol declarations, mvmul/copy of r, switch header, braces
 * and sfree are missing, and each line carries stray line-number residue.
 * Restore from upstream before compiling.
 */
1009 static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
1010 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1012 rvec grid_s[2], *grid_r = nullptr, cx, r;
1013 char fname[STRLEN], buf[22];
1015 int a, i, d, z, y, x;
1019 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1020 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1024 snew(grid_r, 2*dd->nnodes);
/* Collect every rank's cell corners on the master */
1027 dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : nullptr);
1031 for (d = 0; d < DIM; d++)
1033 for (i = 0; i < DIM; i++)
1041 if (d < ddbox->npbcdim && dd->nc[d] > 1)
/* Off-diagonal scaling for triclinic boxes */
1043 tric[d][i] = box[i][d]/box[i][i];
1052 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1053 out = gmx_fio_fopen(fname, "w");
1054 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1056 for (i = 0; i < dd->nnodes; i++)
/* Relative cell volume: product of edge lengths times nnodes/V_box */
1058 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1059 for (d = 0; d < DIM; d++)
1061 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
/* Emit the 8 corners of each cell */
1063 for (z = 0; z < 2; z++)
1065 for (y = 0; y < 2; y++)
1067 for (x = 0; x < 2; x++)
1069 cx[XX] = grid_r[i*2+x][XX];
1070 cx[YY] = grid_r[i*2+y][YY];
1071 cx[ZZ] = grid_r[i*2+z][ZZ];
1073 gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
1074 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
/* CONECT records joining corners along each dimension */
1078 for (d = 0; d < DIM; d++)
1080 for (x = 0; x < 4; x++)
1084 case 0: y = 1 + i*8 + 2*x; break;
1085 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
1086 case 2: y = 1 + i*8 + x; break;
1088 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
1092 gmx_fio_fclose(out);
/* Write the local atoms of this rank to a PDB file, coloring atoms by zone
 * (zone index in the B-factor column; vsite/constraint atoms get zones.n and
 * zones.n+1 respectively).
 * NOTE(review): truncated extraction — declarations (out, dd, molb, resnr,
 * i, c, b), braces and else keywords are missing, and each line carries
 * stray line-number residue. Restore from upstream before compiling.
 */
1097 void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
1098 const gmx_mtop_t *mtop, const t_commrec *cr,
1099 int natoms, const rvec x[], const matrix box)
1101 char fname[STRLEN], buf[22];
1104 const char *atomname, *resname;
/* natoms < 0 presumably means "use all atoms incl. vsite range" — confirm */
1110 natoms = dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
1113 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
1115 out = gmx_fio_fopen(fname, "w");
1117 fprintf(out, "TITLE %s\n", title);
1118 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1120 for (int i = 0; i < natoms; i++)
1122 int ii = dd->globalAtomIndices[i];
1123 mtopGetAtomAndResidueName(mtop, ii, &molb, &atomname, &resnr, &resname, nullptr);
1126 if (i < dd->comm->atomRanges.end(DDAtomRanges::Type::Zones))
/* Advance the zone counter c until it contains atom i */
1129 while (i >= dd->atomGrouping().subRange(0, dd->comm->zones.cg_range[c + 1]).end())
1135 else if (i < dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites))
1137 b = dd->comm->zones.n;
1141 b = dd->comm->zones.n + 1;
1143 gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
1144 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
1146 fprintf(out, "TER\n");
1148 gmx_fio_fclose(out);
/* Return the cut-off distance supported for multi-body (bonded) interactions
 * by the current DD setup: the explicit mbody cut-off when set, otherwise the
 * minimum DD cell size, never exceeding the pair cut-off.
 * NOTE(review): truncated extraction — declarations of r and di, braces,
 * else keywords and the return statement are missing, and each line carries
 * stray line-number residue. Restore from upstream before compiling.
 */
1151 real dd_cutoff_multibody(const gmx_domdec_t *dd)
1153 gmx_domdec_comm_t *comm;
1160 if (comm->bInterCGBondeds)
1162 if (comm->cutoff_mbody > 0)
1164 r = comm->cutoff_mbody;
/* cutoff_mbody=0 means we do not have DLB */
1169 r = comm->cellsize_min[dd->dim[0]];
1170 for (di = 1; di < dd->ndim; di++)
1172 r = std::min(r, comm->cellsize_min[dd->dim[di]]);
1174 if (comm->bBondComm)
1176 r = std::max(r, comm->cutoff_mbody);
/* Never report more than the non-bonded pair cut-off */
1180 r = std::min(r, comm->cutoff);
1188 real dd_cutoff_twobody(const gmx_domdec_t *dd)
1192 r_mb = dd_cutoff_multibody(dd);
1194 return std::max(dd->comm->cutoff, r_mb);
/* Map Cartesian PP coordinates to the Cartesian coordinates of the PME rank
 * this PP rank communicates with, along the cartpmedim dimension.
 * NOTE(review): truncated extraction — the output parameter in the
 * signature, the nc/ntot declarations and the braces are missing, and each
 * line carries stray line-number residue. Restore from upstream before
 * compiling.
 */
1198 static void dd_cart_coord2pmecoord(const gmx_domdec_t *dd, const ivec coord,
1203 nc = dd->nc[dd->comm->cartpmedim];
1204 ntot = dd->comm->ntot[dd->comm->cartpmedim];
1205 copy_ivec(coord, coord_pme);
/* Scale the PP coordinate into the PME slot range [nc, ntot), rounding to
 * the middle of each PME slot */
1206 coord_pme[dd->comm->cartpmedim] =
1207 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
/* Map a DD rank index to the index of the PME rank it communicates with,
 * distributing PP ranks evenly over PME ranks.
 * NOTE(review): truncated extraction — the npp declaration/assignment and
 * braces are missing, and each line carries stray line-number residue.
 * Restore from upstream before compiling.
 */
1210 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
1215 npme = dd->comm->npmenodes;
/* Here we assign a PME node to communicate with this DD node
 * by assuming that the major index of both is x.
 * We add cr->npmenodes/2 to obtain an even distribution.
 */
1221 return (ddindex*npme + npme/2)/npp;
/* Build and return (caller frees with sfree) the array of interleaved PME
 * rank ids: PME rank n follows the last PP rank assigned to it.
 * NOTE(review): truncated extraction — declarations (pme_rank, i, n, p0,
 * p1), braces, the debug guard and the return statement are missing, and
 * each line carries stray line-number residue. Restore from upstream before
 * compiling.
 */
1224 static int *dd_interleaved_pme_ranks(const gmx_domdec_t *dd)
1229 snew(pme_rank, dd->comm->npmenodes);
1231 for (i = 0; i < dd->nnodes; i++)
1233 p0 = ddindex2pmeindex(dd, i);
1234 p1 = ddindex2pmeindex(dd, i+1);
/* A new PME rank starts after the last PP rank mapped to the previous one */
1235 if (i+1 == dd->nnodes || p1 > p0)
1239 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
1241 pme_rank[n] = i + 1 + n;
/* Return the PME slab/rank index for DD grid coordinates (x,y,z).
 * NOTE(review): truncated extraction — declarations (dd, coords, ddindex,
 * slab), braces and the return statement are missing, and each line carries
 * stray line-number residue. The bCartesian branch below appears to be
 * legacy/disabled code (it calls helpers not defined in this file —
 * presumably guarded by #if 0 upstream; confirm). Restore from upstream
 * before compiling.
 */
1249 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
1257 if (dd->comm->bCartesian) {
1258 gmx_ddindex2xyz(dd->nc,ddindex,coords);
1259 dd_coords2pmecoords(dd,coords,coords_pme);
1260 copy_ivec(dd->ntot,nc);
1261 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1262 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1264 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
1266 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
/* Active path: map the DD index through ddindex2pmeindex */
1272 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
/* Return the simulation node id for DD grid coordinates (x,y,z), handling
 * the Cartesian-communicator and lookup-table cases.
 * NOTE(review): truncated extraction — the coords initialization, braces,
 * else branches and the return statement are missing, and each line carries
 * stray line-number residue. Restore from upstream before compiling.
 */
1277 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
1279 gmx_domdec_comm_t *comm;
1281 int ddindex, nodeid = -1;
1283 comm = cr->dd->comm;
1288 if (comm->bCartesianPP_PME)
/* Combined Cartesian communicator: MPI provides the mapping */
1291 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
1296 ddindex = dd_index(cr->dd->nc, coords);
1297 if (comm->bCartesianPP)
1299 nodeid = comm->ddindex2simnodeid[ddindex];
/* Interleaved PP/PME ranks: offset by the number of preceding PME ranks */
1305 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
/* Return the PME rank that PP rank sim_nodeid sends to, or -1 (presumably)
 * when sim_nodeid is itself a PME rank — confirm against upstream.
 * NOTE(review): truncated extraction — the sim_nodeid parameter, pmenode/i
 * declarations, braces, else branches and the return statement are missing,
 * and each line carries stray line-number residue. Restore from upstream
 * before compiling.
 */
1317 static int dd_simnode2pmenode(const gmx_domdec_t *dd,
1318 const t_commrec gmx_unused *cr,
1323 const gmx_domdec_comm_t *comm = dd->comm;
/* This assumes a uniform x domain decomposition grid cell size */
1326 if (comm->bCartesianPP_PME)
1329 ivec coord, coord_pme;
1330 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
1331 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
/* This is a PP node */
1334 dd_cart_coord2pmecoord(dd, coord, coord_pme);
1335 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
1339 else if (comm->bCartesianPP)
1341 if (sim_nodeid < dd->nnodes)
1343 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
/* This assumes DD cells with identical x coordinates
 * are numbered sequentially.
 */
1351 if (dd->comm->pmenodes == nullptr)
1353 if (sim_nodeid < dd->nnodes)
/* The DD index equals the nodeid */
1356 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid)
/* Explicit PME rank list: scan for the first PME rank after sim_nodeid */
1362 while (sim_nodeid > dd->comm->pmenodes[i])
1366 if (sim_nodeid < dd->comm->pmenodes[i])
1368 pmenode = dd->comm->pmenodes[i];
/* Return the number of PME domains along x and y.
 * NOTE(review): truncated extraction — the braces, the non-DD fallback
 * branch and the surrounding return/brace syntax of the initializer are
 * missing, and each line carries stray line-number residue. Restore from
 * upstream before compiling.
 */
1376 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
1381 dd->comm->npmenodes_x, dd->comm->npmenodes_y
/* Return the list of PP (DD) sim ranks that send to PME rank pmenodeid.
 * NOTE(review): truncated extraction — the dd declaration, x/y/z
 * declarations, coord initialization, braces, the dd->ci comparison context
 * (presumably runs on the PME rank's own Cartesian coordinates — confirm)
 * and the return statement are missing; each line carries stray line-number
 * residue. Restore from upstream before compiling.
 */
1392 std::vector<int> get_pme_ddranks(const t_commrec *cr, int pmenodeid)
1396 ivec coord, coord_pme;
1400 std::vector<int> ddranks;
/* Upper bound: PP ranks are spread near-evenly over PME ranks */
1401 ddranks.reserve((dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
1403 for (x = 0; x < dd->nc[XX]; x++)
1405 for (y = 0; y < dd->nc[YY]; y++)
1407 for (z = 0; z < dd->nc[ZZ]; z++)
1409 if (dd->comm->bCartesianPP_PME)
1414 dd_cart_coord2pmecoord(dd, coord, coord_pme);
1415 if (dd->ci[XX] == coord_pme[XX] &&
1416 dd->ci[YY] == coord_pme[YY] &&
1417 dd->ci[ZZ] == coord_pme[ZZ])
1419 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
/* The slab corresponds to the nodeid in the PME group */
1425 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
1427 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
/* Return whether this PP rank is the designated one (the last PP rank for
 * its PME rank) that should receive virial/energy from PME.
 * NOTE(review): truncated extraction — MPI #ifdef guards, coords/rank
 * declarations, bReceive=FALSE assignments, braces, else branches and the
 * return statement are missing, and each line carries stray line-number
 * residue. Restore from upstream before compiling.
 */
1436 static gmx_bool receive_vir_ener(const gmx_domdec_t *dd, const t_commrec *cr)
1438 gmx_bool bReceive = TRUE;
/* With as many PME as PP ranks, every PP rank receives */
1440 if (cr->npmenodes < dd->nnodes)
1442 gmx_domdec_comm_t *comm = dd->comm;
1443 if (comm->bCartesianPP_PME)
1446 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
1448 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
/* Check whether the next rank along cartpmedim shares our PME rank */
1449 coords[comm->cartpmedim]++;
1450 if (coords[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1453 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
1454 if (dd_simnode2pmenode(dd, cr, rank) == pmenode)
/* This is not the last PP node for pmenode */
1461 GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
1466 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
1467 if (cr->sim_nodeid+1 < cr->nnodes &&
1468 dd_simnode2pmenode(dd, cr, cr->sim_nodeid+1) == pmenode)
/* This is not the last PP node for pmenode */
1479 static void set_zones_ncg_home(gmx_domdec_t *dd)
1481 gmx_domdec_zones_t *zones;
1484 zones = &dd->comm->zones;
1486 zones->cg_range[0] = 0;
1487 for (i = 1; i < zones->n+1; i++)
1489 zones->cg_range[i] = dd->ncg_home;
1491 /* zone_ncg1[0] should always be equal to ncg_home */
1492 dd->comm->zone_ncg1[0] = dd->ncg_home;
1495 static void restoreAtomGroups(gmx_domdec_t *dd,
1496 const int *gcgs_index, const t_state *state)
1498 gmx::ArrayRef<const int> atomGroupsState = state->cg_gl;
1500 std::vector<int> &globalAtomGroupIndices = dd->globalAtomGroupIndices;
1501 gmx::RangePartitioning &atomGrouping = dd->atomGrouping_;
1503 globalAtomGroupIndices.resize(atomGroupsState.size());
1504 atomGrouping.clear();
1506 /* Copy back the global charge group indices from state
1507 * and rebuild the local charge group to atom index.
1509 for (unsigned int i = 0; i < atomGroupsState.size(); i++)
1511 const int atomGroupGlobal = atomGroupsState[i];
1512 const int groupSize = gcgs_index[atomGroupGlobal + 1] - gcgs_index[atomGroupGlobal];
1513 globalAtomGroupIndices[i] = atomGroupGlobal;
1514 atomGrouping.appendBlock(groupSize);
1517 dd->ncg_home = atomGroupsState.size();
1518 dd->comm->atomRanges.setEnd(DDAtomRanges::Type::Home, atomGrouping.fullRange().end());
1520 set_zones_ncg_home(dd);
1523 static void dd_set_cginfo(gmx::ArrayRef<const int> index_gl, int cg0, int cg1,
1524 t_forcerec *fr, char *bLocalCG)
1526 cginfo_mb_t *cginfo_mb;
1532 cginfo_mb = fr->cginfo_mb;
1533 cginfo = fr->cginfo;
1535 for (cg = cg0; cg < cg1; cg++)
1537 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
1541 if (bLocalCG != nullptr)
1543 for (cg = cg0; cg < cg1; cg++)
1545 bLocalCG[index_gl[cg]] = TRUE;
1550 static void make_dd_indices(gmx_domdec_t *dd,
1551 const int *gcgs_index, int cg_start)
1553 const int numZones = dd->comm->zones.n;
1554 const int *zone2cg = dd->comm->zones.cg_range;
1555 const int *zone_ncg1 = dd->comm->zone_ncg1;
1556 gmx::ArrayRef<const int> globalAtomGroupIndices = dd->globalAtomGroupIndices;
1557 const gmx_bool bCGs = dd->comm->bCGs;
1559 std::vector<int> &globalAtomIndices = dd->globalAtomIndices;
1561 if (zone2cg[1] != dd->ncg_home)
1563 gmx_incons("dd->ncg_zone is not up to date");
1566 /* Make the local to global and global to local atom index */
1567 int a = dd->atomGrouping().subRange(cg_start, cg_start).begin();
1568 globalAtomIndices.resize(a);
1569 for (int zone = 0; zone < numZones; zone++)
1578 cg0 = zone2cg[zone];
1580 int cg1 = zone2cg[zone+1];
1581 int cg1_p1 = cg0 + zone_ncg1[zone];
1583 for (int cg = cg0; cg < cg1; cg++)
1588 /* Signal that this cg is from more than one pulse away */
1591 int cg_gl = globalAtomGroupIndices[cg];
1594 for (int a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
1596 globalAtomIndices.push_back(a_gl);
1597 ga2la_set(dd->ga2la, a_gl, a, zone1);
1603 globalAtomIndices.push_back(cg_gl);
1604 ga2la_set(dd->ga2la, cg_gl, a, zone1);
1611 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
1615 if (bLocalCG == nullptr)
1619 for (size_t i = 0; i < dd->globalAtomGroupIndices.size(); i++)
1621 if (!bLocalCG[dd->globalAtomGroupIndices[i]])
1624 "DD rank %d, %s: atom group %zu, global atom group %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i + 1, dd->globalAtomGroupIndices[i] + 1, dd->ncg_home);
1629 for (int i = 0; i < ncg_sys; i++)
1636 if (ngl != dd->globalAtomGroupIndices.size())
1638 fprintf(stderr, "DD rank %d, %s: In bLocalCG %zu atom groups are marked as local, whereas there are %zu\n", dd->rank, where, ngl, dd->globalAtomGroupIndices.size());
1645 static void check_index_consistency(gmx_domdec_t *dd,
1646 int natoms_sys, int ncg_sys,
1651 const int numAtomsInZones = dd->comm->atomRanges.end(DDAtomRanges::Type::Zones);
1653 if (dd->comm->DD_debug > 1)
1655 std::vector<int> have(natoms_sys);
1656 for (int a = 0; a < numAtomsInZones; a++)
1658 int globalAtomIndex = dd->globalAtomIndices[a];
1659 if (have[globalAtomIndex] > 0)
1661 fprintf(stderr, "DD rank %d: global atom %d occurs twice: index %d and %d\n", dd->rank, globalAtomIndex + 1, have[globalAtomIndex], a+1);
1665 have[globalAtomIndex] = a + 1;
1670 std::vector<int> have(numAtomsInZones);
1673 for (int i = 0; i < natoms_sys; i++)
1677 if (ga2la_get(dd->ga2la, i, &a, &cell))
1679 if (a >= numAtomsInZones)
1681 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, numAtomsInZones);
1687 if (dd->globalAtomIndices[a] != i)
1689 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->globalAtomIndices[a]+1);
1696 if (ngl != numAtomsInZones)
1699 "DD rank %d, %s: %d global atom indices, %d local atoms\n",
1700 dd->rank, where, ngl, numAtomsInZones);
1702 for (int a = 0; a < numAtomsInZones; a++)
1707 "DD rank %d, %s: local atom %d, global %d has no global index\n",
1708 dd->rank, where, a + 1, dd->globalAtomIndices[a] + 1);
1712 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
1716 gmx_fatal(FARGS, "DD rank %d, %s: %d atom(group) index inconsistencies",
1717 dd->rank, where, nerr);
1721 /* Clear all DD global state indices, starting from \p atomGroupStart and \p atomStart */
1722 static void clearDDStateIndices(gmx_domdec_t *dd,
1728 /* Clear the whole list without searching */
1729 ga2la_clear(dd->ga2la);
1733 const int numAtomsInZones = dd->comm->atomRanges.end(DDAtomRanges::Type::Zones);
1734 for (int i = 0; i < numAtomsInZones; i++)
1736 ga2la_del(dd->ga2la, dd->globalAtomIndices[i]);
1740 char *bLocalCG = dd->comm->bLocalCG;
1743 for (size_t atomGroup = atomGroupStart; atomGroup < dd->globalAtomGroupIndices.size(); atomGroup++)
1745 bLocalCG[dd->globalAtomGroupIndices[atomGroup]] = FALSE;
1749 dd_clear_local_vsite_indices(dd);
1751 if (dd->constraints)
1753 dd_clear_local_constraint_indices(dd);
1757 static gmx_bool check_grid_jump(gmx_int64_t step,
1763 gmx_domdec_comm_t *comm;
1772 for (d = 1; d < dd->ndim; d++)
1775 limit = grid_jump_limit(comm, cutoff, d);
1776 bfac = ddbox->box_size[dim];
1777 if (ddbox->tric_dir[dim])
1779 bfac *= ddbox->skew_fac[dim];
1781 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
1782 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
1790 /* This error should never be triggered under normal
1791 * circumstances, but you never know ...
1793 gmx_fatal(FARGS, "step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
1794 gmx_step_str(step, buf),
1795 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1803 static float dd_force_load(gmx_domdec_comm_t *comm)
1810 if (comm->eFlop > 1)
1812 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
1817 load = comm->cycl[ddCyclF];
1818 if (comm->cycl_n[ddCyclF] > 1)
1820 /* Subtract the maximum of the last n cycle counts
1821 * to get rid of possible high counts due to other sources,
1822 * for instance system activity, that would otherwise
1823 * affect the dynamic load balancing.
1825 load -= comm->cycl_max[ddCyclF];
1829 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
1831 float gpu_wait, gpu_wait_sum;
1833 gpu_wait = comm->cycl[ddCyclWaitGPU];
1834 if (comm->cycl_n[ddCyclF] > 1)
1836 /* We should remove the WaitGPU time of the same MD step
1837 * as the one with the maximum F time, since the F time
1838 * and the wait time are not independent.
1839 * Furthermore, the step for the max F time should be chosen
1840 * the same on all ranks that share the same GPU.
1841 * But to keep the code simple, we remove the average instead.
1842 * The main reason for artificially long times at some steps
1843 * is spurious CPU activity or MPI time, so we don't expect
1844 * that changes in the GPU wait time matter a lot here.
1846 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
1848 /* Sum the wait times over the ranks that share the same GPU */
1849 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
1850 comm->mpi_comm_gpu_shared);
1851 /* Replace the wait time by the average over the ranks */
1852 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
1860 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
1862 gmx_domdec_comm_t *comm;
1867 snew(*dim_f, dd->nc[dim]+1);
1869 for (i = 1; i < dd->nc[dim]; i++)
1871 if (comm->slb_frac[dim])
1873 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
1877 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
1880 (*dim_f)[dd->nc[dim]] = 1;
1883 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
1885 int pmeindex, slab, nso, i;
1888 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
1894 ddpme->dim = dimind;
1896 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
1898 ddpme->nslab = (ddpme->dim == 0 ?
1899 dd->comm->npmenodes_x :
1900 dd->comm->npmenodes_y);
1902 if (ddpme->nslab <= 1)
1907 nso = dd->comm->npmenodes/ddpme->nslab;
1908 /* Determine for each PME slab the PP location range for dimension dim */
1909 snew(ddpme->pp_min, ddpme->nslab);
1910 snew(ddpme->pp_max, ddpme->nslab);
1911 for (slab = 0; slab < ddpme->nslab; slab++)
1913 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
1914 ddpme->pp_max[slab] = 0;
1916 for (i = 0; i < dd->nnodes; i++)
1918 ddindex2xyz(dd->nc, i, xyz);
1919 /* For y only use our y/z slab.
1920 * This assumes that the PME x grid size matches the DD grid size.
1922 if (dimind == 0 || xyz[XX] == dd->ci[XX])
1924 pmeindex = ddindex2pmeindex(dd, i);
1927 slab = pmeindex/nso;
1931 slab = pmeindex % ddpme->nslab;
1933 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
1934 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1938 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1941 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1943 if (dd->comm->ddpme[0].dim == XX)
1945 return dd->comm->ddpme[0].maxshift;
1953 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1955 if (dd->comm->ddpme[0].dim == YY)
1957 return dd->comm->ddpme[0].maxshift;
1959 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
1961 return dd->comm->ddpme[1].maxshift;
1969 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
1971 rvec cell_ns_x0, rvec cell_ns_x1,
1974 gmx_domdec_comm_t *comm;
1979 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
1981 dim = dd->dim[dim_ind];
1983 /* Without PBC we don't have restrictions on the outer cells */
1984 if (!(dim >= ddbox->npbcdim &&
1985 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
1987 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
1988 comm->cellsize_min[dim])
1991 gmx_fatal(FARGS, "step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
1992 gmx_step_str(step, buf), dim2char(dim),
1993 comm->cell_x1[dim] - comm->cell_x0[dim],
1994 ddbox->skew_fac[dim],
1995 dd->comm->cellsize_min[dim],
1996 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2000 if ((isDlbOn(dd->comm) && dd->ndim > 1) || ddbox->nboundeddim < DIM)
2002 /* Communicate the boundaries and update cell_ns_x0/1 */
2003 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
2004 if (isDlbOn(dd->comm) && dd->ndim > 1)
2006 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
2011 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
2013 /* Note that the cycles value can be incorrect, either 0 or some
2014 * extremely large value, when our thread migrated to another core
2015 * with an unsynchronized cycle counter. If this happens less often
2016 * that once per nstlist steps, this will not cause issues, since
2017 * we later subtract the maximum value from the sum over nstlist steps.
2018 * A zero count will slightly lower the total, but that's a small effect.
2019 * Note that the main purpose of the subtraction of the maximum value
2020 * is to avoid throwing off the load balancing when stalls occur due
2021 * e.g. system activity or network congestion.
2023 dd->comm->cycl[ddCycl] += cycles;
2024 dd->comm->cycl_n[ddCycl]++;
2025 if (cycles > dd->comm->cycl_max[ddCycl])
2027 dd->comm->cycl_max[ddCycl] = cycles;
2031 static double force_flop_count(t_nrnb *nrnb)
2038 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
2040 /* To get closer to the real timings, we half the count
2041 * for the normal loops and again half it for water loops.
2044 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
2046 sum += nrnb->n[i]*0.25*cost_nrnb(i);
2050 sum += nrnb->n[i]*0.50*cost_nrnb(i);
2053 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
2056 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
2058 sum += nrnb->n[i]*cost_nrnb(i);
2061 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
2063 sum += nrnb->n[i]*cost_nrnb(i);
2069 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
2071 if (dd->comm->eFlop)
2073 dd->comm->flop -= force_flop_count(nrnb);
2076 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
2078 if (dd->comm->eFlop)
2080 dd->comm->flop += force_flop_count(nrnb);
2085 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
2089 for (i = 0; i < ddCyclNr; i++)
2091 dd->comm->cycl[i] = 0;
2092 dd->comm->cycl_n[i] = 0;
2093 dd->comm->cycl_max[i] = 0;
2096 dd->comm->flop_n = 0;
/* Gathers the per-rank load measurements up the DD dimension tree.
 * Each rank packs its force load (and, with DLB, cell fractions and
 * boundary extrema; with separate PME, the PP/PME cycle counts) into
 * sbuf, the row root gathers and reduces them, and the top-level root
 * accumulates the run totals used for load reporting.
 * NOTE(review): several brace/assignment lines are elided in this view;
 * the exact sbuf layout must match the unpacking loop below - verify
 * against the full source.
 */
static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
gmx_domdec_comm_t *comm;
domdec_load_t *load;
domdec_root_t *root = nullptr;
float cell_frac = 0, sbuf[DD_NLOAD_MAX];
fprintf(debug, "get_load_distribution start\n");
wallcycle_start(wcycle, ewcDDCOMMLOAD);
bSepPME = (dd->pme_nodeid >= 0);
if (dd->ndim == 0 && bSepPME)
/* Without decomposition, but with PME nodes, we need the load */
comm->load[0].mdf = comm->cycl[ddCyclPPduringPME];
comm->load[0].pme = comm->cycl[ddCyclPME];
/* Walk the dimensions from last to first so child rows are reduced
 * before their parent row gathers them. */
for (d = dd->ndim-1; d >= 0; d--)
/* Check if we participate in the communication in this dimension */
if (d == dd->ndim-1 ||
(dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
load = &comm->load[d];
if (isDlbOn(dd->comm))
cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
/* Pack this rank's contribution; the innermost dimension sends its
 * own force load, outer dimensions forward the reduced child row. */
if (d == dd->ndim-1)
sbuf[pos++] = dd_force_load(comm);
sbuf[pos++] = sbuf[0];
if (isDlbOn(dd->comm))
sbuf[pos++] = sbuf[0];
sbuf[pos++] = cell_frac;
sbuf[pos++] = comm->cell_f_max0[d];
sbuf[pos++] = comm->cell_f_min1[d];
sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
sbuf[pos++] = comm->cycl[ddCyclPME];
sbuf[pos++] = comm->load[d+1].sum;
sbuf[pos++] = comm->load[d+1].max;
if (isDlbOn(dd->comm))
sbuf[pos++] = comm->load[d+1].sum_m;
sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
sbuf[pos++] = comm->load[d+1].flags;
sbuf[pos++] = comm->cell_f_max0[d];
sbuf[pos++] = comm->cell_f_min1[d];
sbuf[pos++] = comm->load[d+1].mdf;
sbuf[pos++] = comm->load[d+1].pme;
/* Communicate a row in DD direction d.
* The communicators are setup such that the root always has rank 0.
MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
load->load, load->nload*sizeof(float), MPI_BYTE,
0, comm->mpi_comm_load[d]);
if (dd->ci[dim] == dd->master_ci[dim])
/* We are the root, process this row */
root = comm->root[d];
/* Reduce the gathered per-cell entries into this row's summary */
for (i = 0; i < dd->nc[dim]; i++)
load->sum += load->load[pos++];
load->max = std::max(load->max, load->load[pos]);
if (isDlbOn(dd->comm))
/* This direction could not be load balanced properly,
* therefore we need to use the maximum iso the average load.
load->sum_m = std::max(load->sum_m, load->load[pos]);
load->sum_m += load->load[pos];
load->cvol_min = std::min(load->cvol_min, load->load[pos]);
load->flags = (int)(load->load[pos++] + 0.5);
root->cell_f_max0[i] = load->load[pos++];
root->cell_f_min1[i] = load->load[pos++];
load->mdf = std::max(load->mdf, load->load[pos]);
load->pme = std::max(load->pme, load->load[pos]);
if (isDlbOn(comm) && root->bLimited)
load->sum_m *= dd->nc[dim];
load->flags |= (1<<d);
/* Top-level root: accumulate the run-wide totals used for reporting */
comm->nload += dd_load_count(comm);
comm->load_step += comm->cycl[ddCyclStep];
comm->load_sum += comm->load[0].sum;
comm->load_max += comm->load[0].max;
for (d = 0; d < dd->ndim; d++)
if (comm->load[0].flags & (1<<d))
comm->load_lim[d]++;
comm->load_mdf += comm->load[0].mdf;
comm->load_pme += comm->load[0].pme;
wallcycle_stop(wcycle, ewcDDCOMMLOAD);
fprintf(debug, "get_load_distribution finished\n");
2283 static float dd_force_load_fraction(gmx_domdec_t *dd)
2285 /* Return the relative performance loss on the total run time
2286 * due to the force calculation load imbalance.
2288 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
2290 return dd->comm->load_sum/(dd->comm->load_step*dd->nnodes);
2298 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
2300 /* Return the relative performance loss on the total run time
2301 * due to the force calculation load imbalance.
2303 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
2306 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
2307 (dd->comm->load_step*dd->nnodes);
/* Prints the end-of-run load-balancing report (DLB state, average
 * imbalance, PP/PME imbalance, and advisory notes) to the log file and
 * stderr. Only runs on the DD master and only when loads were measured.
 * NOTE(review): brace and case-break lines are elided in this view.
 */
static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
gmx_domdec_comm_t *comm = dd->comm;
/* Only the master rank prints loads and only if we measured loads */
if (!DDMASTER(dd) || comm->nload == 0)
int numPpRanks = dd->nnodes;
int numPmeRanks = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
int numRanks = numPpRanks + numPmeRanks;
float lossFraction = 0;
/* Print the average load imbalance and performance loss */
if (dd->nnodes > 1 && comm->load_sum > 0)
float imbalance = comm->load_max*numPpRanks/comm->load_sum - 1;
lossFraction = dd_force_imb_perf_loss(dd);
std::string msg = "\n Dynamic load balancing report:\n";
std::string dlbStateStr;
/* Select the human-readable description of the final DLB state */
switch (dd->comm->dlbState)
dlbStateStr = "DLB was off during the run per user request.";
case edlbsOffForever:
/* Currently this can happen due to performance loss observed, cell size
* limitations or incompatibility with other settings observed during
* determineInitialDlbState(). */
dlbStateStr = "DLB got disabled because it was unsuitable to use.";
case edlbsOffCanTurnOn:
dlbStateStr = "DLB was off during the run due to low measured imbalance.";
case edlbsOffTemporarilyLocked:
dlbStateStr = "DLB was locked at the end of the run due to unfinished PP-PME balancing.";
case edlbsOnCanTurnOff:
dlbStateStr = "DLB was turned on during the run due to measured imbalance.";
dlbStateStr = "DLB was permanently on during the run per user request.";
GMX_ASSERT(false, "Undocumented DLB state");
msg += " " + dlbStateStr + "\n";
msg += gmx::formatString("  Average load imbalance: %.1f%%.\n", imbalance*100);
msg += gmx::formatString("  The balanceable part of the MD step is %d%%, load imbalance is computed from this.\n",
static_cast<int>(dd_force_load_fraction(dd)*100 + 0.5));
msg += gmx::formatString("  Part of the total run time spent waiting due to load imbalance: %.1f%%.\n",
fprintf(fplog, "%s", msg.c_str());
fprintf(stderr, "%s", msg.c_str());
/* Print during what percentage of steps the load balancing was limited */
bool dlbWasLimited = false;
sprintf(buf, "  Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
for (int d = 0; d < dd->ndim; d++)
/* +1/2 rounds the percentage to the nearest integer */
int limitPercentage = (200*comm->load_lim[d] + 1)/(2*comm->nload);
sprintf(buf+strlen(buf), " %c %d %%",
dim2char(dd->dim[d]), limitPercentage);
if (limitPercentage >= 50)
dlbWasLimited = true;
sprintf(buf + strlen(buf), "\n");
fprintf(fplog, "%s", buf);
fprintf(stderr, "%s", buf);
/* Print the performance loss due to separate PME - PP rank imbalance */
float lossFractionPme = 0;
if (numPmeRanks > 0 && comm->load_mdf > 0 && comm->load_step > 0)
float pmeForceRatio = comm->load_pme/comm->load_mdf;
lossFractionPme = (comm->load_pme - comm->load_mdf)/comm->load_step;
if (lossFractionPme <= 0)
/* Negative loss: the PP ranks waited for PME less than PME idled */
lossFractionPme *= numPmeRanks/static_cast<float>(numRanks);
lossFractionPme *= numPpRanks/static_cast<float>(numRanks);
sprintf(buf, "  Average PME mesh/force load: %5.3f\n", pmeForceRatio);
fprintf(fplog, "%s", buf);
fprintf(stderr, "%s", buf);
sprintf(buf, "  Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", std::fabs(lossFractionPme)*100);
fprintf(fplog, "%s", buf);
fprintf(stderr, "%s", buf);
fprintf(fplog, "\n");
fprintf(stderr, "\n");
/* Warn when the measured imbalance exceeds the reporting threshold */
if (lossFraction >= DD_PERF_LOSS_WARN)
"NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
"      in the domain decomposition.\n", lossFraction*100);
sprintf(buf+strlen(buf), "      You might want to use dynamic load balancing (option -dlb.)\n");
else if (dlbWasLimited)
sprintf(buf+strlen(buf), "      You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
fprintf(fplog, "%s\n", buf);
fprintf(stderr, "%s\n", buf);
if (numPmeRanks > 0 && std::fabs(lossFractionPme) >= DD_PERF_LOSS_WARN)
"NOTE: %.1f %% performance was lost because the PME ranks\n"
"      had %s work to do than the PP ranks.\n"
"      You might want to %s the number of PME ranks\n"
"      or %s the cut-off and the grid spacing.\n",
std::fabs(lossFractionPme*100),
(lossFractionPme < 0) ? "less" : "more",
(lossFractionPme < 0) ? "decrease" : "increase",
(lossFractionPme < 0) ? "decrease" : "increase");
fprintf(fplog, "%s\n", buf);
fprintf(stderr, "%s\n", buf);
2453 static float dd_vol_min(gmx_domdec_t *dd)
2455 return dd->comm->load[0].cvol_min*dd->nnodes;
2458 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
2460 return dd->comm->load[0].flags;
2463 static float dd_f_imbal(gmx_domdec_t *dd)
2465 if (dd->comm->load[0].sum > 0)
2467 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1.0f;
2471 /* Something is wrong in the cycle counting, report no load imbalance */
2476 float dd_pme_f_ratio(gmx_domdec_t *dd)
2478 /* Should only be called on the DD master rank */
2479 assert(DDMASTER(dd));
2481 if (dd->comm->load[0].mdf > 0 && dd->comm->cycl_n[ddCyclPME] > 0)
2483 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
2491 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
2496 flags = dd_load_flags(dd);
2500 "DD load balancing is limited by minimum cell size in dimension");
2501 for (d = 0; d < dd->ndim; d++)
2505 fprintf(fplog, " %c", dim2char(dd->dim[d]));
2508 fprintf(fplog, "\n");
2510 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
2511 if (isDlbOn(dd->comm))
2513 fprintf(fplog, " vol min/aver %5.3f%c",
2514 dd_vol_min(dd), flags ? '!' : ' ');
2518 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
2520 if (dd->comm->cycl_n[ddCyclPME])
2522 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
2524 fprintf(fplog, "\n\n");
2527 static void dd_print_load_verbose(gmx_domdec_t *dd)
2529 if (isDlbOn(dd->comm))
2531 fprintf(stderr, "vol %4.2f%c ",
2532 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
2536 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
2538 if (dd->comm->cycl_n[ddCyclPME])
2540 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
/* Creates the MPI communicator for load measurement along dimension
 * dim_ind, splitting mpi_comm_all into one group per row of cells with
 * the same coordinates in the other dimensions. The row root also
 * allocates the DLB cell-fraction buffers.
 * NOTE(review): the loc_c update inside the loop and the MPI guards are
 * elided in this view - verify against the full source.
 */
static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
domdec_root_t *root;
gmx_bool bPartOfGroup = FALSE;
dim = dd->dim[dim_ind];
copy_ivec(loc, loc_c);
/* Scan the row of cells along dim; mark whether this rank is in it */
for (i = 0; i < dd->nc[dim]; i++)
rank = dd_index(dd->nc, loc_c);
if (rank == dd->rank)
/* This process is part of the group */
bPartOfGroup = TRUE;
/* Ranks not in the row pass MPI_UNDEFINED and get no communicator */
MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
dd->comm->mpi_comm_load[dim_ind] = c_row;
if (!isDlbDisabled(dd->comm))
if (dd->ci[dim] == dd->master_ci[dim])
/* This is the root process of this row */
snew(dd->comm->root[dim_ind], 1);
root = dd->comm->root[dim_ind];
snew(root->cell_f, ddCellFractionBufferSize(dd, dim_ind));
snew(root->old_cell_f, dd->nc[dim]+1);
snew(root->bCellMin, dd->nc[dim]);
snew(root->cell_f_max0, dd->nc[dim]);
snew(root->cell_f_min1, dd->nc[dim]);
snew(root->bound_min, dd->nc[dim]);
snew(root->bound_max, dd->nc[dim]);
snew(root->buf_ncd, dd->nc[dim]);
/* This is not a root process, we only need to receive cell_f */
snew(dd->comm->cell_f_row, ddCellFractionBufferSize(dd, dim_ind));
if (dd->ci[dim] == dd->master_ci[dim])
snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
/* Sets up the communicator of PP ranks that share a GPU, used by DLB to
 * balance against shared-GPU wait times. Splits the PP communicator
 * first by physical node (hash) and then by GPU id; frees the
 * communicator again when this rank turns out not to share its GPU.
 * No-op for non-PP ranks or ranks without a GPU assigned.
 */
void dd_setup_dlb_resource_sharing(t_commrec *cr,
int physicalnode_id_hash;
MPI_Comm mpi_comm_pp_physicalnode;
if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
/* Only ranks with short-ranged tasks (currently) use GPUs.
* If we don't have GPUs assigned, there are no resources to share.
physicalnode_id_hash = gmx_physicalnode_id_hash();
fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
dd->rank, physicalnode_id_hash, gpu_id);
/* Split the PP communicator over the physical nodes */
/* TODO: See if we should store this (before), as it's also used
* for the nodecomm summation.
// TODO PhysicalNodeCommunicator could be extended/used to handle
// the need for per-node per-group communicators.
MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
&mpi_comm_pp_physicalnode);
MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
&dd->comm->mpi_comm_gpu_shared);
/* The per-node communicator was only needed for the second split */
MPI_Comm_free(&mpi_comm_pp_physicalnode);
MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
/* Note that some ranks could share a GPU, while others don't */
if (dd->comm->nrank_gpu_shared == 1)
MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
GMX_UNUSED_VALUE(cr);
GMX_UNUSED_VALUE(gpu_id);
/* Creates the load-measurement communicators for all DD dimensions:
 * one for dimension 0, one per cell along dim 0 for dimension 1, and
 * one per (dim0, dim1) cell pair for dimension 2.
 * NOTE(review): the loc[] assignments between the loops are elided in
 * this view - verify against the full source.
 */
static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
int dim0, dim1, i, j;
fprintf(debug, "Making load communicators\n");
/* Allocate at least one entry even for a 0-dimensional decomposition */
snew(dd->comm->load, std::max(dd->ndim, 1));
snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
make_load_communicator(dd, 0, loc);
for (i = 0; i < dd->nc[dim0]; i++)
make_load_communicator(dd, 1, loc);
for (i = 0; i < dd->nc[dim0]; i++)
for (j = 0; j < dd->nc[dim1]; j++)
make_load_communicator(dd, 2, loc);
fprintf(debug, "Finished making load communicators\n");
/*! \brief Sets up the relation between neighboring domains and zones */
/* Determines forward/backward neighbor ranks per DD dimension (with
 * periodic wrapping), initializes the zone shift vectors, and fills the
 * i-zone/j-zone ranges with the min/max shifts used by neighbor search.
 * Also allocates the DLB root array and creates load communicators.
 * NOTE(review): brace lines and a few statements (e.g. the use of s[])
 * are elided in this view - verify against the full source.
 */
static void setup_neighbor_relations(gmx_domdec_t *dd)
int d, dim, i, j, m;
gmx_domdec_zones_t *zones;
gmx_domdec_ns_ranges_t *izone;
for (d = 0; d < dd->ndim; d++)
copy_ivec(dd->ci, tmp);
/* Forward neighbor: +1 along dim, wrapped periodically */
tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
copy_ivec(dd->ci, tmp);
/* Backward neighbor: -1 along dim, wrapped periodically */
tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
dd->neighbor[d][1]);
int nzone = (1 << dd->ndim);
int nizone = (1 << std::max(dd->ndim - 1, 0));
assert(nizone >= 1 && nizone <= DD_MAXIZONE);
zones = &dd->comm->zones;
/* Fill the zone shift vectors from the static dd_zo table */
for (i = 0; i < nzone; i++)
clear_ivec(zones->shift[i]);
for (d = 0; d < dd->ndim; d++)
zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
for (i = 0; i < nzone; i++)
for (d = 0; d < DIM; d++)
s[d] = dd->ci[d] - zones->shift[i][d];
else if (s[d] >= dd->nc[d])
zones->nizone = nizone;
for (i = 0; i < zones->nizone; i++)
assert(ddNonbondedZonePairRanges[i][0] == i);
izone = &zones->izone[i];
/* dd_zp3 is for 3D decomposition, for fewer dimensions use only
* j-zones up to nzone.
izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
for (dim = 0; dim < DIM; dim++)
if (dd->nc[dim] == 1)
/* All shifts should be allowed */
izone->shift0[dim] = -1;
izone->shift1[dim] = 1;
/* Determine the min/max j-zone shift wrt the i-zone */
izone->shift0[dim] = 1;
izone->shift1[dim] = -1;
for (j = izone->j0; j < izone->j1; j++)
int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
if (shift_diff < izone->shift0[dim])
izone->shift0[dim] = shift_diff;
if (shift_diff > izone->shift1[dim])
izone->shift1[dim] = shift_diff;
if (!isDlbDisabled(dd->comm))
snew(dd->comm->root, dd->ndim);
if (dd->comm->bRecordLoad)
make_load_communicators(dd);
/* Creates the particle-particle MPI communicator, optionally Cartesian,
 * determines this rank's DD coordinates and the DD master rank, and
 * builds the index tables mapping DD indices to simulation node ids
 * when Cartesian reordering breaks the identity mapping.
 * NOTE(review): brace lines, some declarations (periods, comm_cart,
 * buf, rank) and MPI guards are elided in this view - verify against
 * the full source.
 */
static void make_pp_communicator(FILE *fplog,
t_commrec gmx_unused *cr,
int gmx_unused reorder)
gmx_domdec_comm_t *comm;
if (comm->bCartesianPP)
/* Set up cartesian communication for the particle-particle part */
fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
for (int i = 0; i < DIM; i++)
MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
/* We overwrite the old communicator with the new cartesian one */
cr->mpi_comm_mygroup = comm_cart;
dd->mpi_comm_all = cr->mpi_comm_mygroup;
MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
if (comm->bCartesianPP_PME)
/* Since we want to use the original cartesian setup for sim,
* and not the one after split, we need to make an index.
snew(comm->ddindex2ddnodeid, dd->nnodes);
comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
/* Get the rank of the DD master,
* above we made sure that the master node is a PP node.
MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
else if (comm->bCartesianPP)
if (cr->npmenodes == 0)
/* The PP communicator is also
* the communicator for this simulation
cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
cr->nodeid = dd->rank;
MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
/* We need to make an index to go from the coordinates
* to the nodeid of this simulation.
snew(comm->ddindex2simnodeid, dd->nnodes);
snew(buf, dd->nnodes);
if (thisRankHasDuty(cr, DUTY_PP))
buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
/* Communicate the ddindex to simulation nodeid index */
MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
cr->mpi_comm_mysim);
/* Determine the master coordinates and rank.
* The DD master should be the same node as the master of this sim.
for (int i = 0; i < dd->nnodes; i++)
if (comm->ddindex2simnodeid[i] == 0)
ddindex2xyz(dd->nc, i, dd->master_ci);
MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
fprintf(debug, "The master rank is %d\n", dd->masterrank);
/* No Cartesian communicators */
/* We use the rank in dd->comm->all as DD index */
ddindex2xyz(dd->nc, dd->rank, dd->ci);
/* The simulation master nodeid is 0, so the DD master rank is also 0 */
clear_ivec(dd->master_ci);
"Domain decomposition rank %d, coordinates %d %d %d\n\n",
dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
"Domain decomposition rank %d, coordinates %d %d %d\n\n",
dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/*! \brief Receive the ddindex-to-simulation-nodeid mapping on ranks that
 * did not build it themselves.
 *
 * Only needed for the Cartesian-PP but non-Cartesian-PP/PME rank layout;
 * the mapping is summed over mpi_comm_mysim (each PP rank contributes its
 * own entry, all others contribute zero).  Without MPI this is a no-op
 * (hence the GMX_UNUSED_VALUE calls in the non-MPI build).
 */
2944 static void receive_ddindex2simnodeid(gmx_domdec_t *dd,
2948 gmx_domdec_comm_t *comm = dd->comm;
2950 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
2953 snew(comm->ddindex2simnodeid, dd->nnodes);
/* buf is a zero-initialized scratch array; only this rank's own slot is set */
2954 snew(buf, dd->nnodes);
2955 if (thisRankHasDuty(cr, DUTY_PP))
2957 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
2959 /* Communicate the ddindex to simulation nodeid index */
2960 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
2961 cr->mpi_comm_mysim);
2965 GMX_UNUSED_VALUE(dd);
2966 GMX_UNUSED_VALUE(cr);
/*! \brief Split the simulation communicator into a PP and a PME part.
 *
 * Two strategies: a Cartesian PP+PME communicator (when the PME-only rank
 * count divides evenly into an extra slab along y or z), or a plain
 * MPI_Comm_split based on each rank's duty.  Sets cr->duty,
 * cr->mpi_comm_mygroup and, in the Cartesian case, replaces
 * cr->mpi_comm_mysim with the new Cartesian communicator.
 */
2970 static void split_communicator(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
2971 DdRankOrder gmx_unused rankOrder,
2972 int gmx_unused reorder)
2974 gmx_domdec_comm_t *comm;
2983 if (comm->bCartesianPP)
/* Check whether the PME-only ranks can form a complete extra slab in y or z */
2985 for (i = 1; i < DIM; i++)
2987 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
2989 if (bDiv[YY] || bDiv[ZZ])
2991 comm->bCartesianPP_PME = TRUE;
2992 /* If we have 2D PME decomposition, which is always in x+y,
2993 * we stack the PME only nodes in z.
2994 * Otherwise we choose the direction that provides the thinnest slab
2995 * of PME only nodes as this will have the least effect
2996 * on the PP communication.
2997 * But for the PME communication the opposite might be better.
2999 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
3001 dd->nc[YY] > dd->nc[ZZ]))
3003 comm->cartpmedim = ZZ;
3007 comm->cartpmedim = YY;
/* Enlarge the chosen dimension of the Cartesian grid to hold the PME slab */
3009 comm->ntot[comm->cartpmedim]
3010 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
3014 fprintf(fplog, "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
3016 "Will not use a Cartesian communicator for PP <-> PME\n\n");
3020 if (comm->bCartesianPP_PME)
3028 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
3031 for (i = 0; i < DIM; i++)
3035 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
3037 MPI_Comm_rank(comm_cart, &rank);
/* The master rank must stay rank 0 for checkpointing/output to work */
3038 if (MASTER(cr) && rank != 0)
3040 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
3043 /* With this assignment we lose the link to the original communicator,
3044 * which will usually be MPI_COMM_WORLD, unless we have multisim.
3046 cr->mpi_comm_mysim = comm_cart;
3047 cr->sim_nodeid = rank;
3049 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
3053 fprintf(fplog, "Cartesian rank %d, coordinates %d %d %d\n\n",
3054 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Ranks inside the DD grid do PP; those in the extra slab do PME */
3057 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
3061 if (cr->npmenodes == 0 ||
3062 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
3064 cr->duty = DUTY_PME;
3067 /* Split the sim communicator into PP and PME only nodes */
3068 MPI_Comm_split(cr->mpi_comm_mysim,
3069 getThisRankDuties(cr),
3070 dd_index(comm->ntot, dd->ci),
3071 &cr->mpi_comm_mygroup);
/* Non-Cartesian layouts: assign duties by the requested rank order */
3078 case DdRankOrder::pp_pme:
3081 fprintf(fplog, "Order of the ranks: PP first, PME last\n");
3084 case DdRankOrder::interleave:
3085 /* Interleave the PP-only and PME-only ranks */
3088 fprintf(fplog, "Interleaving PP and PME ranks\n");
3090 comm->pmenodes = dd_interleaved_pme_ranks(dd);
3092 case DdRankOrder::cartesian:
3095 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(rankOrder));
/* A rank with no PP partner in the mapping is a PME-only rank */
3098 if (dd_simnode2pmenode(dd, cr, cr->sim_nodeid) == -1)
3100 cr->duty = DUTY_PME;
3107 /* Split the sim communicator into PP and PME only nodes */
3108 MPI_Comm_split(cr->mpi_comm_mysim,
3109 getThisRankDuties(cr),
3111 &cr->mpi_comm_mygroup);
3112 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
3118 fprintf(fplog, "This rank does only %s work.\n\n",
3119 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
3123 /*! \brief Generates the MPI communicators for domain decomposition.
 *
 * Splits the simulation communicator into PP/PME groups when separate PME
 * ranks are used, creates the PP communicator, distributes the
 * ddindex-to-nodeid mapping, and records which PME rank this PP rank
 * communicates with.  On the master rank it also allocates the
 * AtomDistribution used for distributing the global state.
 */
3124 static void make_dd_communicators(FILE *fplog, t_commrec *cr,
3125 gmx_domdec_t *dd, DdRankOrder ddRankOrder)
3127 gmx_domdec_comm_t *comm;
3132 copy_ivec(dd->nc, comm->ntot);
3134 comm->bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
3135 comm->bCartesianPP_PME = FALSE;
3137 /* Reorder the nodes by default. This might change the MPI ranks.
3138 * Real reordering is only supported on very few architectures,
3139 * Blue Gene is one of them.
3141 CartReorder = (getenv("GMX_NO_CART_REORDER") == nullptr);
3143 if (cr->npmenodes > 0)
3145 /* Split the communicator into a PP and PME part */
3146 split_communicator(fplog, cr, dd, ddRankOrder, CartReorder);
3147 if (comm->bCartesianPP_PME)
3149 /* We (possibly) reordered the nodes in split_communicator,
3150 * so it is no longer required in make_pp_communicator.
3152 CartReorder = FALSE;
3157 /* All nodes do PP and PME */
3159 /* We do not require separate communicators */
3160 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
3164 if (thisRankHasDuty(cr, DUTY_PP))
3166 /* Copy or make a new PP communicator */
3167 make_pp_communicator(fplog, dd, cr, CartReorder);
3171 receive_ddindex2simnodeid(dd, cr);
3174 if (!thisRankHasDuty(cr, DUTY_PME))
3176 /* Set up the communication to our PME node */
3177 dd->pme_nodeid = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
3178 dd->pme_receive_vir_ener = receive_vir_ener(dd, cr);
3181 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
3182 dd->pme_nodeid, dd->pme_receive_vir_ener);
3187 dd->pme_nodeid = -1;
3192 dd->ma = gmx::compat::make_unique<AtomDistribution>(dd->nc,
3194 comm->cgs_gl.index[comm->cgs_gl.nr]);
/*! \brief Parse user-supplied static load balancing cell-size fractions.
 *
 * \param fplog        Log file (may be printed to when fractions are used)
 * \param dir          Dimension label ("x", "y" or "z"), for messages only
 * \param nc           Number of DD cells in this dimension
 * \param size_string  String of nc relative sizes, or nullptr for uniform
 * \returns a snew-allocated array of nc normalized fractions, or
 *          (presumably) nullptr when nc <= 1 or no string was given —
 *          the early-return path is outside this view, TODO confirm.
 */
3198 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
3200 real *slb_frac, tot;
3205 if (nc > 1 && size_string != nullptr)
3209 fprintf(fplog, "Using static load balancing for the %s direction\n",
3214 for (i = 0; i < nc; i++)
/* %n records how many characters were consumed so we can advance the pointer */
3217 sscanf(size_string, "%20lf%n", &dbl, &n)
3220 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
3229 fprintf(fplog, "Relative cell sizes:");
3231 for (i = 0; i < nc; i++)
3236 fprintf(fplog, " %5.3f", slb_frac[i]);
3241 fprintf(fplog, "\n");
/*! \brief Count the multi-body (3 or more atoms) bonded interactions in the
 * whole topology, summed over all molecule instances.
 */
3248 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
3251 gmx_mtop_ilistloop_t iloop;
3255 iloop = gmx_mtop_ilistloop_init(mtop);
3256 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
3258 for (ftype = 0; ftype < F_NRE; ftype++)
3260 if ((interaction_function[ftype].flags & IF_BOND) &&
/* Each ilist entry is one parameter index plus NRAL(ftype) atom indices */
3263 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
/*! \brief Read an integer from environment variable \p env_var.
 *
 * Returns the parsed value when the variable is set and parses as an int;
 * otherwise (presumably) the default \p def — the fallback path is outside
 * this view, TODO confirm.  Logs the value found to \p fplog.
 */
3271 static int dd_getenv(FILE *fplog, const char *env_var, int def)
3277 val = getenv(env_var);
3280 if (sscanf(val, "%20d", &nst) <= 0)
3286 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
/*! \brief Print a warning to stderr and to the log file.
 *
 * NOTE(review): the stderr print is presumably guarded by MASTER(cr) and
 * the log print by fplog != nullptr; those guard lines are elided from
 * this view — confirm against the full source.
 */
3294 static void dd_warning(const t_commrec *cr, FILE *fplog, const char *warn_string)
3298 fprintf(stderr, "\n%s\n", warn_string);
3302 fprintf(fplog, "\n%s\n", warn_string);
/*! \brief Check run parameters against hard domain-decomposition
 * restrictions; calls gmx_fatal on fatal incompatibilities and
 * dd_warning for non-fatal ones.
 */
3306 static void check_dd_restrictions(t_commrec *cr, const gmx_domdec_t *dd,
3307 const t_inputrec *ir, FILE *fplog)
/* Screw PBC only works with decomposition along x alone */
3309 if (ir->ePBC == epbcSCREW &&
3310 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
3312 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
3315 if (ir->ns_type == ensSIMPLE)
3317 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
3320 if (ir->nstlist == 0)
3322 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
3325 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
3327 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
/*! \brief Return the smallest average cell size over the decomposed
 * dimensions, using the initial (undeformed) box and skew factors.
 */
3331 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3336 r = ddbox->box_size[XX];
3337 for (di = 0; di < dd->ndim; di++)
3340 /* Check using the initial average cell size */
3341 r = std::min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
3347 /*! \brief Force DLB off, or bail out with a fatal error if the user
 * explicitly requested DLB (-dlb yes) in a situation where it cannot
 * be supported.  Returns edlbsOffForever otherwise.
3349 static int forceDlbOffOrBail(int cmdlineDlbState,
3350 const std::string &reasonStr,
3354 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
3355 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
3357 if (cmdlineDlbState == edlbsOnUser)
/* NOTE(review): a non-literal string is passed as the gmx_fatal format
 * string; if reasonStr ever contains a '%' this misformats.  Safer would
 * be gmx_fatal(FARGS, "%s", ...) — flagged, not changed here. */
3359 gmx_fatal(FARGS, (dlbNotSupportedErr + reasonStr).c_str());
3361 else if (cmdlineDlbState == edlbsOffCanTurnOn)
3363 dd_warning(cr, fplog, (dlbDisableNote + reasonStr + "\n").c_str());
3365 return edlbsOffForever;
3368 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
3370 * This function parses the parameters of "-dlb" command line option setting
3371 * corresponding state values. Then it checks the consistency of the determined
3372 * state with other run parameters and settings. As a result, the initial state
3373 * may be altered or an error may be thrown if incompatibility of options is detected.
3375 * \param [in] fplog Pointer to mdrun log file.
3376 * \param [in] cr Pointer to MPI communication object.
3377 * \param [in] dlbOption Enum value for the DLB option.
3378 * \param [in] bRecordLoad True if the load balancer is recording load information.
3379 * \param [in] mdrunOptions Options for mdrun.
3380 * \param [in] ir Pointer to mdrun input parameters.
3381 * \returns DLB initial/startup state.
3383 static int determineInitialDlbState(FILE *fplog, t_commrec *cr,
3384 DlbOption dlbOption, gmx_bool bRecordLoad,
3385 const MdrunOptions &mdrunOptions,
3386 const t_inputrec *ir)
3388 int dlbState = edlbsOffCanTurnOn;
3392 case DlbOption::turnOnWhenUseful: dlbState = edlbsOffCanTurnOn; break;
3393 case DlbOption::no: dlbState = edlbsOffUser; break;
3394 case DlbOption::yes: dlbState = edlbsOnUser; break;
3395 default: gmx_incons("Invalid dlbOption enum value");
3398 /* Reruns don't support DLB: bail or override auto mode */
3399 if (mdrunOptions.rerun)
3401 std::string reasonStr = "it is not supported in reruns.";
3402 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3405 /* Unsupported integrators */
3406 if (!EI_DYNAMICS(ir->eI))
3408 auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
3409 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3412 /* Without cycle counters we can't time work to balance on */
3415 std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
3416 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3419 if (mdrunOptions.reproducible)
3421 std::string reasonStr = "you started a reproducible run.";
3426 case edlbsOffForever:
3427 GMX_RELEASE_ASSERT(false, "edlbsOffForever is not a valid initial state");
3429 case edlbsOffCanTurnOn:
3430 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3432 case edlbsOnCanTurnOff:
/* NOTE(review): the assert message names edlbsOffCanTurnOff but this
 * case is edlbsOnCanTurnOff — message typo; the string is runtime text
 * and therefore left unchanged in this comment-only edit. */
3433 GMX_RELEASE_ASSERT(false, "edlbsOffCanTurnOff is not a valid initial state");
3436 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", cr, fplog);
3439 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", dlbState);
/*! \brief Set dd->ndim and dd->dim: the decomposed dimensions in
 * decreasing cell-count priority order (x,y,z by default, or z,y,x when
 * the GMX_DD_ORDER_ZYX environment variable is set).
 */
3447 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
3450 if (getenv("GMX_DD_ORDER_ZYX") != nullptr)
3452 /* Decomposition order z,y,x */
3455 fprintf(fplog, "Using domain decomposition order z, y, x\n");
3457 for (int dim = DIM-1; dim >= 0; dim--)
/* Only dimensions with more than one cell take part in the decomposition */
3459 if (dd->nc[dim] > 1)
3461 dd->dim[dd->ndim++] = dim;
3467 /* Decomposition order x,y,z */
3468 for (int dim = 0; dim < DIM; dim++)
3470 if (dd->nc[dim] > 1)
3472 dd->dim[dd->ndim++] = dim;
3479 /* Set dim[0] to avoid extra checks on ndim in several places */
/*! \brief Allocate and zero-initialize a gmx_domdec_comm_t.
 *
 * Caller owns the returned object; the balanceRegion member is allocated
 * here as well (see the comment below about replacing it with a unique
 * pointer).
 */
3484 static gmx_domdec_comm_t *init_dd_comm()
3486 gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
3488 comm->n_load_have = 0;
3489 comm->n_load_collect = 0;
3491 comm->haveTurnedOffDlb = false;
3493 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
3495 comm->sum_nat[i] = 0;
3499 comm->load_step = 0;
3502 clear_ivec(comm->load_lim);
3506 /* This should be replaced by a unique pointer */
3507 comm->balanceRegion = ddBalanceRegionAllocate();
3512 /*! \brief Set the cell size and interaction limits, as well as the DD grid.
 *
 * Determines the initial DLB state, the communication cut-offs
 * (comm->cutoff, comm->cutoff_mbody), the minimum cell size
 * (comm->cellsize_limit, from list buffer, bonded range and P-LINCS),
 * then either validates a user-supplied DD grid or chooses one with
 * dd_choose_grid, and finally sets up the PME rank decomposition.
 */
3513 static void set_dd_limits_and_grid(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
3514 const DomdecOptions &options,
3515 const MdrunOptions &mdrunOptions,
3516 const gmx_mtop_t *mtop,
3517 const t_inputrec *ir,
3519 gmx::ArrayRef<const gmx::RVec> xGlobal,
3523 real r_bonded_limit = -1;
3524 const real tenPercentMargin = 1.1;
3525 gmx_domdec_comm_t *comm = dd->comm;
3527 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
3528 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
3530 dd->pme_recv_f_alloc = 0;
3531 dd->pme_recv_f_buf = nullptr;
3533 /* Initialize the GPU share count to 0, might change later */
3534 comm->nrank_gpu_shared = 0;
3536 comm->dlbState = determineInitialDlbState(fplog, cr, options.dlbOption, comm->bRecordLoad, mdrunOptions, ir);
3537 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
3538 /* To consider turning DLB on after 2*nstlist steps we need to check
3539 * at partitioning count 3. Thus we need to increase the first count by 2.
3541 comm->ddPartioningCountFirstDlbOff += 2;
3545 fprintf(fplog, "Dynamic load balancing: %s\n",
3546 edlbs_names[comm->dlbState]);
3548 comm->bPMELoadBalDLBLimits = FALSE;
3550 /* Allocate the charge group/atom sorting struct */
3551 comm->sort = gmx::compat::make_unique<gmx_domdec_sort_t>();
/* bCGs: true when charge groups contain more than one atom */
3553 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
3555 comm->bInterCGBondeds = ((ncg_mtop(mtop) > gmx_mtop_num_molecules(*mtop)) ||
3556 mtop->bIntermolecularInteractions);
3557 if (comm->bInterCGBondeds)
3559 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
3563 comm->bInterCGMultiBody = FALSE;
3566 dd->bInterCGcons = inter_charge_group_constraints(*mtop);
3567 dd->bInterCGsettles = inter_charge_group_settles(*mtop);
3571 /* Set the cut-off to some very large value,
3572 * so we don't need if statements everywhere in the code.
3573 * We use sqrt, since the cut-off is squared in some places.
3575 comm->cutoff = GMX_CUTOFF_INF;
3579 comm->cutoff = ir->rlist;
3581 comm->cutoff_mbody = 0;
3583 comm->cellsize_limit = 0;
3584 comm->bBondComm = FALSE;
3586 /* Atoms should be able to move by up to half the list buffer size (if > 0)
3587 * within nstlist steps. Since boundaries are allowed to displace by half
3588 * a cell size, DD cells should be at least the size of the list buffer.
3590 comm->cellsize_limit = std::max(comm->cellsize_limit,
3591 ir->rlist - std::max(ir->rvdw, ir->rcoulomb));
3593 if (comm->bInterCGBondeds)
3595 if (options.minimumCommunicationRange > 0)
/* User-supplied -rdd overrides the measured bonded distances */
3597 comm->cutoff_mbody = options.minimumCommunicationRange;
3598 if (options.useBondedCommunication)
3600 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
3604 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
3606 r_bonded_limit = comm->cutoff_mbody;
3608 else if (ir->bPeriodicMols)
3610 /* Can not easily determine the required cut-off */
3611 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
3612 comm->cutoff_mbody = comm->cutoff/2;
3613 r_bonded_limit = comm->cutoff_mbody;
/* Measure the actual maximum 2-body (r_2b) and multi-body (r_mb) distances */
3621 dd_bonded_cg_distance(fplog, mtop, ir, as_rvec_array(xGlobal.data()), box,
3622 options.checkBondedInteractions,
3625 gmx_bcast(sizeof(r_2b), &r_2b, cr);
3626 gmx_bcast(sizeof(r_mb), &r_mb, cr);
3628 /* We use an initial margin of 10% for the minimum cell size,
3629 * except when we are just below the non-bonded cut-off.
3631 if (options.useBondedCommunication)
3633 if (std::max(r_2b, r_mb) > comm->cutoff)
3635 r_bonded = std::max(r_2b, r_mb);
3636 r_bonded_limit = tenPercentMargin*r_bonded;
3637 comm->bBondComm = TRUE;
3642 r_bonded_limit = std::min(tenPercentMargin*r_bonded, comm->cutoff);
3644 /* We determine cutoff_mbody later */
3648 /* No special bonded communication,
3649 * simply increase the DD cut-off.
3651 r_bonded_limit = tenPercentMargin*std::max(r_2b, r_mb);
3652 comm->cutoff_mbody = r_bonded_limit;
3653 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
3659 "Minimum cell size due to bonded interactions: %.3f nm\n",
3662 comm->cellsize_limit = std::max(comm->cellsize_limit, r_bonded_limit);
3666 if (dd->bInterCGcons && options.constraintCommunicationRange <= 0)
3668 /* There is a cell size limit due to the constraints (P-LINCS) */
3669 rconstr = constr_r_max(fplog, mtop, ir);
3673 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
3675 if (rconstr > comm->cellsize_limit)
3677 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
3681 else if (options.constraintCommunicationRange > 0 && fplog)
3683 /* Here we do not check for dd->bInterCGcons,
3684 * because one can also set a cell size limit for virtual sites only
3685 * and at this point we don't know yet if there are intercg v-sites.
3688 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
3689 options.constraintCommunicationRange);
3690 rconstr = options.constraintCommunicationRange;
3692 comm->cellsize_limit = std::max(comm->cellsize_limit, rconstr);
3694 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
/* Either use the user-supplied -dd grid or choose one automatically */
3696 if (options.numCells[XX] > 0)
3698 copy_ivec(options.numCells, dd->nc);
3699 set_dd_dim(fplog, dd);
3700 set_ddbox_cr(cr, &dd->nc, ir, box, xGlobal, ddbox);
3702 if (options.numPmeRanks >= 0)
3704 cr->npmenodes = options.numPmeRanks;
3708 /* When the DD grid is set explicitly and -npme is set to auto,
3709 * don't use PME ranks. We check later if the DD grid is
3710 * compatible with the total number of ranks.
3715 real acs = average_cellsize_min(dd, ddbox);
3716 if (acs < comm->cellsize_limit)
3720 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
3722 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3723 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
3724 acs, comm->cellsize_limit);
3729 set_ddbox_cr(cr, nullptr, ir, box, xGlobal, ddbox);
3731 /* We need to choose the optimal DD grid and possibly PME nodes */
3733 dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
3734 options.numPmeRanks,
3735 !isDlbDisabled(comm),
3737 comm->cellsize_limit, comm->cutoff,
3738 comm->bInterCGBondeds);
3740 if (dd->nc[XX] == 0)
3743 gmx_bool bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
3744 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
3745 !bC ? "-rdd" : "-rcon",
3746 comm->dlbState != edlbsOffUser ? " or -dds" : "",
3747 bC ? " or your LINCS settings" : "");
3749 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3750 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
3752 "Look in the log file for details on the domain decomposition",
3753 cr->nnodes-cr->npmenodes, limit, buf);
3755 set_dd_dim(fplog, dd);
3761 "Domain decomposition grid %d x %d x %d, separate PME ranks %d\n",
3762 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
3765 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
3766 if (cr->nnodes - dd->nnodes != cr->npmenodes)
3768 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3769 "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
3770 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
3772 if (cr->npmenodes > dd->nnodes)
3774 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3775 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
3777 if (cr->npmenodes > 0)
3779 comm->npmenodes = cr->npmenodes;
3783 comm->npmenodes = dd->nnodes;
3786 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
3788 /* The following choices should match those
3789 * in comm_cost_est in domdec_setup.c.
3790 * Note that here the checks have to take into account
3791 * that the decomposition might occur in a different order than xyz
3792 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
3793 * in which case they will not match those in comm_cost_est,
3794 * but since that is mainly for testing purposes that's fine.
3796 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
3797 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
3798 getenv("GMX_PMEONEDD") == nullptr)
3800 comm->npmedecompdim = 2;
3801 comm->npmenodes_x = dd->nc[XX];
3802 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
3806 /* In case nc is 1 in both x and y we could still choose to
3807 * decompose pme in y instead of x, but we use x for simplicity.
3809 comm->npmedecompdim = 1;
3810 if (dd->dim[0] == YY)
3812 comm->npmenodes_x = 1;
3813 comm->npmenodes_y = comm->npmenodes;
3817 comm->npmenodes_x = comm->npmenodes;
3818 comm->npmenodes_y = 1;
3823 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
3824 comm->npmenodes_x, comm->npmenodes_y, 1);
3829 comm->npmedecompdim = 0;
3830 comm->npmenodes_x = 0;
3831 comm->npmenodes_y = 0;
3834 snew(comm->slb_frac, DIM);
3835 if (isDlbDisabled(comm))
3837 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], options.cellSizeX);
3838 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], options.cellSizeY);
3839 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], options.cellSizeZ);
3842 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
3844 if (comm->bBondComm || !isDlbDisabled(comm))
3846 /* Set the bonded communication distance to halfway
3847 * the minimum and the maximum,
3848 * since the extra communication cost is nearly zero.
3850 real acs = average_cellsize_min(dd, ddbox);
3851 comm->cutoff_mbody = 0.5*(r_bonded + acs);
3852 if (!isDlbDisabled(comm))
3854 /* Check if this does not limit the scaling */
3855 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
3856 options.dlbScaling*acs);
3858 if (!comm->bBondComm)
3860 /* Without bBondComm do not go beyond the n.b. cut-off */
3861 comm->cutoff_mbody = std::min(comm->cutoff_mbody, comm->cutoff);
3862 if (comm->cellsize_limit >= comm->cutoff)
3864 /* We don't lose a lot of efficiency
3865 * when increasing it to the n.b. cut-off.
3866 * It can even be slightly faster, because we need
3867 * less checks for the communication setup.
3869 comm->cutoff_mbody = comm->cutoff;
3872 /* Check if we did not end up below our original limit */
3873 comm->cutoff_mbody = std::max(comm->cutoff_mbody, r_bonded_limit);
3875 if (comm->cutoff_mbody > comm->cellsize_limit)
3877 comm->cellsize_limit = comm->cutoff_mbody;
3880 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
3885 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
3886 "cellsize limit %f\n",
3887 comm->bBondComm, comm->cellsize_limit);
3892 check_dd_restrictions(cr, dd, ir, fplog);
/*! \brief Apply the DLB pulse counts and minimum cell sizes that were
 * precomputed for the DLB-on state (in set_cell_limits_dlb).
 */
3896 static void set_dlb_limits(gmx_domdec_t *dd)
3899 for (int d = 0; d < dd->ndim; d++)
3901 /* Set the number of pulses to the value for DLB */
3902 dd->comm->cd[d].ind.resize(dd->comm->cd[d].np_dlb);
3904 dd->comm->cellsize_min[dd->dim[d]] =
3905 dd->comm->cellsize_min_dlb[dd->dim[d]];
/*! \brief Turn dynamic load balancing on at \p step, unless the current
 * minimum cell size is already too close to the cell size limit, in which
 * case DLB is switched off permanently instead.
 */
3910 static void turn_on_dlb(FILE *fplog, const t_commrec *cr, gmx_int64_t step)
3913 gmx_domdec_comm_t *comm;
3920 cellsize_min = comm->cellsize_min[dd->dim[0]];
3921 for (d = 1; d < dd->ndim; d++)
3923 cellsize_min = std::min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
3926 /* Turn off DLB if we're too close to the cell size limit. */
3927 if (cellsize_min < comm->cellsize_limit*1.05)
/* NOTE(review): the concatenated literals below yield "limit.Will" —
 * a space is missing after "limit."; runtime string, flagged not changed. */
3929 auto str = gmx::formatString("step %" GMX_PRId64 " Measured %.1f %% performance loss due to load imbalance, "
3930 "but the minimum cell size is smaller than 1.05 times the cell size limit."
3931 "Will no longer try dynamic load balancing.\n", step, dd_force_imb_perf_loss(dd)*100);
3932 dd_warning(cr, fplog, str.c_str());
3934 comm->dlbState = edlbsOffForever;
3939 sprintf(buf, "step %" GMX_PRId64 " Turning on dynamic load balancing, because the performance loss due to load imbalance is %.1f %%.\n", step, dd_force_imb_perf_loss(dd)*100);
3940 dd_warning(cr, fplog, buf);
3941 comm->dlbState = edlbsOnCanTurnOff;
3943 /* Store the non-DLB performance, so we can check if DLB actually
3944 * improves performance.
3946 GMX_RELEASE_ASSERT(comm->cycl_n[ddCyclStep] > 0, "When we turned on DLB, we should have measured cycles");
3947 comm->cyclesPerStepBeforeDLB = comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
3951 /* We can set the required cell size info here,
3952 * so we do not need to communicate this.
3953 * The grid is completely uniform.
3955 for (d = 0; d < dd->ndim; d++)
3959 comm->load[d].sum_m = comm->load[d].sum;
3961 nc = dd->nc[dd->dim[d]];
/* Initialize uniform cell boundary fractions along this dimension */
3962 for (i = 0; i < nc; i++)
3964 comm->root[d]->cell_f[i] = i/(real)nc;
3967 comm->root[d]->cell_f_max0[i] = i /(real)nc;
3968 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
3971 comm->root[d]->cell_f[nc] = 1.0;
/*! \brief Turn dynamic load balancing off (it may be turned on again
 * later), recording the partitioning count so the turn-on check can be
 * re-armed.
 */
3976 static void turn_off_dlb(FILE *fplog, const t_commrec *cr, gmx_int64_t step)
3978 gmx_domdec_t *dd = cr->dd;
3981 sprintf(buf, "step %" GMX_PRId64 " Turning off dynamic load balancing, because it is degrading performance.\n", step);
3982 dd_warning(cr, fplog, buf);
3983 dd->comm->dlbState = edlbsOffCanTurnOn;
3984 dd->comm->haveTurnedOffDlb = true;
3985 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
/*! \brief Permanently disable dynamic load balancing after it repeatedly
 * degraded performance; only valid from the edlbsOffCanTurnOn state.
 */
3988 static void turn_off_dlb_forever(FILE *fplog, const t_commrec *cr, gmx_int64_t step)
3990 GMX_RELEASE_ASSERT(cr->dd->comm->dlbState == edlbsOffCanTurnOn, "Can only turn off DLB forever when it was in the can-turn-on state");
3992 sprintf(buf, "step %" GMX_PRId64 " Will no longer try dynamic load balancing, as it degraded performance.\n", step);
3993 dd_warning(cr, fplog, buf);
3994 cr->dd->comm->dlbState = edlbsOffForever;
/*! \brief Allocate and return a per-charge-group "is local" flag array,
 * initialized to FALSE for all ncg_mtop(mtop) charge groups.
 * Caller owns the snew-allocated array.
 */
3997 static char *init_bLocalCG(const gmx_mtop_t *mtop)
4002 ncg = ncg_mtop(mtop);
4003 snew(bLocalCG, ncg);
4004 for (cg = 0; cg < ncg; cg++)
4006 bLocalCG[cg] = FALSE;
/*! \brief Initialize data structures for bonded interactions with DD:
 * builds the reverse topology and, when communicating beyond the cut-off
 * for bondeds (bBondComm), the charge-group link table and local-CG flags.
 */
4012 void dd_init_bondeds(FILE *fplog,
4014 const gmx_mtop_t *mtop,
4015 const gmx_vsite_t *vsite,
4016 const t_inputrec *ir,
4017 gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
4019 gmx_domdec_comm_t *comm;
4021 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
4025 if (comm->bBondComm)
4027 /* Communicate atoms beyond the cut-off for bonded interactions */
4030 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
4032 comm->bLocalCG = init_bLocalCG(mtop);
4036 /* Only communicate atoms based on cut-off */
4037 comm->cglink = nullptr;
4038 comm->bLocalCG = nullptr;
/*! \brief Print the DD settings (pulse counts, cell size limits and the
 * maximum allowed interaction distances) to the log file.
 *
 * \p bDynLoadBal selects between the DLB-on and static (SLB) report;
 * no-op when fplog is nullptr.
 */
4042 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
4043 const gmx_mtop_t *mtop, const t_inputrec *ir,
4044 gmx_bool bDynLoadBal, real dlb_scale,
4045 const gmx_ddbox_t *ddbox)
4047 gmx_domdec_comm_t *comm;
4053 if (fplog == nullptr)
4062 fprintf(fplog, "The maximum number of communication pulses is:");
4063 for (d = 0; d < dd->ndim; d++)
4065 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
4067 fprintf(fplog, "\n");
4068 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
4069 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
4070 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
4071 for (d = 0; d < DIM; d++)
4075 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
/* shrink = ratio of the DLB minimum cell size to the uniform cell size */
4082 comm->cellsize_min_dlb[d]/
4083 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
4085 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
4088 fprintf(fplog, "\n");
4092 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
4093 fprintf(fplog, "The initial number of communication pulses is:");
4094 for (d = 0; d < dd->ndim; d++)
4096 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
4098 fprintf(fplog, "\n");
4099 fprintf(fplog, "The initial domain decomposition cell size is:");
4100 for (d = 0; d < DIM; d++)
4104 fprintf(fplog, " %c %.2f nm",
4105 dim2char(d), dd->comm->cellsize_min[d]);
4108 fprintf(fplog, "\n\n");
4111 gmx_bool bInterCGVsites = count_intercg_vsites(mtop);
4113 if (comm->bInterCGBondeds ||
4115 dd->bInterCGcons || dd->bInterCGsettles)
4117 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
4118 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4119 "non-bonded interactions", "", comm->cutoff);
4123 limit = dd->comm->cellsize_limit;
4127 if (dynamic_dd_box(ddbox, ir))
4129 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
4131 limit = dd->comm->cellsize_min[XX];
4132 for (d = 1; d < DIM; d++)
4134 limit = std::min(limit, dd->comm->cellsize_min[d]);
4138 if (comm->bInterCGBondeds)
4140 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4141 "two-body bonded interactions", "(-rdd)",
4142 std::max(comm->cutoff, comm->cutoff_mbody));
4143 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4144 "multi-body bonded interactions", "(-rdd)",
4145 (comm->bBondComm || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
4149 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4150 "virtual site constructions", "(-rcon)", limit);
4152 if (dd->bInterCGcons || dd->bInterCGsettles)
4154 sprintf(buf, "atoms separated by up to %d constraints",
4156 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4157 buf, "(-rcon)", limit);
4159 fprintf(fplog, "\n");
/*! \brief Determine the DLB pulse counts (comm->cd[d].np_dlb), the
 * resulting cell size limit and per-dimension minimum DLB cell sizes,
 * given the allowed cell shrink factor \p dlb_scale.
 */
4165 static void set_cell_limits_dlb(gmx_domdec_t *dd,
4167 const t_inputrec *ir,
4168 const gmx_ddbox_t *ddbox)
4170 gmx_domdec_comm_t *comm;
4171 int d, dim, npulse, npulse_d_max, npulse_d;
/* A zero vdW or Coulomb cut-off means there is effectively no cut-off */
4176 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
4178 /* Determine the maximum number of comm. pulses in one dimension */
4180 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
4182 /* Determine the maximum required number of grid pulses */
4183 if (comm->cellsize_limit >= comm->cutoff)
4185 /* Only a single pulse is required */
4188 else if (!bNoCutOff && comm->cellsize_limit > 0)
4190 /* We round down slightly here to avoid overhead due to the latency
4191 * of extra communication calls when the cut-off
4192 * would be only slightly longer than the cell size.
4193 * Later cellsize_limit is redetermined,
4194 * so we can not miss interactions due to this rounding.
4196 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
4200 /* There is no cell size limit */
4201 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
4204 if (!bNoCutOff && npulse > 1)
4206 /* See if we can do with less pulses, based on dlb_scale */
4208 for (d = 0; d < dd->ndim; d++)
4211 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
4212 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
4213 npulse_d_max = std::max(npulse_d_max, npulse_d);
4215 npulse = std::min(npulse, npulse_d_max);
4218 /* This env var can override npulse */
4219 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
4226 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
4227 for (d = 0; d < dd->ndim; d++)
/* A dimension with nc cells can have at most nc-1 pulses */
4229 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
4230 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
4231 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
4233 comm->bVacDLBNoLimit = FALSE;
4237 /* cellsize_limit is set for LINCS in init_domain_decomposition */
4238 if (!comm->bVacDLBNoLimit)
4240 comm->cellsize_limit = std::max(comm->cellsize_limit,
4241 comm->cutoff/comm->maxpulse);
4243 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
4244 /* Set the minimum cell size for each DD dimension */
4245 for (d = 0; d < dd->ndim; d++)
4247 if (comm->bVacDLBNoLimit ||
4248 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
4250 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
4254 comm->cellsize_min_dlb[dd->dim[d]] =
4255 comm->cutoff/comm->cd[d].np_dlb;
4258 if (comm->cutoff_mbody <= 0)
4260 comm->cutoff_mbody = std::min(comm->cutoff, comm->cellsize_limit);
/* Returns whether PBC must be taken into account for bonded interactions.
 * TRUE only when PBC is active, there are bonded interactions between
 * charge groups, and the decomposition does not already cover every
 * periodic dimension (full condition spans dd->nc[XX..ZZ]).
 */
4268 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
4270     /* If each molecule is a single charge group
4271      * or we use domain decomposition for each periodic dimension,
4272      * we do not need to take pbc into account for the bonded interactions.
4274     return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
4277              (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
4280 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log
4281  *
4282  * Initializes PME grid decomposition (up to two dimensions) when PME is
4283  * used for electrostatics or VdW, applies the DLB cell-size limits, and
4284  * sizes the global-to-local atom lookup from an estimated zone volume
4285  * fraction. Issues a collective fatal error when separate PME ranks are
4286  * requested without PME electrostatics.
4287  */
4281 static void set_ddgrid_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
4282 const gmx_mtop_t *mtop, const t_inputrec *ir,
4283 const gmx_ddbox_t *ddbox)
4285 gmx_domdec_comm_t *comm;
/* Set up decomposition of the PME grid along at most two dimensions */
4291 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
4293 init_ddpme(dd, &comm->ddpme[0], 0);
4294 if (comm->npmedecompdim >= 2)
4296 init_ddpme(dd, &comm->ddpme[1], 1);
4301 comm->npmenodes = 0;
4302 if (dd->pme_nodeid >= 0)
4304 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
4305 "Can not have separate PME ranks without PME electrostatics");
4311 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
4313 if (!isDlbDisabled(comm))
4315 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
4318 print_dd_settings(fplog, dd, mtop, ir, isDlbOn(comm), dlb_scale, ddbox);
/* When DLB may still be turned on, also report the settings it would use */
4319 if (comm->dlbState == edlbsOffCanTurnOn)
4323 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
4325 print_dd_settings(fplog, dd, mtop, ir, TRUE, dlb_scale, ddbox);
/* Estimate the fraction of the system volume a DD zone set covers;
 * used below to size the global-to-local (ga2la) hash table.
 */
4328 if (ir->ePBC == epbcNONE)
4330 vol_frac = 1 - 1/(double)dd->nnodes;
4335 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
4339 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
4341 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
4343 dd->ga2la = ga2la_init(natoms_tot, static_cast<int>(vol_frac*natoms_tot));
4346 /*! \brief Set some important DD parameters that can be modified by env.vars
4347  *
4348  * Reads GMX_DD_* / GMX_DLB_* debug and tuning environment variables into
4349  * the DD communication struct and reports the non-default choices to the
4350  * log file.
4351  */
4347 static void set_dd_envvar_options(FILE *fplog, gmx_domdec_t *dd, int rank_mysim)
4349 gmx_domdec_comm_t *comm = dd->comm;
4351 dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
4352 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
4353 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
4354 int recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
4355 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
4356 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
4357 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
4359 if (dd->bSendRecv2 && fplog)
4361 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n")
;
4368 fprintf(fplog, "Will load balance based on FLOP count\n");
/* eFlop > 1 seeds the per-rank RNG — presumably to randomize the
 * flop-based load signal for testing; the use site is not visible here.
 */
4370 if (comm->eFlop > 1)
4372 srand(1 + rank_mysim);
4374 comm->bRecordLoad = TRUE;
/* Default path: record load only if cycle counters exist and recording
 * was not disabled via GMX_DD_RECORD_LOAD.
 */
4378 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
/* Default DD options: bonded-interaction checking and bonded communication
 * enabled, pp_pme rank order, automatic communication ranges (0 = choose),
 * DLB turned on when deemed useful, and a zeroed (auto) cell grid.
 */
4382 DomdecOptions::DomdecOptions() :
4383 checkBondedInteractions(TRUE),
4384 useBondedCommunication(TRUE),
4386 rankOrder(DdRankOrder::pp_pme),
4387 minimumCommunicationRange(0),
4388 constraintCommunicationRange(0),
4389 dlbOption(DlbOption::turnOnWhenUseful),
4395 clear_ivec(numCells);
/* Top-level constructor of the domain decomposition state.
 * Allocates the gmx_domdec_t, reads env-var overrides, sets limits and the
 * DD grid, creates the MPI communicators, and — on PP ranks — finishes the
 * grid/PME parameters and neighbor relations.
 * Ownership of the returned gmx_domdec_t passes to the caller.
 */
4398 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
4399 const DomdecOptions &options,
4400 const MdrunOptions &mdrunOptions,
4401 const gmx_mtop_t *mtop,
4402 const t_inputrec *ir,
4404 gmx::ArrayRef<const gmx::RVec> xGlobal)
4411 "\nInitializing Domain Decomposition on %d ranks\n", cr->nnodes);
4414 dd = new gmx_domdec_t;
4416 dd->comm = init_dd_comm();
4418 /* Initialize DD partitioning counters */
4419 dd->comm->partition_step = INT_MIN;
4422 set_dd_envvar_options(fplog, dd, cr->nodeid);
4424 gmx_ddbox_t ddbox = {0};
4425 set_dd_limits_and_grid(fplog, cr, dd, options, mdrunOptions,
4430 make_dd_communicators(fplog, cr, dd, options.rankOrder);
/* Only particle-particle ranks need the grid parameters and zone setup */
4432 if (thisRankHasDuty(cr, DUTY_PP))
4434 set_ddgrid_parameters(fplog, dd, options.dlbScaling, mtop, ir, &ddbox);
4436 setup_neighbor_relations(dd);
4439 /* Set overallocation to avoid frequent reallocation of arrays */
4440 set_over_alloc_dd(TRUE);
4442 clear_dd_cycle_counts(dd);
/* Checks whether the requested cut-off fits the current DD grid.
 * Computes, per DD dimension, the number of communication pulses the
 * requested cut-off would need and compares against the DLB pulse limits
 * and current cell sizes; also checks DLB grid jumps when DLB is active.
 * The local verdict is made collective via gmx_sumi.
 */
4447 static gmx_bool test_dd_cutoff(t_commrec *cr,
4448 t_state *state, const t_inputrec *ir,
4459 set_ddbox(dd, false, ir, state->box, true, state->x, &ddbox);
4463 for (d = 0; d < dd->ndim; d++)
4467 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
/* With a dynamic box, leave extra margin for pressure-scaling shrinkage */
4468 if (dynamic_dd_box(&ddbox, ir))
4470 inv_cell_size *= DD_PRES_SCALE_MARGIN;
4473 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
4475 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
4477 if (np > dd->comm->cd[d].np_dlb)
4482 /* If a current local cell size is smaller than the requested
4483 * cut-off, we could still fix it, but this gets very complicated.
4484 * Without fixing here, we might actually need more checks.
4486 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
4493 if (!isDlbDisabled(dd->comm))
4495 /* If DLB is not active yet, we don't need to check the grid jumps.
4496 * Actually we shouldn't, because then the grid jump data is not set.
4498 if (isDlbOn(dd->comm) &&
4499 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
/* Make the decision collective: any rank being limited limits all */
4504 gmx_sumi(1, &LocallyLimited, cr);
4506 if (LocallyLimited > 0)
/* Attempts to change the DD communication cut-off to cutoff_req.
 * The new value is stored only when test_dd_cutoff accepts it;
 * returns whether the change was applied.
 */
4515 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, const t_inputrec *ir,
4518 gmx_bool bCutoffAllowed;
4520 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
4524 cr->dd->comm->cutoff = cutoff_req;
4527 return bCutoffAllowed;
/* Enables the DLB staggering limit and records the cut-off that PME load
 * balancing requires to keep fitting; called from PME tuning code.
 */
4530 void set_dd_dlb_max_cutoff(t_commrec *cr, real cutoff)
4532 gmx_domdec_comm_t *comm;
4534 comm = cr->dd->comm;
4536 /* Turn on the DLB limiting (might have been on already) */
4537 comm->bPMELoadBalDLBLimits = TRUE;
4539 /* Change the cut-off limit */
4540 comm->PMELoadBal_max_cutoff = cutoff;
4544 fprintf(debug, "PME load balancing set a limit to the DLB staggering such that a %f cut-off will continue to fit\n",
4545 comm->PMELoadBal_max_cutoff);
4549 /* Sets whether we should later check the load imbalance data, so that
4550 * we can trigger dynamic load balancing if enough imbalance has
4553 * Used after PME load balancing unlocks DLB, so that the check
4554 * whether DLB will be useful can happen immediately.
4555 * Only acts when the DLB state is "off but can turn on".
4556 */
4556 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue)
4558 if (dd->comm->dlbState == edlbsOffCanTurnOn)
4560 dd->comm->bCheckWhetherToTurnDlbOn = bValue;
4564 /* Store the DD partitioning count, so we can ignore cycle counts
4565 * over the next nstlist steps, which are often slower.
4567 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
4572 /* Returns if we should check whether there has been enough load
4573 * imbalance to trigger dynamic load balancing.
4574 * Returns FALSE while DLB cannot be turned on, during the warm-up
4575 * partitionings after DLB was (re)locked, and for untimed steps.
4576 */
4575 static gmx_bool dd_dlb_get_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd)
4577 if (dd->comm->dlbState != edlbsOffCanTurnOn)
4582 if (dd->ddp_count <= dd->comm->ddPartioningCountFirstDlbOff)
4584 /* We ignore the first nstlist steps at the start of the run
4585 * or after PME load balancing or after turning DLB off, since
4586 * these often have extra allocation or cache miss overhead.
4591 if (dd->comm->cycl_n[ddCyclStep] == 0)
4593 /* We can have zero timed steps when dd_partition_system is called
4594 * more than once at the same step, e.g. with replica exchange.
4595 * Turning on DLB would trigger an assertion failure later, but is
4596 * also useless right after exchanging replicas.
4601 /* We should check whether we should use DLB directly after
4603 if (dd->comm->bCheckWhetherToTurnDlbOn)
4605 /* This flag was set when the PME load-balancing routines
4606 unlocked DLB, and should now be cleared. */
4607 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, FALSE);
4610 /* We check whether we should use DLB every c_checkTurnDlbOnInterval
4611 * partitionings (we do not do this every partitioning, so that we
4612 * avoid excessive communication). */
4613 if (dd->comm->n_load_have % c_checkTurnDlbOnInterval == c_checkTurnDlbOnInterval - 1)
/* Returns whether dynamic load balancing is currently active */
4621 gmx_bool dd_dlb_is_on(const gmx_domdec_t *dd)
4623 return isDlbOn(dd->comm);
/* Returns whether DLB is temporarily locked off (e.g. during PME tuning) */
4626 gmx_bool dd_dlb_is_locked(const gmx_domdec_t *dd)
4628 return (dd->comm->dlbState == edlbsOffTemporarilyLocked);
/* Temporarily locks DLB off; no-op unless DLB is in the auto state */
4631 void dd_dlb_lock(gmx_domdec_t *dd)
4633 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
4634 if (dd->comm->dlbState == edlbsOffCanTurnOn)
4636 dd->comm->dlbState = edlbsOffTemporarilyLocked;
/* Releases a temporary DLB lock and requests an immediate usefulness check */
4640 void dd_dlb_unlock(gmx_domdec_t *dd)
4642 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
4643 if (dd->comm->dlbState == edlbsOffTemporarilyLocked)
4645 dd->comm->dlbState = edlbsOffCanTurnOn;
4646 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
/* Merges charge groups received in communication pulse \p pulse into the
 * local arrays (global indices, positions, cg index, cg info).
 * First shifts data stored by previous pulses to make room per zone, then
 * copies the freshly received buffers (recv_i, recv_vr) into the gaps and
 * updates the per-zone charge-group counts in ncg_cell.
 * NOTE(review): relies on exact in-place index arithmetic; keep the
 * statement order when modifying.
 */
4650 static void merge_cg_buffers(int ncell,
4651 gmx_domdec_comm_dim_t *cd, int pulse,
4653 gmx::ArrayRef<int> index_gl,
4655 rvec *cg_cm, rvec *recv_vr,
4656 gmx::ArrayRef<int> cgindex,
4657 cginfo_mb_t *cginfo_mb, int *cginfo)
4659 gmx_domdec_ind_t *ind, *ind_p;
4660 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
4661 int shift, shift_at;
4663 ind = &cd->ind[pulse];
4665 /* First correct the already stored data */
4666 shift = ind->nrecv[ncell];
/* Walk zones backwards so moves never overwrite not-yet-moved data */
4667 for (cell = ncell-1; cell >= 0; cell--)
4669 shift -= ind->nrecv[cell];
4672 /* Move the cg's present from previous grid pulses */
4673 cg0 = ncg_cell[ncell+cell];
4674 cg1 = ncg_cell[ncell+cell+1];
4675 cgindex[cg1+shift] = cgindex[cg1];
4676 for (cg = cg1-1; cg >= cg0; cg--)
4678 index_gl[cg+shift] = index_gl[cg];
4679 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
4680 cgindex[cg+shift] = cgindex[cg];
4681 cginfo[cg+shift] = cginfo[cg];
4683 /* Correct the already stored send indices for the shift */
4684 for (p = 1; p <= pulse; p++)
4686 ind_p = &cd->ind[p];
4688 for (c = 0; c < cell; c++)
4690 cg0 += ind_p->nsend[c];
4692 cg1 = cg0 + ind_p->nsend[cell];
4693 for (cg = cg0; cg < cg1; cg++)
4695 ind_p->index[cg] += shift;
4701 /* Merge in the communicated buffers */
4705 for (cell = 0; cell < ncell; cell++)
4707 cg1 = ncg_cell[ncell+cell+1] + shift;
4710 /* Correct the old cg indices */
4711 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
4713 cgindex[cg+1] += shift_at;
4716 for (cg = 0; cg < ind->nrecv[cell]; cg++)
4718 /* Copy this charge group from the buffer */
4719 index_gl[cg1] = recv_i[cg0];
4720 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
4721 /* Add it to the cgindex */
4722 cg_gl = index_gl[cg1];
4723 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
4724 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
4725 cgindex[cg1+1] = cgindex[cg1] + nat;
4730 shift += ind->nrecv[cell];
4731 ncg_cell[ncell+cell+1] = cg1;
/* Fills cell2at0/cell2at1 per zone and pulse with the atom-index ranges of
 * the received atom groups, for direct copying of communication buffers.
 */
4735 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
4738 const RangePartitioning &atomGroups)
4740 /* Store the atom block boundaries for easy copying of communication buffers
4742 int g = atomGroupStart;
4743 for (int zone = 0; zone < nzone; zone++)
4745 for (gmx_domdec_ind_t &ind : cd->ind)
4747 const auto range = atomGroups.subRange(g, g + ind.nrecv[zone]);
4748 ind.cell2at0[zone] = range.begin();
4749 ind.cell2at1[zone] = range.end();
4750 g += ind.nrecv[zone];
/* Returns whether charge group cg_gl has a bonded link to any charge group
 * that is not present locally (bLocalCG FALSE for a linked partner).
 */
4755 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
4761 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
4763 if (!bLocalCG[link->a[i]])
4772 /* Domain corners for communication, a maximum of 4 i-zones see a j domain
4773  * Filled by set_dd_corners() and consumed by get_zone_pulse_cgs() for the
4774  * distance checks that decide which atom groups to communicate.
4775  */
4774 real c[DIM][4]; /* the corners for the non-bonded communication */
4775 real cr0; /* corner for rounding */
4776 real cr1[4]; /* corners for rounding */
4777 real bc[DIM]; /* corners for bonded communication */
4778 real bcr1; /* corner for rounding for bonded communication */
4781 /* Determine the corners of the domain(s) we are communicating with.
4782  * With DLB the staggered neighbor-cell boundaries (zone_d1/zone_d2) are
4783  * folded in via maxima; without DLB the uniform cell_x0/cell_x1 suffice.
4784  */
4783 set_dd_corners(const gmx_domdec_t *dd,
4784 int dim0, int dim1, int dim2,
4788 const gmx_domdec_comm_t *comm;
4789 const gmx_domdec_zones_t *zones;
4794 zones = &comm->zones;
4796 /* Keep the compiler happy */
4800 /* The first dimension is equal for all cells */
4801 c->c[0][0] = comm->cell_x0[dim0];
4804 c->bc[0] = c->c[0][0];
4809 /* This cell row is only seen from the first row */
4810 c->c[1][0] = comm->cell_x0[dim1];
4811 /* All rows can see this row */
4812 c->c[1][1] = comm->cell_x0[dim1];
4813 if (isDlbOn(dd->comm))
4815 c->c[1][1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
4818 /* For the multi-body distance we need the maximum */
4819 c->bc[1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
4822 /* Set the upper-right corner for rounding */
4823 c->cr0 = comm->cell_x1[dim0];
4828 for (j = 0; j < 4; j++)
4830 c->c[2][j] = comm->cell_x0[dim2];
4832 if (isDlbOn(dd->comm))
4834 /* Use the maximum of the i-cells that see a j-cell */
4835 for (i = 0; i < zones->nizone; i++)
4837 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
4842 std::max(c->c[2][j-4],
4843 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
4849 /* For the multi-body distance we need the maximum */
4850 c->bc[2] = comm->cell_x0[dim2];
4851 for (i = 0; i < 2; i++)
4853 for (j = 0; j < 2; j++)
4855 c->bc[2] = std::max(c->bc[2], comm->zone_d2[i][j].p1_0);
4861 /* Set the upper-right corner for rounding */
4862 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
4863 * Only cell (0,0,0) can see cell 7 (1,1,1)
4865 c->cr1[0] = comm->cell_x1[dim1];
4866 c->cr1[3] = comm->cell_x1[dim1];
4867 if (isDlbOn(dd->comm))
4869 c->cr1[0] = std::max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
4872 /* For the multi-body distance we need the maximum */
4873 c->bcr1 = std::max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
4880 /* Add the atom groups we need to send in this pulse from this zone to
4881 * \p localAtomGroups and \p work.
4882 *
4883 * For each charge group in [cg0, cg1) the distance to the zone corners is
4884 * computed (rectangular fast path, or the triclinic path with plane-normal
4885 * corrections) and compared against the non-bonded (r_comm2) and bonded
4886 * (r_bcomm2) cut-offs; selected groups have their index and (possibly
4887 * pbc-corrected) position appended to the thread-local work buffers.
4888 */
4884 get_zone_pulse_cgs(gmx_domdec_t *dd,
4885 int zonei, int zone,
4887 gmx::ArrayRef<const int> globalAtomGroupIndices,
4888 const gmx::RangePartitioning &atomGroups,
4889 int dim, int dim_ind,
4890 int dim0, int dim1, int dim2,
4891 real r_comm2, real r_bcomm2,
4893 bool distanceIsTriclinic,
4895 real skew_fac2_d, real skew_fac_01,
4896 rvec *v_d, rvec *v_0, rvec *v_1,
4897 const dd_corners_t *c,
4899 gmx_bool bDistBonded,
4905 std::vector<int> *localAtomGroups,
4906 dd_comm_setup_work_t *work)
4908 gmx_domdec_comm_t *comm;
4910 gmx_bool bDistMB_pulse;
4912 real r2, rb2, r, tric_sh;
/* Screw PBC mirrors y/z when communicating across x (see position copy below) */
4919 bScrew = (dd->bScrewPBC && dim == XX);
4921 bDistMB_pulse = (bDistMB && bDistBonded);
4923 /* Unpack the work data */
4924 std::vector<int> &ibuf = work->atomGroupBuffer;
4925 std::vector<gmx::RVec> &vbuf = work->positionBuffer;
4929 for (cg = cg0; cg < cg1; cg++)
4933 if (!distanceIsTriclinic)
4935 /* Rectangular direction, easy */
4936 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
4943 r = cg_cm[cg][dim] - c->bc[dim_ind];
4949 /* Rounding gives at most a 16% reduction
4950 * in communicated atoms
4952 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
4954 r = cg_cm[cg][dim0] - c->cr0;
4955 /* This is the first dimension, so always r >= 0 */
4962 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
4964 r = cg_cm[cg][dim1] - c->cr1[zone];
4971 r = cg_cm[cg][dim1] - c->bcr1;
4981 /* Triclinic direction, more complicated */
4984 /* Rounding, conservative as the skew_fac multiplication
4985 * will slightly underestimate the distance.
4987 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
4989 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
4990 for (i = dim0+1; i < DIM; i++)
4992 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
4994 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
4997 rb[dim0] = rn[dim0];
5000 /* Take care that the cell planes along dim0 might not
5001 * be orthogonal to those along dim1 and dim2.
5003 for (i = 1; i <= dim_ind; i++)
5006 if (normal[dim0][dimd] > 0)
5008 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
5011 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
5016 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
5018 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
5020 for (i = dim1+1; i < DIM; i++)
5022 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
5024 rn[dim1] += tric_sh;
5027 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
5028 /* Take care of coupling of the distances
5029 * to the planes along dim0 and dim1 through dim2.
5031 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
5032 /* Take care that the cell planes along dim1
5033 * might not be orthogonal to that along dim2.
5035 if (normal[dim1][dim2] > 0)
5037 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
5043 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
5046 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
5047 /* Take care of coupling of the distances
5048 * to the planes along dim0 and dim1 through dim2.
5050 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
5051 /* Take care that the cell planes along dim1
5052 * might not be orthogonal to that along dim2.
5054 if (normal[dim1][dim2] > 0)
5056 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
5061 /* The distance along the communication direction */
5062 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
5064 for (i = dim+1; i < DIM; i++)
5066 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
5071 r2 += rn[dim]*rn[dim]*skew_fac2_d;
5072 /* Take care of coupling of the distances
5073 * to the planes along dim0 and dim1 through dim2.
5075 if (dim_ind == 1 && zonei == 1)
5077 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
5083 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
5086 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
5087 /* Take care of coupling of the distances
5088 * to the planes along dim0 and dim1 through dim2.
5090 if (dim_ind == 1 && zonei == 1)
5092 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
/* Selection: within the non-bonded cut-off, or within the bonded cut-off
 * for groups with inter-cg bondeds whose link partner is not yet local.
 */
5100 ((bDistMB && rb2 < r_bcomm2) ||
5101 (bDist2B && r2 < r_bcomm2)) &&
5103 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
5104 missing_link(comm->cglink, globalAtomGroupIndices[cg],
5107 /* Store the local and global atom group indices and position */
5108 localAtomGroups->push_back(cg);
5109 ibuf.push_back(globalAtomGroupIndices[cg]);
5113 if (dd->ci[dim] == 0)
5115 /* Correct cg_cm for pbc */
5116 rvec_add(cg_cm[cg], box[dim], posPbc);
/* Screw PBC: mirror y and z for the shifted image */
5119 posPbc[YY] = box[YY][YY] - posPbc[YY];
5120 posPbc[ZZ] = box[ZZ][ZZ] - posPbc[ZZ];
5125 copy_rvec(cg_cm[cg], posPbc);
5127 vbuf.emplace_back(posPbc[XX], posPbc[YY], posPbc[ZZ]);
5129 nat += atomGroups.block(cg).size();
5134 work->nsend_zone = nsend_z;
/* Resets the per-thread communication setup work buffers and counters */
5137 static void clearCommSetupData(dd_comm_setup_work_t *work)
5139 work->localAtomGroupBuffer.clear();
5140 work->atomGroupBuffer.clear();
5141 work->positionBuffer.clear();
5143 work->nsend_zone = 0;
/* Sets up the halo communication for all DD dimensions and pulses.
 * Per dimension and pulse: selects the atom groups to send (threaded via
 * OpenMP, using get_zone_pulse_cgs), exchanges counts and data with the
 * neighbor ranks (ddSendrecv), and merges received groups into the local
 * index/position/cginfo arrays (in place when possible, otherwise via
 * merge_cg_buffers). Finally updates the zone atom ranges.
 */
5146 static void setup_dd_communication(gmx_domdec_t *dd,
5147 matrix box, gmx_ddbox_t *ddbox,
5149 t_state *state, PaddedRVecVector *f)
5151 int dim_ind, dim, dim0, dim1, dim2, dimd, nat_tot;
5152 int nzone, nzone_send, zone, zonei, cg0, cg1;
5153 int c, i, cg, cg_gl, nrcg;
5154 int *zone_cg_range, pos_cg;
5155 gmx_domdec_comm_t *comm;
5156 gmx_domdec_zones_t *zones;
5157 gmx_domdec_comm_dim_t *cd;
5158 cginfo_mb_t *cginfo_mb;
5159 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
5160 real r_comm2, r_bcomm2;
5161 dd_corners_t corners;
5162 rvec *cg_cm, *normal, *v_d, *v_0 = nullptr, *v_1 = nullptr;
5163 real skew_fac2_d, skew_fac_01;
5168 fprintf(debug, "Setting up DD communication\n");
5173 if (comm->dth.empty())
5175 /* Initialize the thread data.
5176 * This can not be done in init_domain_decomposition,
5177 * as the numbers of threads is determined later.
5179 int numThreads = gmx_omp_nthreads_get(emntDomdec);
5180 comm->dth.resize(numThreads);
/* With the Verlet scheme the atom positions themselves serve as cg centers */
5183 switch (fr->cutoff_scheme)
5189 cg_cm = as_rvec_array(state->x.data());
5192 gmx_incons("unimplemented");
5196 bBondComm = comm->bBondComm;
5198 /* Do we need to determine extra distances for multi-body bondeds? */
5199 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
5201 /* Do we need to determine extra distances for only two-body bondeds? */
5202 bDist2B = (bBondComm && !bDistMB);
5204 r_comm2 = gmx::square(comm->cutoff);
5205 r_bcomm2 = gmx::square(comm->cutoff_mbody);
5209 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, std::sqrt(r_bcomm2));
5212 zones = &comm->zones;
5215 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
5216 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
5218 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
5220 /* Triclinic stuff */
5221 normal = ddbox->normal;
5225 v_0 = ddbox->v[dim0];
5226 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
5228 /* Determine the coupling coefficient for the distances
5229 * to the cell planes along dim0 and dim1 through dim2.
5230 * This is required for correct rounding.
5233 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
5236 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
5242 v_1 = ddbox->v[dim1];
5245 zone_cg_range = zones->cg_range;
5246 cginfo_mb = fr->cginfo_mb;
/* Zone 0 (home zone) holds the ncg_home local charge groups */
5248 zone_cg_range[0] = 0;
5249 zone_cg_range[1] = dd->ncg_home;
5250 comm->zone_ncg1[0] = dd->ncg_home;
5251 pos_cg = dd->ncg_home;
5253 nat_tot = comm->atomRanges.numHomeAtoms();
5255 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
5257 dim = dd->dim[dim_ind];
5258 cd = &comm->cd[dim_ind];
5260 /* Check if we need to compute triclinic distances along this dim */
5261 bool distanceIsTriclinic = false;
5262 for (i = 0; i <= dim_ind; i++)
5264 if (ddbox->tric_dir[dd->dim[i]])
5266 distanceIsTriclinic = true;
5270 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
5272 /* No pbc in this dimension, the first node should not comm. */
5280 v_d = ddbox->v[dim];
5281 skew_fac2_d = gmx::square(ddbox->skew_fac[dim]);
5283 cd->receiveInPlace = true;
5284 for (int p = 0; p < cd->numPulses(); p++)
5286 /* Only atoms communicated in the first pulse are used
5287 * for multi-body bonded interactions or for bBondComm.
5289 bDistBonded = ((bDistMB || bDist2B) && p == 0);
5291 gmx_domdec_ind_t *ind = &cd->ind[p];
5293 /* Thread 0 writes in the global index array */
5295 clearCommSetupData(&comm->dth[0]);
5297 for (zone = 0; zone < nzone_send; zone++)
5299 if (dim_ind > 0 && distanceIsTriclinic)
5301 /* Determine slightly more optimized skew_fac's
5303 * This reduces the number of communicated atoms
5304 * by about 10% for 3D DD of rhombic dodecahedra.
5306 for (dimd = 0; dimd < dim; dimd++)
5308 sf2_round[dimd] = 1;
5309 if (ddbox->tric_dir[dimd])
5311 for (i = dd->dim[dimd]+1; i < DIM; i++)
5313 /* If we are shifted in dimension i
5314 * and the cell plane is tilted forward
5315 * in dimension i, skip this coupling.
5317 if (!(zones->shift[nzone+zone][i] &&
5318 ddbox->v[dimd][i][dimd] >= 0))
5321 gmx::square(ddbox->v[dimd][i][dimd]);
5324 sf2_round[dimd] = 1/sf2_round[dimd];
5329 zonei = zone_perm[dim_ind][zone];
5332 /* Here we permute the zones to obtain a convenient order
5333 * for neighbor searching
5335 cg0 = zone_cg_range[zonei];
5336 cg1 = zone_cg_range[zonei+1];
5340 /* Look only at the cg's received in the previous grid pulse
5342 cg1 = zone_cg_range[nzone+zone+1];
5343 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
5346 const int numThreads = static_cast<int>(comm->dth.size());
5347 #pragma omp parallel for num_threads(numThreads) schedule(static)
5348 for (int th = 0; th < numThreads; th++)
5352 dd_comm_setup_work_t &work = comm->dth[th];
5354 /* Retain data accumulated into buffers of thread 0 */
5357 clearCommSetupData(&work);
/* Split the cg range evenly over the threads */
5360 int cg0_th = cg0 + ((cg1 - cg0)* th )/numThreads;
5361 int cg1_th = cg0 + ((cg1 - cg0)*(th+1))/numThreads;
5363 /* Get the cg's for this pulse in this zone */
5364 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
5365 dd->globalAtomGroupIndices,
5367 dim, dim_ind, dim0, dim1, dim2,
5369 box, distanceIsTriclinic,
5370 normal, skew_fac2_d, skew_fac_01,
5371 v_d, v_0, v_1, &corners, sf2_round,
5372 bDistBonded, bBondComm,
5375 th == 0 ? &ind->index : &work.localAtomGroupBuffer,
5378 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
5381 std::vector<int> &atomGroups = comm->dth[0].atomGroupBuffer;
5382 std::vector<gmx::RVec> &positions = comm->dth[0].positionBuffer;
5383 ind->nsend[zone] = comm->dth[0].nsend_zone;
5384 /* Append data of threads>=1 to the communication buffers */
5385 for (int th = 1; th < numThreads; th++)
5387 const dd_comm_setup_work_t &dth = comm->dth[th];
5389 ind->index.insert(ind->index.end(), dth.localAtomGroupBuffer.begin(), dth.localAtomGroupBuffer.end());
5390 atomGroups.insert(atomGroups.end(), dth.atomGroupBuffer.begin(), dth.atomGroupBuffer.end());
5391 positions.insert(positions.end(), dth.positionBuffer.begin(), dth.positionBuffer.end());
5392 comm->dth[0].nat += dth.nat;
5393 ind->nsend[zone] += dth.nsend_zone;
5396 /* Clear the counts in case we do not have pbc */
5397 for (zone = nzone_send; zone < nzone; zone++)
5399 ind->nsend[zone] = 0;
5401 ind->nsend[nzone] = ind->index.size();
5402 ind->nsend[nzone + 1] = comm->dth[0].nat;
5403 /* Communicate the number of cg's and atoms to receive */
5404 ddSendrecv(dd, dim_ind, dddirBackward,
5405 ind->nsend, nzone+2,
5406 ind->nrecv, nzone+2);
5410 /* We can receive in place if only the last zone is not empty */
5411 for (zone = 0; zone < nzone-1; zone++)
5413 if (ind->nrecv[zone] > 0)
5415 cd->receiveInPlace = false;
5420 int receiveBufferSize = 0;
5421 if (!cd->receiveInPlace)
5423 receiveBufferSize = ind->nrecv[nzone];
5425 /* These buffers are actually only needed with in-place */
5426 DDBufferAccess<int> globalAtomGroupBuffer(comm->intBuffer, receiveBufferSize);
5427 DDBufferAccess<gmx::RVec> rvecBuffer(comm->rvecBuffer, receiveBufferSize);
5429 dd_comm_setup_work_t &work = comm->dth[0];
5431 /* Make space for the global cg indices */
5432 int numAtomGroupsNew = pos_cg + ind->nrecv[nzone];
5433 dd->globalAtomGroupIndices.resize(numAtomGroupsNew);
5434 /* Communicate the global cg indices */
5435 gmx::ArrayRef<int> integerBufferRef;
5436 if (cd->receiveInPlace)
5438 integerBufferRef = gmx::arrayRefFromArray(dd->globalAtomGroupIndices.data() + pos_cg, ind->nrecv[nzone]);
5442 integerBufferRef = globalAtomGroupBuffer.buffer;
5444 ddSendrecv<int>(dd, dim_ind, dddirBackward,
5445 work.atomGroupBuffer, integerBufferRef);
5447 /* Make space for cg_cm */
5448 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
/* Reallocation may have moved the position array; refresh cg_cm */
5449 if (fr->cutoff_scheme == ecutsGROUP)
5455 cg_cm = as_rvec_array(state->x.data());
5457 /* Communicate cg_cm */
5458 gmx::ArrayRef<gmx::RVec> rvecBufferRef;
5459 if (cd->receiveInPlace)
5461 rvecBufferRef = gmx::arrayRefFromArray(reinterpret_cast<gmx::RVec *>(cg_cm + pos_cg), ind->nrecv[nzone]);
5465 rvecBufferRef = rvecBuffer.buffer;
5467 ddSendrecv<gmx::RVec>(dd, dim_ind, dddirBackward,
5468 work.positionBuffer, rvecBufferRef);
5470 /* Make the charge group index */
5471 if (cd->receiveInPlace)
5473 zone = (p == 0 ? 0 : nzone - 1);
5474 while (zone < nzone)
5476 for (cg = 0; cg < ind->nrecv[zone]; cg++)
5478 cg_gl = dd->globalAtomGroupIndices[pos_cg];
5479 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
5480 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
5481 dd->atomGrouping_.appendBlock(nrcg);
5484 /* Update the charge group presence,
5485 * so we can use it in the next pass of the loop.
5487 comm->bLocalCG[cg_gl] = TRUE;
5493 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
5496 zone_cg_range[nzone+zone] = pos_cg;
5501 /* This part of the code is never executed with bBondComm. */
5502 std::vector<int> &atomGroupsIndex = dd->atomGrouping_.rawIndex();
5503 atomGroupsIndex.resize(numAtomGroupsNew + 1);
5505 merge_cg_buffers(nzone, cd, p, zone_cg_range,
5506 dd->globalAtomGroupIndices, integerBufferRef.data(),
5507 cg_cm, as_rvec_array(rvecBufferRef.data()),
5509 fr->cginfo_mb, fr->cginfo);
5510 pos_cg += ind->nrecv[nzone];
5512 nat_tot += ind->nrecv[nzone+1];
5514 if (!cd->receiveInPlace)
5516 /* Store the atom block for easy copying of communication buffers */
5517 make_cell2at_index(cd, nzone, zone_cg_range[nzone], dd->atomGrouping());
5522 comm->atomRanges.setEnd(DDAtomRanges::Type::Zones, nat_tot);
5526 /* We don't need to update cginfo, since that was already done above.
5527 * So we pass NULL for the forcerec.
5529 dd_set_cginfo(dd->globalAtomGroupIndices,
5530 dd->ncg_home, dd->globalAtomGroupIndices.size(),
5531 nullptr, comm->bLocalCG);
5536 fprintf(debug, "Finished setting up DD communication, zones:");
5537 for (c = 0; c < zones->n; c++)
5539 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
5541 fprintf(debug, "\n");
/* Derives the per-i-zone charge-group boundaries (cg1, jcg0, jcg1) from the
 * zone cg_range array after communication setup.
 */
5545 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
5549 for (c = 0; c < zones->nizone; c++)
5551 zones->izone[c].cg1 = zones->cg_range[c+1];
5552 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
5553 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
5557 /* \brief Set zone dimensions for zones \p zone_start to \p zone_end-1
5559 * Also sets the atom density for the home zone when \p zone_start=0.
5560 * For this \p numMovedChargeGroupsInHomeZone needs to be passed to tell
5561 * how many charge groups will move but are still part of the current range.
5562 * \todo When converting domdec to use proper classes, all these variables
5563 * should be private and a method should return the correct count
5564 * depending on an internal state.
5566 * \param[in,out] dd The domain decomposition struct
5567 * \param[in] box The box
5568 * \param[in] ddbox The domain decomposition box struct
5569 * \param[in] zone_start The start of the zone range to set sizes for
5570 * \param[in] zone_end The end of the zone range to set sizes for
5571 * \param[in] numMovedChargeGroupsInHomeZone The number of charge groups in the home zone that should be moved but are still present in dd->comm->zones.cg_range
5573 static void set_zones_size(gmx_domdec_t *dd,
5574 matrix box, const gmx_ddbox_t *ddbox,
5575 int zone_start, int zone_end,
5576 int numMovedChargeGroupsInHomeZone)
5578 gmx_domdec_comm_t *comm;
5579 gmx_domdec_zones_t *zones;
5588 zones = &comm->zones;
5590 /* Do we need to determine extra distances for multi-body bondeds? */
5591 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
5593 for (z = zone_start; z < zone_end; z++)
5595 /* Copy cell limits to zone limits.
5596 * Valid for non-DD dims and non-shifted dims.
5598 copy_rvec(comm->cell_x0, zones->size[z].x0);
5599 copy_rvec(comm->cell_x1, zones->size[z].x1);
5602 for (d = 0; d < dd->ndim; d++)
5606 for (z = 0; z < zones->n; z++)
5608 /* With a staggered grid we have different sizes
5609 * for non-shifted dimensions.
5611 if (isDlbOn(dd->comm) && zones->shift[z][dim] == 0)
5615 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
5616 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
5620 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
5621 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
5627 rcmbs = comm->cutoff_mbody;
5628 if (ddbox->tric_dir[dim])
5630 rcs /= ddbox->skew_fac[dim];
5631 rcmbs /= ddbox->skew_fac[dim];
5634 /* Set the lower limit for the shifted zone dimensions */
5635 for (z = zone_start; z < zone_end; z++)
5637 if (zones->shift[z][dim] > 0)
5640 if (!isDlbOn(dd->comm) || d == 0)
5642 zones->size[z].x0[dim] = comm->cell_x1[dim];
5643 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
5647 /* Here we take the lower limit of the zone from
5648 * the lowest domain of the zone below.
5652 zones->size[z].x0[dim] =
5653 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
5659 zones->size[z].x0[dim] =
5660 zones->size[zone_perm[2][z-4]].x0[dim];
5664 zones->size[z].x0[dim] =
5665 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
5668 /* A temporary limit, is updated below */
5669 zones->size[z].x1[dim] = zones->size[z].x0[dim];
5673 for (zi = 0; zi < zones->nizone; zi++)
5675 if (zones->shift[zi][dim] == 0)
5677 /* This takes the whole zone into account.
5678 * With multiple pulses this will lead
5679 * to a larger zone then strictly necessary.
5681 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
5682 zones->size[zi].x1[dim]+rcmbs);
5690 /* Loop over the i-zones to set the upper limit of each
5693 for (zi = 0; zi < zones->nizone; zi++)
5695 if (zones->shift[zi][dim] == 0)
5697 /* We should only use zones up to zone_end */
5698 int jZoneEnd = std::min(zones->izone[zi].j1, zone_end);
5699 for (z = zones->izone[zi].j0; z < jZoneEnd; z++)
5701 if (zones->shift[z][dim] > 0)
5703 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
5704 zones->size[zi].x1[dim]+rcs);
5711 for (z = zone_start; z < zone_end; z++)
5713 /* Initialization only required to keep the compiler happy */
5714 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
5717 /* To determine the bounding box for a zone we need to find
5718 * the extreme corners of 4, 2 or 1 corners.
5720 nc = 1 << (ddbox->nboundeddim - 1);
5722 for (c = 0; c < nc; c++)
5724 /* Set up a zone corner at x=0, ignoring trilinic couplings */
5728 corner[YY] = zones->size[z].x0[YY];
5732 corner[YY] = zones->size[z].x1[YY];
5736 corner[ZZ] = zones->size[z].x0[ZZ];
5740 corner[ZZ] = zones->size[z].x1[ZZ];
5742 if (dd->ndim == 1 && dd->dim[0] < ZZ && ZZ < dd->npbcdim &&
5743 box[ZZ][1 - dd->dim[0]] != 0)
5745 /* With 1D domain decomposition the cg's are not in
5746 * the triclinic box, but triclinic x-y and rectangular y/x-z.
5747 * Shift the corner of the z-vector back to along the box
5748 * vector of dimension d, so it will later end up at 0 along d.
5749 * This can affect the location of this corner along dd->dim[0]
5750 * through the matrix operation below if box[d][dd->dim[0]]!=0.
5752 int d = 1 - dd->dim[0];
5754 corner[d] -= corner[ZZ]*box[ZZ][d]/box[ZZ][ZZ];
5756 /* Apply the triclinic couplings */
5757 assert(ddbox->npbcdim <= DIM);
5758 for (i = YY; i < ddbox->npbcdim; i++)
5760 for (j = XX; j < i; j++)
5762 corner[j] += corner[i]*box[i][j]/box[i][i];
5767 copy_rvec(corner, corner_min);
5768 copy_rvec(corner, corner_max);
5772 for (i = 0; i < DIM; i++)
5774 corner_min[i] = std::min(corner_min[i], corner[i]);
5775 corner_max[i] = std::max(corner_max[i], corner[i]);
5779 /* Copy the extreme cornes without offset along x */
5780 for (i = 0; i < DIM; i++)
5782 zones->size[z].bb_x0[i] = corner_min[i];
5783 zones->size[z].bb_x1[i] = corner_max[i];
5785 /* Add the offset along x */
5786 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
5787 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
5790 if (zone_start == 0)
5793 for (dim = 0; dim < DIM; dim++)
5795 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
5797 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0] - numMovedChargeGroupsInHomeZone)/vol;
5802 for (z = zone_start; z < zone_end; z++)
5804 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
5806 zones->size[z].x0[XX], zones->size[z].x1[XX],
5807 zones->size[z].x0[YY], zones->size[z].x1[YY],
5808 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
5809 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
5811 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
5812 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
5813 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
5818 static int comp_cgsort(const void *a, const void *b)
5822 gmx_cgsort_t *cga, *cgb;
5823 cga = (gmx_cgsort_t *)a;
5824 cgb = (gmx_cgsort_t *)b;
5826 comp = cga->nsc - cgb->nsc;
5829 comp = cga->ind_gl - cgb->ind_gl;
5835 /* Order data in \p dataToSort according to \p sort
5837 * Note: both buffers should have at least \p sort.size() elements.
5839 template <typename T>
5841 orderVector(gmx::ArrayRef<const gmx_cgsort_t> sort,
5842 gmx::ArrayRef<T> dataToSort,
5843 gmx::ArrayRef<T> sortBuffer)
5845 GMX_ASSERT(dataToSort.size() >= sort.size(), "The vector needs to be sufficiently large");
5846 GMX_ASSERT(sortBuffer.size() >= sort.size(), "The sorting buffer needs to be sufficiently large");
5848 /* Order the data into the temporary buffer */
5850 for (const gmx_cgsort_t &entry : sort)
5852 sortBuffer[i++] = dataToSort[entry.ind];
5855 /* Copy back to the original array */
5856 std::copy(sortBuffer.begin(), sortBuffer.begin() + sort.size(),
5857 dataToSort.begin());
5860 /* Order data in \p dataToSort according to \p sort
5862 * Note: \p vectorToSort should have at least \p sort.size() elements,
5863 * \p workVector is resized when it is too small.
5865 template <typename T>
5867 orderVector(gmx::ArrayRef<const gmx_cgsort_t> sort,
5868 gmx::ArrayRef<T> vectorToSort,
5869 std::vector<T> *workVector)
5871 if (workVector->size() < sort.size())
5873 workVector->resize(sort.size());
5875 orderVector<T>(sort, vectorToSort, *workVector);
5878 static void order_vec_atom(const gmx::RangePartitioning *atomGroups,
5879 gmx::ArrayRef<const gmx_cgsort_t> sort,
5880 gmx::ArrayRef<gmx::RVec> v,
5881 gmx::ArrayRef<gmx::RVec> buf)
5883 if (atomGroups == nullptr)
5885 /* Avoid the useless loop of the atoms within a cg */
5886 orderVector(sort, v, buf);
5891 /* Order the data */
5893 for (const gmx_cgsort_t &entry : sort)
5895 for (int i : atomGroups->block(entry.ind))
5897 copy_rvec(v[i], buf[a]);
5903 /* Copy back to the original array */
5904 for (int a = 0; a < atot; a++)
5906 copy_rvec(buf[a], v[a]);
5910 /* Returns whether a < b */
5911 static bool compareCgsort(const gmx_cgsort_t &a,
5912 const gmx_cgsort_t &b)
5914 return (a.nsc < b.nsc ||
5915 (a.nsc == b.nsc && a.ind_gl < b.ind_gl));
5918 static void orderedSort(gmx::ArrayRef<const gmx_cgsort_t> stationary,
5919 gmx::ArrayRef<gmx_cgsort_t> moved,
5920 std::vector<gmx_cgsort_t> *sort1)
5922 /* The new indices are not very ordered, so we qsort them */
5923 gmx_qsort_threadsafe(moved.data(), moved.size(), sizeof(moved[0]), comp_cgsort);
5925 /* stationary is already ordered, so now we can merge the two arrays */
5926 sort1->resize(stationary.size() + moved.size());
5927 std::merge(stationary.begin(), stationary.end(),
5928 moved.begin(), moved.end(),
5933 /* Set the sorting order for systems with charge groups, returned in sort->sort.
5934  * The order is according to the global charge group index.
5935  * This adds and removes charge groups that moved between domains.
 *
 * When ncg_home_old >= 0 the previous order is exploited: groups whose
 * ns grid cell did not change stay in the already-ordered "stationary"
 * list and only the moved/new groups are qsorted and merged in.
 * With ncg_home_old < 0 all home groups are qsorted from scratch;
 * groups flagged as moved off this domain (cell index >= movedValue)
 * sort to the end and are removed by the final resize.
 */
5937 static void dd_sort_order(const gmx_domdec_t *dd,
5938 const t_forcerec *fr,
5940 gmx_domdec_sort_t *sort)
/* a[i] is the ns grid cell index of home charge group i */
5942 const int *a = fr->ns->grid->cell_index;
/* Cell indices at or above this value mark groups that moved to another rank */
5944 const int movedValue = NSGRID_SIGNAL_MOVED_FAC*fr->ns->grid->ncells;
5946 if (ncg_home_old >= 0)
5948 std::vector<gmx_cgsort_t> &stationary = sort->stationary;
5949 std::vector<gmx_cgsort_t> &moved = sort->moved;
5951 /* The charge groups that remained in the same ns grid cell
5952  * are completely ordered. So we can sort efficiently by sorting
5953  * the charge groups that did move into the stationary list.
5954  * Note: push_back() seems to be slightly slower than direct access.
5958 for (int i = 0; i < dd->ncg_home; i++)
5960 /* Check if this cg did not move to another node */
5961 if (a[i] < movedValue)
5965 entry.ind_gl = dd->globalAtomGroupIndices[i];
5968 if (i >= ncg_home_old || a[i] != sort->sorted[i].nsc)
5970 /* This cg is new on this node or moved ns grid cell */
5971 moved.push_back(entry);
5975 /* This cg did not move */
5976 stationary.push_back(entry);
5983 fprintf(debug, "ordered sort cgs: stationary %zu moved %zu\n",
5984 stationary.size(), moved.size());
5986 /* Sort efficiently */
5987 orderedSort(stationary, moved, &sort->sorted);
5991 std::vector<gmx_cgsort_t> &cgsort = sort->sorted;
5993 cgsort.reserve(dd->ncg_home);
5995 for (int i = 0; i < dd->ncg_home; i++)
5997 /* Sort on the ns grid cell indices
5998  * and the global topology index
6002 entry.ind_gl = dd->globalAtomGroupIndices[i];
6004 cgsort.push_back(entry);
6005 if (cgsort[i].nsc < movedValue)
6012 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, numCGNew);
6014 /* Determine the order of the charge groups using qsort */
6015 gmx_qsort_threadsafe(cgsort.data(), dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
6017 /* Remove the charge groups which are no longer at home here */
6018 cgsort.resize(numCGNew);
6022 /* Returns the sorting order for atoms based on the nbnxn grid order in sort */
/* Only the .ind field of the entries is filled in; nsc and ind_gl are
 * unused with the Verlet scheme (see comment below).
 * NOTE(review): a condition between the loop header and the assignment
 * appears elided in this excerpt (presumably skipping grid filler
 * entries), which is why numSorted can end up smaller than
 * atomOrder.size() — confirm against the full source.
 */
6023 static void dd_sort_order_nbnxn(const t_forcerec *fr,
6024 std::vector<gmx_cgsort_t> *sort)
6026 gmx::ArrayRef<const int> atomOrder = nbnxn_get_atomorder(fr->nbv->nbs.get());
6028 /* Using push_back() instead of this resize results in much slower code */
6029 sort->resize(atomOrder.size());
6030 gmx::ArrayRef<gmx_cgsort_t> buffer = *sort;
6031 size_t numSorted = 0;
6032 for (int i : atomOrder)
6036 /* The values of nsc and ind_gl are not used in this case */
6037 buffer[numSorted++].ind = i;
/* Shrink to the number of entries actually filled in */
6040 sort->resize(numSorted);
/* Sort the local state (coordinates, velocities, cg_p), the charge
 * group centers of mass, the global group indices and the cginfo
 * according to the new charge group order, and update the dependent
 * bookkeeping (atom grouping, home atom range, grid order).
 *
 * ncg_home_old: previous home group count; -1 forces a full resort
 * (passed through to dd_sort_order for the group scheme).
 */
6043 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
/* First determine the new order, scheme-dependent */
6046 gmx_domdec_sort_t *sort = dd->comm->sort.get();
6048 switch (fr->cutoff_scheme)
6051 dd_sort_order(dd, fr, ncg_home_old, sort);
6054 dd_sort_order_nbnxn(fr, &sort->sorted);
6057 gmx_incons("unimplemented");
6060 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
6062 /* We alloc with the old size, since cgindex is still old */
6063 GMX_ASSERT(atomGrouping.numBlocks() == dd->ncg_home, "atomGroups and dd should be consistent");
6064 DDBufferAccess<gmx::RVec> rvecBuffer(dd->comm->rvecBuffer, atomGrouping.fullRange().end());
/* With charge groups we reorder per group, otherwise per atom */
6066 const gmx::RangePartitioning *atomGroupsPtr = (dd->comm->bCGs ? &atomGrouping : nullptr);
6068 /* Set the new home atom/charge group count */
6069 dd->ncg_home = sort->sorted.size();
6072 fprintf(debug, "Set the new home charge group count to %d\n",
6076 /* Reorder the state */
6077 gmx::ArrayRef<const gmx_cgsort_t> cgsort = sort->sorted;
6078 GMX_RELEASE_ASSERT(cgsort.size() == static_cast<size_t>(dd->ncg_home), "We should sort all the home atom groups");
/* Only reorder the state entries that are actually present */
6080 if (state->flags & (1 << estX))
6082 order_vec_atom(atomGroupsPtr, cgsort, state->x, rvecBuffer.buffer);
6084 if (state->flags & (1 << estV))
6086 order_vec_atom(atomGroupsPtr, cgsort, state->v, rvecBuffer.buffer);
6088 if (state->flags & (1 << estCGP))
6090 order_vec_atom(atomGroupsPtr, cgsort, state->cg_p, rvecBuffer.buffer);
6093 if (fr->cutoff_scheme == ecutsGROUP)
6096 gmx::ArrayRef<gmx::RVec> cgcmRef = gmx::arrayRefFromArray(reinterpret_cast<gmx::RVec *>(cgcm[0]), cgsort.size());
6097 orderVector(cgsort, cgcmRef, rvecBuffer.buffer);
6100 /* Reorder the global cg index */
6101 orderVector<int>(cgsort, dd->globalAtomGroupIndices, &sort->intBuffer);
6102 /* Reorder the cginfo */
6103 orderVector<int>(cgsort, gmx::arrayRefFromArray(fr->cginfo, cgsort.size()), &sort->intBuffer);
6104 /* Rebuild the local cg index */
6107 /* We make a new, ordered atomGroups object and assign it to
6108  * the old one. This causes some allocation overhead, but saves
6109  * a copy back of the whole index.
6111 gmx::RangePartitioning ordered;
6112 for (const gmx_cgsort_t &entry : cgsort)
6114 ordered.appendBlock(atomGrouping.block(entry.ind).size());
6116 dd->atomGrouping_ = ordered;
6120 dd->atomGrouping_.setAllBlocksSizeOne(dd->ncg_home);
6122 /* Set the home atom number */
6123 dd->comm->atomRanges.setEnd(DDAtomRanges::Type::Home, dd->atomGrouping().fullRange().end());
6125 if (fr->cutoff_scheme == ecutsVERLET)
6127 /* The atoms are now exactly in grid order, update the grid order */
6128 nbnxn_set_atomorder(fr->nbv->nbs.get());
6132 /* Copy the sorted ns cell indices back to the ns grid struct */
6133 for (size_t i = 0; i < cgsort.size(); i++)
6135 fr->ns->grid->cell_index[i] = cgsort[i].nsc;
6137 fr->ns->grid->nr = cgsort.size();
/* Accumulate the per-range atom counts of this partitioning into the
 * running DD statistics (reported later by print_dd_statistics).
 * NOTE(review): the accumulation target feeding from the atomRanges
 * difference is elided in this excerpt — presumably
 * comm->sum_nat[i] += ...; confirm against the full source.
 */
6141 static void add_dd_statistics(gmx_domdec_t *dd)
6143 gmx_domdec_comm_t *comm = dd->comm;
6145 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6147 auto range = static_cast<DDAtomRanges::Type>(i);
/* Number of atoms in this range for the current partitioning */
6149 comm->atomRanges.end(range) - comm->atomRanges.start(range);
/* Reset the accumulated DD statistics (per-range atom sums) and the
 * load-balancing counters, so counting starts fresh for the production
 * part of the run.
 */
6154 void reset_dd_statistics_counters(gmx_domdec_t *dd)
6156 gmx_domdec_comm_t *comm = dd->comm;
6158 /* Reset all the statistics and counters for total run counting */
6159 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6161 comm->sum_nat[i] = 0;
6165 comm->load_step = 0;
6168 clear_ivec(comm->load_lim);
/* Sum the DD communication statistics over all ranks and print the
 * per-step averages for force, vsite and constraint communication to
 * the log file; also prints the DLB load report when load was recorded
 * for a dynamical integrator.  Only the rank with a valid fplog prints.
 */
6173 void print_dd_statistics(const t_commrec *cr, const t_inputrec *ir, FILE *fplog)
6175 gmx_domdec_comm_t *comm = cr->dd->comm;
/* Sum the per-range atom counts over all ranks */
6177 const int numRanges = static_cast<int>(DDAtomRanges::Type::Number);
6178 gmx_sumd(numRanges, comm->sum_nat, cr);
6180 if (fplog == nullptr)
6185 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
6187 for (int i = static_cast<int>(DDAtomRanges::Type::Zones); i < numRanges; i++)
6189 auto range = static_cast<DDAtomRanges::Type>(i);
/* Average number of atoms in this range per partitioning */
6190 double av = comm->sum_nat[i]/comm->ndecomp;
6193 case DDAtomRanges::Type::Zones:
6195 " av. #atoms communicated per step for force: %d x %.1f\n",
6198 case DDAtomRanges::Type::Vsites:
6199 if (cr->dd->vsite_comm)
6202 " av. #atoms communicated per step for vsites: %d x %.1f\n",
/* x is sent 3 times with (P)ME/Ewald (also for energy terms), else 2 */
6203 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
6207 case DDAtomRanges::Type::Constraints:
6208 if (cr->dd->constraint_comm)
6211 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
6212 1 + ir->nLincsIter, av);
6216 gmx_incons(" Unknown type for DD statistics");
6219 fprintf(fplog, "\n");
6221 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
6223 print_dd_load_av(fplog, cr->dd);
/* Repartition the system over the DD ranks.
 *
 * Decides whether to do dynamic load balancing (DLB) this step,
 * collects/report load, redistributes charge groups, sorts the local
 * state, sets up the halo communication, builds the local topology and
 * updates all locality-dependent modules (PME parameter sending, pull,
 * rotation, swap groups, IMD).
 *
 * bMasterState: the master rank holds the complete state and it is
 * (re)distributed from there; otherwise partitioning proceeds from the
 * current local states.
 *
 * NOTE(review): this excerpt elides many lines of the original function
 * (braces, else-branches and some statements); the comments below
 * annotate only what is visible here.
 */
6227 void dd_partition_system(FILE *fplog,
6229 const t_commrec *cr,
6230 gmx_bool bMasterState,
6232 t_state *state_global,
6233 const gmx_mtop_t *top_global,
6234 const t_inputrec *ir,
6235 t_state *state_local,
6236 PaddedRVecVector *f,
6237 gmx::MDAtoms *mdAtoms,
6238 gmx_localtop_t *top_local,
6241 gmx::Constraints *constr,
6243 gmx_wallcycle *wcycle,
6247 gmx_domdec_comm_t *comm;
6248 gmx_ddbox_t ddbox = {0};
6250 gmx_int64_t step_pcoupl;
6251 rvec cell_ns_x0, cell_ns_x1;
6252 int ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
6253 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckWhetherToTurnDlbOn, bLogLoad;
6254 gmx_bool bRedist, bSortCG, bResortAll;
6255 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
6259 wallcycle_start(wcycle, ewcDOMDEC);
/* Determine whether the box changed, which forces DLB this step */
6264 // TODO if the update code becomes accessible here, use
6265 // upd->deform for this logic.
6266 bBoxChanged = (bMasterState || inputrecDeform(ir));
6267 if (ir->epc != epcNO)
6269 /* With nstpcouple > 1 pressure coupling happens.
6270  * one step after calculating the pressure.
6271  * Box scaling happens at the end of the MD step,
6272  * after the DD partitioning.
6273  * We therefore have to do DLB in the first partitioning
6274  * after an MD step where P-coupling occurred.
6275  * We need to determine the last step in which p-coupling occurred.
6276  * MRS -- need to validate this for vv?
6278 int n = ir->nstpcouple;
6281 step_pcoupl = step - 1;
6285 step_pcoupl = ((step - 1)/n)*n + 1;
6287 if (step_pcoupl >= comm->partition_step)
6293 bNStGlobalComm = (step % nstglobalcomm == 0);
6301 /* Should we do dynamic load balacing this step?
6302  * Since it requires (possibly expensive) global communication,
6303  * we might want to do DLB less frequently.
6305 if (bBoxChanged || ir->epc != epcNO)
6307 bDoDLB = bBoxChanged;
6311 bDoDLB = bNStGlobalComm;
6315 /* Check if we have recorded loads on the nodes */
6316 if (comm->bRecordLoad && dd_load_count(comm) > 0)
6318 bCheckWhetherToTurnDlbOn = dd_dlb_get_should_check_whether_to_turn_dlb_on(dd);
6320 /* Print load every nstlog, first and last step to the log file */
6321 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
6322 comm->n_load_collect == 0 ||
6324 (step + ir->nstlist > ir->init_step + ir->nsteps)));
6326 /* Avoid extra communication due to verbose screen output
6327  * when nstglobalcomm is set.
6329 if (bDoDLB || bLogLoad || bCheckWhetherToTurnDlbOn ||
6330 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
6332 get_load_distribution(dd, wcycle);
6337 dd_print_load(fplog, dd, step-1);
6341 dd_print_load_verbose(dd);
6344 comm->n_load_collect++;
6350 /* Add the measured cycles to the running average */
6351 const float averageFactor = 0.1f;
6352 comm->cyclesPerStepDlbExpAverage =
6353 (1 - averageFactor)*comm->cyclesPerStepDlbExpAverage +
6354 averageFactor*comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
6356 if (comm->dlbState == edlbsOnCanTurnOff &&
6357 dd->comm->n_load_have % c_checkTurnDlbOffInterval == c_checkTurnDlbOffInterval - 1)
6359 gmx_bool turnOffDlb;
6362 /* If the running averaged cycles with DLB are more
6363  * than before we turned on DLB, turn off DLB.
6364  * We will again run and check the cycles without DLB
6365  * and we can then decide if to turn off DLB forever.
6367 turnOffDlb = (comm->cyclesPerStepDlbExpAverage >
6368 comm->cyclesPerStepBeforeDLB);
6370 dd_bcast(dd, sizeof(turnOffDlb), &turnOffDlb);
6373 /* To turn off DLB, we need to redistribute the atoms */
6374 dd_collect_state(dd, state_local, state_global);
6375 bMasterState = TRUE;
6376 turn_off_dlb(fplog, cr, step);
6380 else if (bCheckWhetherToTurnDlbOn)
6382 gmx_bool turnOffDlbForever = FALSE;
6383 gmx_bool turnOnDlb = FALSE;
6385 /* Since the timings are node dependent, the master decides */
6388 /* If we recently turned off DLB, we want to check if
6389  * performance is better without DLB. We want to do this
6390  * ASAP to minimize the chance that external factors
6391  * slowed down the DLB step are gone here and we
6392  * incorrectly conclude that DLB was causing the slowdown.
6393  * So we measure one nstlist block, no running average.
6395 if (comm->haveTurnedOffDlb &&
6396 comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep] <
6397 comm->cyclesPerStepDlbExpAverage)
6399 /* After turning off DLB we ran nstlist steps in fewer
6400  * cycles than with DLB. This likely means that DLB
6401  * in not benefical, but this could be due to a one
6402  * time unlucky fluctuation, so we require two such
6403  * observations in close succession to turn off DLB
6406 if (comm->dlbSlowerPartitioningCount > 0 &&
6407 dd->ddp_count < comm->dlbSlowerPartitioningCount + 10*c_checkTurnDlbOnInterval)
6409 turnOffDlbForever = TRUE;
6411 comm->haveTurnedOffDlb = false;
6412 /* Register when we last measured DLB slowdown */
6413 comm->dlbSlowerPartitioningCount = dd->ddp_count;
6417 /* Here we check if the max PME rank load is more than 0.98
6418  * the max PP force load. If so, PP DLB will not help,
6419  * since we are (almost) limited by PME. Furthermore,
6420  * DLB will cause a significant extra x/f redistribution
6421  * cost on the PME ranks, which will then surely result
6422  * in lower total performance.
6424 if (cr->npmenodes > 0 &&
6425 dd_pme_f_ratio(dd) > 1 - DD_PERF_LOSS_DLB_ON)
6431 turnOnDlb = (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
6437 gmx_bool turnOffDlbForever;
6441 turnOffDlbForever, turnOnDlb
/* Broadcast the master's DLB decision to all ranks */
6443 dd_bcast(dd, sizeof(bools), &bools);
6444 if (bools.turnOffDlbForever)
6446 turn_off_dlb_forever(fplog, cr, step);
6448 else if (bools.turnOnDlb)
6450 turn_on_dlb(fplog, cr, step);
6455 comm->n_load_have++;
6458 cgs_gl = &comm->cgs_gl;
6463 /* Clear the old state */
6464 clearDDStateIndices(dd, 0, 0);
/* bMasterState branch: distribute the complete state from the master */
6467 auto xGlobal = positionsFromStatePointer(state_global);
6469 set_ddbox(dd, true, ir,
6470 DDMASTER(dd) ? state_global->box : nullptr,
6474 distributeState(fplog, dd, state_global, ddbox, state_local, f);
6476 dd_make_local_cgs(dd, &top_local->cgs);
6478 /* Ensure that we have space for the new distribution */
6479 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
6481 if (fr->cutoff_scheme == ecutsGROUP)
6483 calc_cgcm(fplog, 0, dd->ncg_home,
6484 &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
6487 inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());
6489 dd_set_cginfo(dd->globalAtomGroupIndices, 0, dd->ncg_home, fr, comm->bLocalCG);
/* The local state is out of date with the DD counter: rebuild the
 * indices from the atom group indices stored in state_local */
6491 else if (state_local->ddp_count != dd->ddp_count)
6493 if (state_local->ddp_count > dd->ddp_count)
6495 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
6498 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
6500 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
6503 /* Clear the old state */
6504 clearDDStateIndices(dd, 0, 0);
6506 /* Restore the atom group indices from state_local */
6507 restoreAtomGroups(dd, cgs_gl->index, state_local);
6508 make_dd_indices(dd, cgs_gl->index, 0);
6509 ncgindex_set = dd->ncg_home;
6511 if (fr->cutoff_scheme == ecutsGROUP)
6513 /* Redetermine the cg COMs */
6514 calc_cgcm(fplog, 0, dd->ncg_home,
6515 &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
6518 inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());
6520 dd_set_cginfo(dd->globalAtomGroupIndices, 0, dd->ncg_home, fr, comm->bLocalCG);
6522 set_ddbox(dd, bMasterState, ir, state_local->box,
6523 true, state_local->x, &ddbox);
6525 bRedist = isDlbOn(comm);
6529 /* We have the full state, only redistribute the cgs */
6531 /* Clear the non-home indices */
6532 clearDDStateIndices(dd, dd->ncg_home, comm->atomRanges.numHomeAtoms());
6535 /* Avoid global communication for dim's without pbc and -gcom */
6536 if (!bNStGlobalComm)
6538 copy_rvec(comm->box0, ddbox.box0 );
6539 copy_rvec(comm->box_size, ddbox.box_size);
6541 set_ddbox(dd, bMasterState, ir, state_local->box,
6542 bNStGlobalComm, state_local->x, &ddbox);
6547 /* For dim's without pbc and -gcom */
6548 copy_rvec(ddbox.box0, comm->box0 );
6549 copy_rvec(ddbox.box_size, comm->box_size);
6551 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
6554 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
6556 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
6559 /* Check if we should sort the charge groups */
6560 bSortCG = (bMasterState || bRedist);
6562 ncg_home_old = dd->ncg_home;
6564 /* When repartitioning we mark charge groups that will move to neighboring
6565  * DD cells, but we do not move them right away for performance reasons.
6566  * Thus we need to keep track of how many charge groups will move for
6567  * obtaining correct local charge group / atom counts.
6572 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
6574 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
6576 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
6578 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
6581 if (fr->cutoff_scheme == ecutsGROUP)
6583 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
6585 &comm->cell_x0, &comm->cell_x1,
6586 dd->ncg_home, fr->cg_cm,
6587 cell_ns_x0, cell_ns_x1, &grid_density);
6591 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
/* (Re)initialize the neighbor-search grid for the chosen cutoff scheme */
6595 switch (fr->cutoff_scheme)
6598 copy_ivec(fr->ns->grid->n, ncells_old);
6599 grid_first(fplog, fr->ns->grid, dd, &ddbox,
6600 state_local->box, cell_ns_x0, cell_ns_x1,
6601 fr->rlist, grid_density);
6604 nbnxn_get_ncells(fr->nbv->nbs.get(), &ncells_old[XX], &ncells_old[YY]);
6607 gmx_incons("unimplemented");
6609 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
6610 copy_ivec(ddbox.tric_dir, comm->tric_dir);
6614 wallcycle_sub_start(wcycle, ewcsDD_GRID);
6616 /* Sort the state on charge group position.
6617  * This enables exact restarts from this step.
6618  * It also improves performance by about 15% with larger numbers
6619  * of atoms per node.
6622 /* Fill the ns grid with the home cell,
6623  * so we can sort with the indices.
6625 set_zones_ncg_home(dd);
6627 switch (fr->cutoff_scheme)
6630 set_zones_size(dd, state_local->box, &ddbox, 0, 1, ncg_moved);
6632 nbnxn_put_on_grid(fr->nbv->nbs.get(), fr->ePBC, state_local->box,
6634 comm->zones.size[0].bb_x0,
6635 comm->zones.size[0].bb_x1,
6637 comm->zones.dens_zone0,
6639 as_rvec_array(state_local->x.data()),
6640 ncg_moved, bRedist ? comm->movedBuffer.data() : nullptr,
6641 fr->nbv->grp[eintLocal].kernel_type,
6644 nbnxn_get_ncells(fr->nbv->nbs.get(), &ncells_new[XX], &ncells_new[YY]);
6647 fill_grid(&comm->zones, fr->ns->grid, dd->ncg_home,
6648 0, dd->ncg_home, fr->cg_cm);
6650 copy_ivec(fr->ns->grid->n, ncells_new);
6653 gmx_incons("unimplemented");
6656 bResortAll = bMasterState;
6658 /* Check if we can user the old order and ns grid cell indices
6659  * of the charge groups to sort the charge groups efficiently.
6661 if (ncells_new[XX] != ncells_old[XX] ||
6662 ncells_new[YY] != ncells_old[YY] ||
6663 ncells_new[ZZ] != ncells_old[ZZ])
6670 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
6671 gmx_step_str(step, sbuf), dd->ncg_home);
6673 dd_sort_state(dd, fr->cg_cm, fr, state_local,
6674 bResortAll ? -1 : ncg_home_old);
6676 /* After sorting and compacting we set the correct size */
6677 dd_resize_state(state_local, f, comm->atomRanges.numHomeAtoms());
6679 /* Rebuild all the indices */
6680 ga2la_clear(dd->ga2la);
6683 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
6686 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
6688 /* Setup up the communication and communicate the coordinates */
6689 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
6691 /* Set the indices */
6692 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
6694 /* Set the charge group boundaries for neighbor searching */
6695 set_cg_boundaries(&comm->zones);
6697 if (fr->cutoff_scheme == ecutsVERLET)
6699 /* When bSortCG=true, we have already set the size for zone 0 */
6700 set_zones_size(dd, state_local->box, &ddbox,
6701 bSortCG ? 1 : 0, comm->zones.n,
6705 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
6708 write_dd_pdb("dd_home",step,"dump",top_global,cr,
6709 -1,as_rvec_array(state_local->x.data()),state_local->box);
6712 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
6714 /* Extract a local topology from the global topology */
6715 for (int i = 0; i < dd->ndim; i++)
6717 np[dd->dim[i]] = comm->cd[i].numPulses();
6719 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
6720 comm->cellsize_min, np,
6722 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : as_rvec_array(state_local->x.data()),
6723 vsite, top_global, top_local);
6725 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
6727 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
6729 /* Set up the special atom communication */
6730 int n = comm->atomRanges.end(DDAtomRanges::Type::Zones);
6731 for (int i = static_cast<int>(DDAtomRanges::Type::Zones) + 1; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6733 auto range = static_cast<DDAtomRanges::Type>(i);
6736 case DDAtomRanges::Type::Vsites:
6737 if (vsite && vsite->n_intercg_vsite)
6739 n = dd_make_local_vsites(dd, n, top_local->idef.il);
6742 case DDAtomRanges::Type::Constraints:
6743 if (dd->bInterCGcons || dd->bInterCGsettles)
6745 /* Only for inter-cg constraints we need special code */
6746 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
6747 constr, ir->nProjOrder,
6748 top_local->idef.il);
6752 gmx_incons("Unknown special atom type setup");
6754 comm->atomRanges.setEnd(range, n);
6757 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
6759 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
6761 /* Make space for the extra coordinates for virtual site
6762  * or constraint communication.
6764 state_local->natoms = comm->atomRanges.numAtomsTotal();
6766 dd_resize_state(state_local, f, state_local->natoms);
6768 if (fr->haveDirectVirialContributions)
6770 if (vsite && vsite->n_intercg_vsite)
6772 nat_f_novirsum = comm->atomRanges.end(DDAtomRanges::Type::Vsites);
6776 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
6778 nat_f_novirsum = comm->atomRanges.end(DDAtomRanges::Type::Zones);
6782 nat_f_novirsum = comm->atomRanges.numHomeAtoms();
6791 /* Set the number of atoms required for the force calculation.
6792  * Forces need to be constrained when doing energy
6793  * minimization. For simple simulations we could avoid some
6794  * allocation, zeroing and copying, but this is probably not worth
6795  * the complications and checking.
6797 forcerec_set_ranges(fr, dd->ncg_home, dd->globalAtomGroupIndices.size(),
6798 comm->atomRanges.end(DDAtomRanges::Type::Zones),
6799 comm->atomRanges.end(DDAtomRanges::Type::Constraints),
6802 /* Update atom data for mdatoms and several algorithms */
6803 mdAlgorithmsSetupAtomData(cr, ir, top_global, top_local, fr,
6804 nullptr, mdAtoms, constr, vsite, nullptr);
6806 auto mdatoms = mdAtoms->mdatoms();
6807 if (!thisRankHasDuty(cr, DUTY_PME))
6809 /* Send the charges and/or c6/sigmas to our PME only node */
6810 gmx_pme_send_parameters(cr,
6812 mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
6813 mdatoms->chargeA, mdatoms->chargeB,
6814 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
6815 mdatoms->sigmaA, mdatoms->sigmaB,
6816 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
6821 /* Update the local pull groups */
6822 dd_make_local_pull_groups(cr, ir->pull_work, mdatoms);
6827 /* Update the local rotation groups */
6828 dd_make_local_rotation_groups(dd, ir->rot);
6831 if (ir->eSwapCoords != eswapNO)
6833 /* Update the local groups needed for ion swapping */
6834 dd_make_local_swap_groups(dd, ir->swap);
6837 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
6838 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
6840 add_dd_statistics(dd);
6842 /* Make sure we only count the cycles for this DD partitioning */
6843 clear_dd_cycle_counts(dd);
6845 /* Because the order of the atoms might have changed since
6846  * the last vsite construction, we need to communicate the constructing
6847  * atom coordinates again (for spreading the forces this MD step).
6849 dd_move_x_vsites(dd, state_local->box, as_rvec_array(state_local->x.data()));
6851 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
6853 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
6855 dd_move_x(dd, state_local->box, state_local->x, nullWallcycle);
6856 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
6857 -1, as_rvec_array(state_local->x.data()), state_local->box);
6860 /* Store the partitioning step */
6861 comm->partition_step = step;
6863 /* Increase the DD partitioning counter */
6865 /* The state currently matches this DD partitioning count, store it */
6866 state_local->ddp_count = dd->ddp_count;
6869 /* The DD master node knows the complete cg distribution,
6870  * store the count so we can possibly skip the cg info communication.
6872 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
6875 if (comm->DD_debug > 0)
6877 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
6878 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
6879 "after partitioning");
6882 wallcycle_stop(wcycle, ewcDOMDEC);
6885 /*! \brief Check whether bonded interactions are missing, if appropriate */
/* Compares the global count of assigned bonded interactions against the
 * expected total stored in dd; on mismatch prints diagnostics and exits
 * via dd_print_missing_interactions (which does not return).  The check
 * is armed via *shouldCheckNumberOfBondedInteractions and disarmed
 * after one successful check.
 */
6886 void checkNumberOfBondedInteractions(FILE *fplog,
6888 int totalNumberOfBondedInteractions,
6889 const gmx_mtop_t *top_global,
6890 const gmx_localtop_t *top_local,
6891 const t_state *state,
6892 bool *shouldCheckNumberOfBondedInteractions)
6894 if (*shouldCheckNumberOfBondedInteractions)
6896 if (totalNumberOfBondedInteractions != cr->dd->nbonded_global)
6898 dd_print_missing_interactions(fplog, cr, totalNumberOfBondedInteractions, top_global, top_local, state); // Does not return
6900 *shouldCheckNumberOfBondedInteractions = false;