2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
52 #include "gromacs/compat/make_unique.h"
53 #include "gromacs/domdec/collect.h"
54 #include "gromacs/domdec/dlbtiming.h"
55 #include "gromacs/domdec/domdec_network.h"
56 #include "gromacs/domdec/ga2la.h"
57 #include "gromacs/ewald/pme.h"
58 #include "gromacs/fileio/gmxfio.h"
59 #include "gromacs/fileio/pdbio.h"
60 #include "gromacs/gmxlib/chargegroup.h"
61 #include "gromacs/gmxlib/network.h"
62 #include "gromacs/gmxlib/nrnb.h"
63 #include "gromacs/gpu_utils/gpu_utils.h"
64 #include "gromacs/hardware/hw_info.h"
65 #include "gromacs/imd/imd.h"
66 #include "gromacs/listed-forces/manage-threading.h"
67 #include "gromacs/math/functions.h"
68 #include "gromacs/math/vec.h"
69 #include "gromacs/math/vectypes.h"
70 #include "gromacs/mdlib/constr.h"
71 #include "gromacs/mdlib/constraintrange.h"
72 #include "gromacs/mdlib/forcerec.h"
73 #include "gromacs/mdlib/gmx_omp_nthreads.h"
74 #include "gromacs/mdlib/lincs.h"
75 #include "gromacs/mdlib/mdatoms.h"
76 #include "gromacs/mdlib/mdrun.h"
77 #include "gromacs/mdlib/mdsetup.h"
78 #include "gromacs/mdlib/nb_verlet.h"
79 #include "gromacs/mdlib/nbnxn_grid.h"
80 #include "gromacs/mdlib/nsgrid.h"
81 #include "gromacs/mdlib/vsite.h"
82 #include "gromacs/mdtypes/commrec.h"
83 #include "gromacs/mdtypes/df_history.h"
84 #include "gromacs/mdtypes/forcerec.h"
85 #include "gromacs/mdtypes/inputrec.h"
86 #include "gromacs/mdtypes/md_enums.h"
87 #include "gromacs/mdtypes/mdatom.h"
88 #include "gromacs/mdtypes/nblist.h"
89 #include "gromacs/mdtypes/state.h"
90 #include "gromacs/pbcutil/ishift.h"
91 #include "gromacs/pbcutil/pbc.h"
92 #include "gromacs/pulling/pull.h"
93 #include "gromacs/pulling/pull_rotation.h"
94 #include "gromacs/swap/swapcoords.h"
95 #include "gromacs/timing/wallcycle.h"
96 #include "gromacs/topology/block.h"
97 #include "gromacs/topology/idef.h"
98 #include "gromacs/topology/ifunc.h"
99 #include "gromacs/topology/mtop_lookup.h"
100 #include "gromacs/topology/mtop_util.h"
101 #include "gromacs/topology/topology.h"
102 #include "gromacs/utility/basedefinitions.h"
103 #include "gromacs/utility/basenetwork.h"
104 #include "gromacs/utility/cstringutil.h"
105 #include "gromacs/utility/exceptions.h"
106 #include "gromacs/utility/fatalerror.h"
107 #include "gromacs/utility/gmxmpi.h"
108 #include "gromacs/utility/qsort_threadsafe.h"
109 #include "gromacs/utility/real.h"
110 #include "gromacs/utility/smalloc.h"
111 #include "gromacs/utility/stringutil.h"
113 #include "atomdistribution.h"
114 #include "cellsizes.h"
115 #include "distribute.h"
116 #include "domdec_constraints.h"
117 #include "domdec_internal.h"
118 #include "domdec_vsite.h"
119 #include "redistribute.h"
122 #define DD_NLOAD_MAX 9
124 const char *edlbs_names[edlbsNR] = { "off", "auto", "locked", "on", "on" };
126 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
129 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
130 #define DD_FLAG_NRCG 65535
131 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
132 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
134 /* The DD zone order */
135 static const ivec dd_zo[DD_MAXZONE] =
136 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
138 /* The non-bonded zone-pair setup for domain decomposition
139 * The first number is the i-zone, the second number the first j-zone seen by
140 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
141 * As is, this is for 3D decomposition, where there are 4 i-zones.
142 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
143 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
146 ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
151 /* Turn on DLB when the load imbalance causes this amount of total loss.
152 * There is a bit of overhead with DLB and it's difficult to achieve
153 * a load imbalance of less than 2% with DLB.
155 #define DD_PERF_LOSS_DLB_ON 0.02
157 /* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
158 #define DD_PERF_LOSS_WARN 0.05
161 /* We check if to turn on DLB at the first and every 100 DD partitionings.
162 * With large imbalance DLB will turn on at the first step, so we can
163 * make the interval so large that the MPI overhead of the check is negligible.
165 static const int c_checkTurnDlbOnInterval = 100;
166 /* We need to check if DLB results in worse performance and then turn it off.
167 * We check this more often then for turning DLB on, because the DLB can scale
168 * the domains very rapidly, so if unlucky the load imbalance can go up quickly
169 * and furthermore, we are already synchronizing often with DLB, so
170 * the overhead of the MPI Bcast is not that high.
172 static const int c_checkTurnDlbOffInterval = 20;
174 /* Forward declaration */
175 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue);
179 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
181 static void index2xyz(ivec nc,int ind,ivec xyz)
183 xyz[XX] = ind % nc[XX];
184 xyz[YY] = (ind / nc[XX]) % nc[YY];
185 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
189 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
191 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
192 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
193 xyz[ZZ] = ind % nc[ZZ];
196 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
201 ddindex = dd_index(dd->nc, c);
202 if (dd->comm->bCartesianPP_PME)
204 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
206 else if (dd->comm->bCartesianPP)
209 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
220 static gmx_bool dynamic_dd_box(const gmx_ddbox_t *ddbox, const t_inputrec *ir)
222 return (ddbox->nboundeddim < DIM || inputrecDynamicBox(ir));
225 int ddglatnr(const gmx_domdec_t *dd, int i)
235 if (i >= dd->comm->atomRanges.numAtomsTotal())
237 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
239 atnr = dd->globalAtomIndices[i] + 1;
245 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
247 return &dd->comm->cgs_gl;
250 void dd_store_state(gmx_domdec_t *dd, t_state *state)
254 if (state->ddp_count != dd->ddp_count)
256 gmx_incons("The MD state does not match the domain decomposition state");
259 state->cg_gl.resize(dd->ncg_home);
260 for (i = 0; i < dd->ncg_home; i++)
262 state->cg_gl[i] = dd->globalAtomGroupIndices[i];
265 state->ddp_count_cg_gl = dd->ddp_count;
268 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
270 return &dd->comm->zones;
273 void dd_get_ns_ranges(const gmx_domdec_t *dd, int icg,
274 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
276 gmx_domdec_zones_t *zones;
279 zones = &dd->comm->zones;
282 while (icg >= zones->izone[izone].cg1)
291 else if (izone < zones->nizone)
293 *jcg0 = zones->izone[izone].jcg0;
297 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
298 icg, izone, zones->nizone);
301 *jcg1 = zones->izone[izone].jcg1;
303 for (d = 0; d < dd->ndim; d++)
306 shift0[dim] = zones->izone[izone].shift0[dim];
307 shift1[dim] = zones->izone[izone].shift1[dim];
308 if (dd->comm->tric_dir[dim] || (isDlbOn(dd->comm) && d > 0))
310 /* A conservative approach, this can be optimized */
317 int dd_numHomeAtoms(const gmx_domdec_t &dd)
319 return dd.comm->atomRanges.numHomeAtoms();
322 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
324 /* We currently set mdatoms entries for all atoms:
325 * local + non-local + communicated for vsite + constraints
328 return dd->comm->atomRanges.numAtomsTotal();
331 int dd_natoms_vsite(const gmx_domdec_t *dd)
333 return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
336 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
338 *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
339 *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
342 void dd_move_x(gmx_domdec_t *dd,
344 gmx::ArrayRef<gmx::RVec> x,
345 gmx_wallcycle *wcycle)
347 wallcycle_start(wcycle, ewcMOVEX);
350 gmx_domdec_comm_t *comm;
351 gmx_domdec_comm_dim_t *cd;
352 rvec shift = {0, 0, 0};
353 gmx_bool bPBC, bScrew;
357 const RangePartitioning &atomGrouping = dd->atomGrouping();
360 nat_tot = comm->atomRanges.numHomeAtoms();
361 for (int d = 0; d < dd->ndim; d++)
363 bPBC = (dd->ci[dd->dim[d]] == 0);
364 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
367 copy_rvec(box[dd->dim[d]], shift);
370 for (const gmx_domdec_ind_t &ind : cd->ind)
372 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
373 gmx::ArrayRef<gmx::RVec> &sendBuffer = sendBufferAccess.buffer;
377 for (int g : ind.index)
379 for (int j : atomGrouping.block(g))
381 sendBuffer[n] = x[j];
388 for (int g : ind.index)
390 for (int j : atomGrouping.block(g))
392 /* We need to shift the coordinates */
393 for (int d = 0; d < DIM; d++)
395 sendBuffer[n][d] = x[j][d] + shift[d];
403 for (int g : ind.index)
405 for (int j : atomGrouping.block(g))
408 sendBuffer[n][XX] = x[j][XX] + shift[XX];
410 * This operation requires a special shift force
411 * treatment, which is performed in calc_vir.
413 sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
414 sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
420 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
422 gmx::ArrayRef<gmx::RVec> receiveBuffer;
423 if (cd->receiveInPlace)
425 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
429 receiveBuffer = receiveBufferAccess.buffer;
431 /* Send and receive the coordinates */
432 ddSendrecv(dd, d, dddirBackward,
433 sendBuffer, receiveBuffer);
435 if (!cd->receiveInPlace)
438 for (int zone = 0; zone < nzone; zone++)
440 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
442 x[i] = receiveBuffer[j++];
446 nat_tot += ind.nrecv[nzone+1];
451 wallcycle_stop(wcycle, ewcMOVEX);
454 void dd_move_f(gmx_domdec_t *dd,
455 gmx::ArrayRef<gmx::RVec> f,
457 gmx_wallcycle *wcycle)
459 wallcycle_start(wcycle, ewcMOVEF);
462 gmx_domdec_comm_t *comm;
463 gmx_domdec_comm_dim_t *cd;
466 gmx_bool bShiftForcesNeedPbc, bScrew;
470 const RangePartitioning &atomGrouping = dd->atomGrouping();
472 nzone = comm->zones.n/2;
473 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
474 for (int d = dd->ndim-1; d >= 0; d--)
476 /* Only forces in domains near the PBC boundaries need to
477 consider PBC in the treatment of fshift */
478 bShiftForcesNeedPbc = (dd->ci[dd->dim[d]] == 0);
479 bScrew = (bShiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX);
480 if (fshift == nullptr && !bScrew)
482 bShiftForcesNeedPbc = FALSE;
484 /* Determine which shift vector we need */
490 for (int p = cd->numPulses() - 1; p >= 0; p--)
492 const gmx_domdec_ind_t &ind = cd->ind[p];
493 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
494 gmx::ArrayRef<gmx::RVec> &receiveBuffer = receiveBufferAccess.buffer;
496 nat_tot -= ind.nrecv[nzone+1];
498 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
500 gmx::ArrayRef<gmx::RVec> sendBuffer;
501 if (cd->receiveInPlace)
503 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
507 sendBuffer = sendBufferAccess.buffer;
509 for (int zone = 0; zone < nzone; zone++)
511 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
513 sendBuffer[j++] = f[i];
517 /* Communicate the forces */
518 ddSendrecv(dd, d, dddirForward,
519 sendBuffer, receiveBuffer);
520 /* Add the received forces */
522 if (!bShiftForcesNeedPbc)
524 for (int g : ind.index)
526 for (int j : atomGrouping.block(g))
528 for (int d = 0; d < DIM; d++)
530 f[j][d] += receiveBuffer[n][d];
538 /* fshift should always be defined if this function is
539 * called when bShiftForcesNeedPbc is true */
540 assert(nullptr != fshift);
541 for (int g : ind.index)
543 for (int j : atomGrouping.block(g))
545 for (int d = 0; d < DIM; d++)
547 f[j][d] += receiveBuffer[n][d];
549 /* Add this force to the shift force */
550 for (int d = 0; d < DIM; d++)
552 fshift[is][d] += receiveBuffer[n][d];
560 for (int g : ind.index)
562 for (int j : atomGrouping.block(g))
564 /* Rotate the force */
565 f[j][XX] += receiveBuffer[n][XX];
566 f[j][YY] -= receiveBuffer[n][YY];
567 f[j][ZZ] -= receiveBuffer[n][ZZ];
570 /* Add this force to the shift force */
571 for (int d = 0; d < DIM; d++)
573 fshift[is][d] += receiveBuffer[n][d];
583 wallcycle_stop(wcycle, ewcMOVEF);
586 /* Convenience function for extracting a real buffer from an rvec buffer
588 * To reduce the number of temporary communication buffers and avoid
589 * cache polution, we reuse gmx::RVec buffers for storing reals.
590 * This functions return a real buffer reference with the same number
591 * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
593 static gmx::ArrayRef<real>
594 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
596 return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
600 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
603 gmx_domdec_comm_t *comm;
604 gmx_domdec_comm_dim_t *cd;
608 const RangePartitioning &atomGrouping = dd->atomGrouping();
611 nat_tot = comm->atomRanges.numHomeAtoms();
612 for (int d = 0; d < dd->ndim; d++)
615 for (const gmx_domdec_ind_t &ind : cd->ind)
617 /* Note: We provision for RVec instead of real, so a factor of 3
618 * more than needed. The buffer actually already has this size
619 * and we pass a plain pointer below, so this does not matter.
621 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
622 gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
624 for (int g : ind.index)
626 for (int j : atomGrouping.block(g))
628 sendBuffer[n++] = v[j];
632 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
634 gmx::ArrayRef<real> receiveBuffer;
635 if (cd->receiveInPlace)
637 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
641 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
643 /* Send and receive the data */
644 ddSendrecv(dd, d, dddirBackward,
645 sendBuffer, receiveBuffer);
646 if (!cd->receiveInPlace)
649 for (int zone = 0; zone < nzone; zone++)
651 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
653 v[i] = receiveBuffer[j++];
657 nat_tot += ind.nrecv[nzone+1];
663 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
666 gmx_domdec_comm_t *comm;
667 gmx_domdec_comm_dim_t *cd;
671 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
673 nzone = comm->zones.n/2;
674 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
675 for (int d = dd->ndim-1; d >= 0; d--)
678 for (int p = cd->numPulses() - 1; p >= 0; p--)
680 const gmx_domdec_ind_t &ind = cd->ind[p];
682 /* Note: We provision for RVec instead of real, so a factor of 3
683 * more than needed. The buffer actually already has this size
684 * and we typecast, so this works as intended.
686 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
687 gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
688 nat_tot -= ind.nrecv[nzone + 1];
690 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
692 gmx::ArrayRef<real> sendBuffer;
693 if (cd->receiveInPlace)
695 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
699 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
701 for (int zone = 0; zone < nzone; zone++)
703 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
705 sendBuffer[j++] = v[i];
709 /* Communicate the forces */
710 ddSendrecv(dd, d, dddirForward,
711 sendBuffer, receiveBuffer);
712 /* Add the received forces */
714 for (int g : ind.index)
716 for (int j : atomGrouping.block(g))
718 v[j] += receiveBuffer[n];
727 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
729 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
731 zone->min0, zone->max1,
732 zone->mch0, zone->mch0,
733 zone->p1_0, zone->p1_1);
736 /* Using the home grid size as input in cell_ns_x0 and cell_ns_x1
737 * takes the extremes over all home and remote zones in the halo
738 * and returns the results in cell_ns_x0 and cell_ns_x1.
739 * Note: only used with the group cut-off scheme.
741 static void dd_move_cellx(gmx_domdec_t *dd,
742 const gmx_ddbox_t *ddbox,
746 constexpr int c_ddZoneCommMaxNumZones = 5;
747 gmx_ddzone_t buf_s[c_ddZoneCommMaxNumZones];
748 gmx_ddzone_t buf_r[c_ddZoneCommMaxNumZones];
749 gmx_ddzone_t buf_e[c_ddZoneCommMaxNumZones];
750 gmx_domdec_comm_t *comm = dd->comm;
754 for (int d = 1; d < dd->ndim; d++)
756 int dim = dd->dim[d];
757 gmx_ddzone_t &zp = (d == 1) ? comm->zone_d1[0] : comm->zone_d2[0][0];
759 /* Copy the base sizes of the home zone */
760 zp.min0 = cell_ns_x0[dim];
761 zp.max1 = cell_ns_x1[dim];
762 zp.min1 = cell_ns_x1[dim];
763 zp.mch0 = cell_ns_x0[dim];
764 zp.mch1 = cell_ns_x1[dim];
765 zp.p1_0 = cell_ns_x0[dim];
766 zp.p1_1 = cell_ns_x1[dim];
769 gmx::ArrayRef<DDCellsizesWithDlb> cellsizes = comm->cellsizesWithDlb;
771 /* Loop backward over the dimensions and aggregate the extremes
774 for (int d = dd->ndim - 2; d >= 0; d--)
776 const int dim = dd->dim[d];
777 const bool applyPbc = (dim < ddbox->npbcdim);
779 /* Use an rvec to store two reals */
780 extr_s[d][0] = cellsizes[d + 1].fracLower;
781 extr_s[d][1] = cellsizes[d + 1].fracUpper;
782 extr_s[d][2] = cellsizes[d + 1].fracUpper;
785 GMX_ASSERT(pos < c_ddZoneCommMaxNumZones, "The buffers should be sufficiently large");
786 /* Store the extremes in the backward sending buffer,
787 * so they get updated separately from the forward communication.
789 for (int d1 = d; d1 < dd->ndim-1; d1++)
791 /* We invert the order to be able to use the same loop for buf_e */
792 buf_s[pos].min0 = extr_s[d1][1];
793 buf_s[pos].max1 = extr_s[d1][0];
794 buf_s[pos].min1 = extr_s[d1][2];
797 /* Store the cell corner of the dimension we communicate along */
798 buf_s[pos].p1_0 = comm->cell_x0[dim];
803 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
806 if (dd->ndim == 3 && d == 0)
808 buf_s[pos] = comm->zone_d2[0][1];
810 buf_s[pos] = comm->zone_d1[0];
814 /* We only need to communicate the extremes
815 * in the forward direction
817 int numPulses = comm->cd[d].numPulses();
821 /* Take the minimum to avoid double communication */
822 numPulsesMin = std::min(numPulses, dd->nc[dim] - 1 - numPulses);
826 /* Without PBC we should really not communicate over
827 * the boundaries, but implementing that complicates
828 * the communication setup and therefore we simply
829 * do all communication, but ignore some data.
831 numPulsesMin = numPulses;
833 for (int pulse = 0; pulse < numPulsesMin; pulse++)
835 /* Communicate the extremes forward */
836 bool receiveValidData = (applyPbc || dd->ci[dim] > 0);
838 int numElements = dd->ndim - d - 1;
839 ddSendrecv(dd, d, dddirForward,
840 extr_s + d, numElements,
841 extr_r + d, numElements);
843 if (receiveValidData)
845 for (int d1 = d; d1 < dd->ndim - 1; d1++)
847 extr_s[d1][0] = std::max(extr_s[d1][0], extr_r[d1][0]);
848 extr_s[d1][1] = std::min(extr_s[d1][1], extr_r[d1][1]);
849 extr_s[d1][2] = std::min(extr_s[d1][2], extr_r[d1][2]);
854 const int numElementsInBuffer = pos;
855 for (int pulse = 0; pulse < numPulses; pulse++)
857 /* Communicate all the zone information backward */
858 bool receiveValidData = (applyPbc || dd->ci[dim] < dd->nc[dim] - 1);
860 static_assert(sizeof(gmx_ddzone_t) == c_ddzoneNumReals*sizeof(real), "Here we expect gmx_ddzone_t to consist of c_ddzoneNumReals reals (only)");
862 int numReals = numElementsInBuffer*c_ddzoneNumReals;
863 ddSendrecv(dd, d, dddirBackward,
864 gmx::arrayRefFromArray(&buf_s[0].min0, numReals),
865 gmx::arrayRefFromArray(&buf_r[0].min0, numReals));
870 for (int d1 = d + 1; d1 < dd->ndim; d1++)
872 /* Determine the decrease of maximum required
873 * communication height along d1 due to the distance along d,
874 * this avoids a lot of useless atom communication.
876 real dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
879 if (ddbox->tric_dir[dim])
881 /* c is the off-diagonal coupling between the cell planes
882 * along directions d and d1.
884 c = ddbox->v[dim][dd->dim[d1]][dim];
890 real det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
893 dh[d1] = comm->cutoff - (c*dist_d + std::sqrt(det))/(1 + c*c);
897 /* A negative value signals out of range */
903 /* Accumulate the extremes over all pulses */
904 for (int i = 0; i < numElementsInBuffer; i++)
912 if (receiveValidData)
914 buf_e[i].min0 = std::min(buf_e[i].min0, buf_r[i].min0);
915 buf_e[i].max1 = std::max(buf_e[i].max1, buf_r[i].max1);
916 buf_e[i].min1 = std::min(buf_e[i].min1, buf_r[i].min1);
920 if (dd->ndim == 3 && d == 0 && i == numElementsInBuffer - 1)
928 if (receiveValidData && dh[d1] >= 0)
930 buf_e[i].mch0 = std::max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
931 buf_e[i].mch1 = std::max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
934 /* Copy the received buffer to the send buffer,
935 * to pass the data through with the next pulse.
939 if (((applyPbc || dd->ci[dim] + numPulses < dd->nc[dim]) && pulse == numPulses - 1) ||
940 (!applyPbc && dd->ci[dim] + 1 + pulse == dd->nc[dim] - 1))
942 /* Store the extremes */
945 for (int d1 = d; d1 < dd->ndim-1; d1++)
947 extr_s[d1][1] = std::min(extr_s[d1][1], buf_e[pos].min0);
948 extr_s[d1][0] = std::max(extr_s[d1][0], buf_e[pos].max1);
949 extr_s[d1][2] = std::min(extr_s[d1][2], buf_e[pos].min1);
953 if (d == 1 || (d == 0 && dd->ndim == 3))
955 for (int i = d; i < 2; i++)
957 comm->zone_d2[1-d][i] = buf_e[pos];
963 comm->zone_d1[1] = buf_e[pos];
972 int dim = dd->dim[1];
973 for (int i = 0; i < 2; i++)
977 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
979 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d1[i].min0);
980 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d1[i].max1);
985 int dim = dd->dim[2];
986 for (int i = 0; i < 2; i++)
988 for (int j = 0; j < 2; j++)
992 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
994 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
995 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
999 for (int d = 1; d < dd->ndim; d++)
1001 cellsizes[d].fracLowerMax = extr_s[d-1][0];
1002 cellsizes[d].fracUpperMin = extr_s[d-1][1];
1005 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1006 d, cellsizes[d].fracLowerMax, cellsizes[d].fracUpperMin);
1011 static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
1012 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1014 rvec grid_s[2], *grid_r = nullptr, cx, r;
1015 char fname[STRLEN], buf[22];
1017 int a, i, d, z, y, x;
1021 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1022 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1026 snew(grid_r, 2*dd->nnodes);
1029 dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : nullptr);
1033 for (d = 0; d < DIM; d++)
1035 for (i = 0; i < DIM; i++)
1043 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1045 tric[d][i] = box[i][d]/box[i][i];
1054 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1055 out = gmx_fio_fopen(fname, "w");
1056 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1058 for (i = 0; i < dd->nnodes; i++)
1060 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1061 for (d = 0; d < DIM; d++)
1063 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
1065 for (z = 0; z < 2; z++)
1067 for (y = 0; y < 2; y++)
1069 for (x = 0; x < 2; x++)
1071 cx[XX] = grid_r[i*2+x][XX];
1072 cx[YY] = grid_r[i*2+y][YY];
1073 cx[ZZ] = grid_r[i*2+z][ZZ];
1075 gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
1076 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
1080 for (d = 0; d < DIM; d++)
1082 for (x = 0; x < 4; x++)
1086 case 0: y = 1 + i*8 + 2*x; break;
1087 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
1088 case 2: y = 1 + i*8 + x; break;
1090 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
1094 gmx_fio_fclose(out);
1099 void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
1100 const gmx_mtop_t *mtop, const t_commrec *cr,
1101 int natoms, const rvec x[], const matrix box)
1103 char fname[STRLEN], buf[22];
1106 const char *atomname, *resname;
1112 natoms = dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
1115 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
1117 out = gmx_fio_fopen(fname, "w");
1119 fprintf(out, "TITLE %s\n", title);
1120 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1122 for (int i = 0; i < natoms; i++)
1124 int ii = dd->globalAtomIndices[i];
1125 mtopGetAtomAndResidueName(mtop, ii, &molb, &atomname, &resnr, &resname, nullptr);
1128 if (i < dd->comm->atomRanges.end(DDAtomRanges::Type::Zones))
1131 while (i >= dd->atomGrouping().subRange(0, dd->comm->zones.cg_range[c + 1]).end())
1137 else if (i < dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites))
1139 b = dd->comm->zones.n;
1143 b = dd->comm->zones.n + 1;
1145 gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
1146 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
1148 fprintf(out, "TER\n");
1150 gmx_fio_fclose(out);
1153 real dd_cutoff_multibody(const gmx_domdec_t *dd)
1155 gmx_domdec_comm_t *comm;
1162 if (comm->bInterCGBondeds)
1164 if (comm->cutoff_mbody > 0)
1166 r = comm->cutoff_mbody;
1170 /* cutoff_mbody=0 means we do not have DLB */
1171 r = comm->cellsize_min[dd->dim[0]];
1172 for (di = 1; di < dd->ndim; di++)
1174 r = std::min(r, comm->cellsize_min[dd->dim[di]]);
1176 if (comm->bBondComm)
1178 r = std::max(r, comm->cutoff_mbody);
1182 r = std::min(r, comm->cutoff);
1190 real dd_cutoff_twobody(const gmx_domdec_t *dd)
1194 r_mb = dd_cutoff_multibody(dd);
1196 return std::max(dd->comm->cutoff, r_mb);
1200 static void dd_cart_coord2pmecoord(const gmx_domdec_t *dd, const ivec coord,
1205 nc = dd->nc[dd->comm->cartpmedim];
1206 ntot = dd->comm->ntot[dd->comm->cartpmedim];
1207 copy_ivec(coord, coord_pme);
1208 coord_pme[dd->comm->cartpmedim] =
1209 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
1212 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
1217 npme = dd->comm->npmenodes;
1219 /* Here we assign a PME node to communicate with this DD node
1220 * by assuming that the major index of both is x.
1221 * We add cr->npmenodes/2 to obtain an even distribution.
1223 return (ddindex*npme + npme/2)/npp;
1226 static int *dd_interleaved_pme_ranks(const gmx_domdec_t *dd)
1231 snew(pme_rank, dd->comm->npmenodes);
1233 for (i = 0; i < dd->nnodes; i++)
1235 p0 = ddindex2pmeindex(dd, i);
1236 p1 = ddindex2pmeindex(dd, i+1);
1237 if (i+1 == dd->nnodes || p1 > p0)
1241 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
1243 pme_rank[n] = i + 1 + n;
1251 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
1259 if (dd->comm->bCartesian) {
1260 gmx_ddindex2xyz(dd->nc,ddindex,coords);
1261 dd_coords2pmecoords(dd,coords,coords_pme);
1262 copy_ivec(dd->ntot,nc);
1263 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1264 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1266 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
1268 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
1274 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
1279 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
1281 gmx_domdec_comm_t *comm;
1283 int ddindex, nodeid = -1;
1285 comm = cr->dd->comm;
1290 if (comm->bCartesianPP_PME)
1293 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
1298 ddindex = dd_index(cr->dd->nc, coords);
1299 if (comm->bCartesianPP)
1301 nodeid = comm->ddindex2simnodeid[ddindex];
1307 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
1319 static int dd_simnode2pmenode(const gmx_domdec_t *dd,
1320 const t_commrec gmx_unused *cr,
1325 const gmx_domdec_comm_t *comm = dd->comm;
1327 /* This assumes a uniform x domain decomposition grid cell size */
1328 if (comm->bCartesianPP_PME)
1331 ivec coord, coord_pme;
1332 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
1333 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1335 /* This is a PP node */
1336 dd_cart_coord2pmecoord(dd, coord, coord_pme);
1337 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
1341 else if (comm->bCartesianPP)
1343 if (sim_nodeid < dd->nnodes)
1345 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
1350 /* This assumes DD cells with identical x coordinates
1351 * are numbered sequentially.
1353 if (dd->comm->pmenodes == nullptr)
1355 if (sim_nodeid < dd->nnodes)
1357 /* The DD index equals the nodeid */
1358 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
1364 while (sim_nodeid > dd->comm->pmenodes[i])
1368 if (sim_nodeid < dd->comm->pmenodes[i])
1370 pmenode = dd->comm->pmenodes[i];
1378 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
1383 dd->comm->npmenodes_x, dd->comm->npmenodes_y
1394 std::vector<int> get_pme_ddranks(const t_commrec *cr, int pmenodeid)
1398 ivec coord, coord_pme;
1402 std::vector<int> ddranks;
1403 ddranks.reserve((dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
1405 for (x = 0; x < dd->nc[XX]; x++)
1407 for (y = 0; y < dd->nc[YY]; y++)
1409 for (z = 0; z < dd->nc[ZZ]; z++)
1411 if (dd->comm->bCartesianPP_PME)
1416 dd_cart_coord2pmecoord(dd, coord, coord_pme);
1417 if (dd->ci[XX] == coord_pme[XX] &&
1418 dd->ci[YY] == coord_pme[YY] &&
1419 dd->ci[ZZ] == coord_pme[ZZ])
1421 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
1426 /* The slab corresponds to the nodeid in the PME group */
1427 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
1429 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
1438 static gmx_bool receive_vir_ener(const gmx_domdec_t *dd, const t_commrec *cr)
1440 gmx_bool bReceive = TRUE;
1442 if (cr->npmenodes < dd->nnodes)
1444 gmx_domdec_comm_t *comm = dd->comm;
1445 if (comm->bCartesianPP_PME)
1448 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
1450 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
1451 coords[comm->cartpmedim]++;
1452 if (coords[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1455 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
1456 if (dd_simnode2pmenode(dd, cr, rank) == pmenode)
1458 /* This is not the last PP node for pmenode */
1463 GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
1468 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
1469 if (cr->sim_nodeid+1 < cr->nnodes &&
1470 dd_simnode2pmenode(dd, cr, cr->sim_nodeid+1) == pmenode)
1472 /* This is not the last PP node for pmenode */
1481 static void set_zones_ncg_home(gmx_domdec_t *dd)
1483 gmx_domdec_zones_t *zones;
1486 zones = &dd->comm->zones;
1488 zones->cg_range[0] = 0;
1489 for (i = 1; i < zones->n+1; i++)
1491 zones->cg_range[i] = dd->ncg_home;
1493 /* zone_ncg1[0] should always be equal to ncg_home */
1494 dd->comm->zone_ncg1[0] = dd->ncg_home;
1497 static void restoreAtomGroups(gmx_domdec_t *dd,
1498 const int *gcgs_index, const t_state *state)
1500 gmx::ArrayRef<const int> atomGroupsState = state->cg_gl;
1502 std::vector<int> &globalAtomGroupIndices = dd->globalAtomGroupIndices;
1503 gmx::RangePartitioning &atomGrouping = dd->atomGrouping_;
1505 globalAtomGroupIndices.resize(atomGroupsState.size());
1506 atomGrouping.clear();
1508 /* Copy back the global charge group indices from state
1509 * and rebuild the local charge group to atom index.
1511 for (gmx::index i = 0; i < atomGroupsState.size(); i++)
1513 const int atomGroupGlobal = atomGroupsState[i];
1514 const int groupSize = gcgs_index[atomGroupGlobal + 1] - gcgs_index[atomGroupGlobal];
1515 globalAtomGroupIndices[i] = atomGroupGlobal;
1516 atomGrouping.appendBlock(groupSize);
1519 dd->ncg_home = atomGroupsState.size();
1520 dd->comm->atomRanges.setEnd(DDAtomRanges::Type::Home, atomGrouping.fullRange().end());
1522 set_zones_ncg_home(dd);
1525 static void dd_set_cginfo(gmx::ArrayRef<const int> index_gl, int cg0, int cg1,
1526 t_forcerec *fr, char *bLocalCG)
1528 cginfo_mb_t *cginfo_mb;
1534 cginfo_mb = fr->cginfo_mb;
1535 cginfo = fr->cginfo;
1537 for (cg = cg0; cg < cg1; cg++)
1539 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
1543 if (bLocalCG != nullptr)
1545 for (cg = cg0; cg < cg1; cg++)
1547 bLocalCG[index_gl[cg]] = TRUE;
1552 static void make_dd_indices(gmx_domdec_t *dd,
1553 const int *gcgs_index, int cg_start)
1555 const int numZones = dd->comm->zones.n;
1556 const int *zone2cg = dd->comm->zones.cg_range;
1557 const int *zone_ncg1 = dd->comm->zone_ncg1;
1558 gmx::ArrayRef<const int> globalAtomGroupIndices = dd->globalAtomGroupIndices;
1559 const gmx_bool bCGs = dd->comm->bCGs;
1561 std::vector<int> &globalAtomIndices = dd->globalAtomIndices;
1563 if (zone2cg[1] != dd->ncg_home)
1565 gmx_incons("dd->ncg_zone is not up to date");
1568 /* Make the local to global and global to local atom index */
1569 int a = dd->atomGrouping().subRange(cg_start, cg_start).begin();
1570 globalAtomIndices.resize(a);
1571 for (int zone = 0; zone < numZones; zone++)
1580 cg0 = zone2cg[zone];
1582 int cg1 = zone2cg[zone+1];
1583 int cg1_p1 = cg0 + zone_ncg1[zone];
1585 for (int cg = cg0; cg < cg1; cg++)
1590 /* Signal that this cg is from more than one pulse away */
1593 int cg_gl = globalAtomGroupIndices[cg];
1596 for (int a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
1598 globalAtomIndices.push_back(a_gl);
1599 ga2la_set(dd->ga2la, a_gl, a, zone1);
1605 globalAtomIndices.push_back(cg_gl);
1606 ga2la_set(dd->ga2la, cg_gl, a, zone1);
1613 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
1617 if (bLocalCG == nullptr)
1621 for (size_t i = 0; i < dd->globalAtomGroupIndices.size(); i++)
1623 if (!bLocalCG[dd->globalAtomGroupIndices[i]])
1626 "DD rank %d, %s: atom group %zu, global atom group %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i + 1, dd->globalAtomGroupIndices[i] + 1, dd->ncg_home);
1631 for (int i = 0; i < ncg_sys; i++)
1638 if (ngl != dd->globalAtomGroupIndices.size())
1640 fprintf(stderr, "DD rank %d, %s: In bLocalCG %zu atom groups are marked as local, whereas there are %zu\n", dd->rank, where, ngl, dd->globalAtomGroupIndices.size());
1647 static void check_index_consistency(gmx_domdec_t *dd,
1648 int natoms_sys, int ncg_sys,
1653 const int numAtomsInZones = dd->comm->atomRanges.end(DDAtomRanges::Type::Zones);
1655 if (dd->comm->DD_debug > 1)
1657 std::vector<int> have(natoms_sys);
1658 for (int a = 0; a < numAtomsInZones; a++)
1660 int globalAtomIndex = dd->globalAtomIndices[a];
1661 if (have[globalAtomIndex] > 0)
1663 fprintf(stderr, "DD rank %d: global atom %d occurs twice: index %d and %d\n", dd->rank, globalAtomIndex + 1, have[globalAtomIndex], a+1);
1667 have[globalAtomIndex] = a + 1;
1672 std::vector<int> have(numAtomsInZones);
1675 for (int i = 0; i < natoms_sys; i++)
1679 if (ga2la_get(dd->ga2la, i, &a, &cell))
1681 if (a >= numAtomsInZones)
1683 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, numAtomsInZones);
1689 if (dd->globalAtomIndices[a] != i)
1691 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->globalAtomIndices[a]+1);
1698 if (ngl != numAtomsInZones)
1701 "DD rank %d, %s: %d global atom indices, %d local atoms\n",
1702 dd->rank, where, ngl, numAtomsInZones);
1704 for (int a = 0; a < numAtomsInZones; a++)
1709 "DD rank %d, %s: local atom %d, global %d has no global index\n",
1710 dd->rank, where, a + 1, dd->globalAtomIndices[a] + 1);
1714 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
1718 gmx_fatal(FARGS, "DD rank %d, %s: %d atom(group) index inconsistencies",
1719 dd->rank, where, nerr);
1723 /* Clear all DD global state indices, starting from \p atomGroupStart and \p atomStart */
1724 static void clearDDStateIndices(gmx_domdec_t *dd,
1730 /* Clear the whole list without searching */
1731 ga2la_clear(dd->ga2la);
1735 const int numAtomsInZones = dd->comm->atomRanges.end(DDAtomRanges::Type::Zones);
1736 for (int i = 0; i < numAtomsInZones; i++)
1738 ga2la_del(dd->ga2la, dd->globalAtomIndices[i]);
1742 char *bLocalCG = dd->comm->bLocalCG;
1745 for (size_t atomGroup = atomGroupStart; atomGroup < dd->globalAtomGroupIndices.size(); atomGroup++)
1747 bLocalCG[dd->globalAtomGroupIndices[atomGroup]] = FALSE;
1751 dd_clear_local_vsite_indices(dd);
1753 if (dd->constraints)
1755 dd_clear_local_constraint_indices(dd);
1759 static bool check_grid_jump(gmx_int64_t step,
1760 const gmx_domdec_t *dd,
1762 const gmx_ddbox_t *ddbox,
1765 gmx_domdec_comm_t *comm = dd->comm;
1766 bool invalid = false;
1768 for (int d = 1; d < dd->ndim; d++)
1770 const DDCellsizesWithDlb &cellsizes = comm->cellsizesWithDlb[d];
1771 const int dim = dd->dim[d];
1772 const real limit = grid_jump_limit(comm, cutoff, d);
1773 real bfac = ddbox->box_size[dim];
1774 if (ddbox->tric_dir[dim])
1776 bfac *= ddbox->skew_fac[dim];
1778 if ((cellsizes.fracUpper - cellsizes.fracLowerMax)*bfac < limit ||
1779 (cellsizes.fracLower - cellsizes.fracUpperMin)*bfac > -limit)
1787 /* This error should never be triggered under normal
1788 * circumstances, but you never know ...
1790 gmx_fatal(FARGS, "step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
1791 gmx_step_str(step, buf),
1792 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1800 static float dd_force_load(gmx_domdec_comm_t *comm)
1807 if (comm->eFlop > 1)
1809 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
1814 load = comm->cycl[ddCyclF];
1815 if (comm->cycl_n[ddCyclF] > 1)
1817 /* Subtract the maximum of the last n cycle counts
1818 * to get rid of possible high counts due to other sources,
1819 * for instance system activity, that would otherwise
1820 * affect the dynamic load balancing.
1822 load -= comm->cycl_max[ddCyclF];
1826 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
1828 float gpu_wait, gpu_wait_sum;
1830 gpu_wait = comm->cycl[ddCyclWaitGPU];
1831 if (comm->cycl_n[ddCyclF] > 1)
1833 /* We should remove the WaitGPU time of the same MD step
1834 * as the one with the maximum F time, since the F time
1835 * and the wait time are not independent.
1836 * Furthermore, the step for the max F time should be chosen
1837 * the same on all ranks that share the same GPU.
1838 * But to keep the code simple, we remove the average instead.
1839 * The main reason for artificially long times at some steps
1840 * is spurious CPU activity or MPI time, so we don't expect
1841 * that changes in the GPU wait time matter a lot here.
1843 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
1845 /* Sum the wait times over the ranks that share the same GPU */
1846 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
1847 comm->mpi_comm_gpu_shared);
1848 /* Replace the wait time by the average over the ranks */
1849 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
1857 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
1859 gmx_domdec_comm_t *comm;
1864 snew(*dim_f, dd->nc[dim]+1);
1866 for (i = 1; i < dd->nc[dim]; i++)
1868 if (comm->slb_frac[dim])
1870 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
1874 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
1877 (*dim_f)[dd->nc[dim]] = 1;
1880 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
1882 int pmeindex, slab, nso, i;
1885 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
1891 ddpme->dim = dimind;
1893 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
1895 ddpme->nslab = (ddpme->dim == 0 ?
1896 dd->comm->npmenodes_x :
1897 dd->comm->npmenodes_y);
1899 if (ddpme->nslab <= 1)
1904 nso = dd->comm->npmenodes/ddpme->nslab;
1905 /* Determine for each PME slab the PP location range for dimension dim */
1906 snew(ddpme->pp_min, ddpme->nslab);
1907 snew(ddpme->pp_max, ddpme->nslab);
1908 for (slab = 0; slab < ddpme->nslab; slab++)
1910 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
1911 ddpme->pp_max[slab] = 0;
1913 for (i = 0; i < dd->nnodes; i++)
1915 ddindex2xyz(dd->nc, i, xyz);
1916 /* For y only use our y/z slab.
1917 * This assumes that the PME x grid size matches the DD grid size.
1919 if (dimind == 0 || xyz[XX] == dd->ci[XX])
1921 pmeindex = ddindex2pmeindex(dd, i);
1924 slab = pmeindex/nso;
1928 slab = pmeindex % ddpme->nslab;
1930 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
1931 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1935 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1938 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1940 if (dd->comm->ddpme[0].dim == XX)
1942 return dd->comm->ddpme[0].maxshift;
1950 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
1952 if (dd->comm->ddpme[0].dim == YY)
1954 return dd->comm->ddpme[0].maxshift;
1956 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
1958 return dd->comm->ddpme[1].maxshift;
1966 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
1968 rvec cell_ns_x0, rvec cell_ns_x1,
1971 gmx_domdec_comm_t *comm;
1976 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
1978 dim = dd->dim[dim_ind];
1980 /* Without PBC we don't have restrictions on the outer cells */
1981 if (!(dim >= ddbox->npbcdim &&
1982 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
1984 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
1985 comm->cellsize_min[dim])
1988 gmx_fatal(FARGS, "step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
1989 gmx_step_str(step, buf), dim2char(dim),
1990 comm->cell_x1[dim] - comm->cell_x0[dim],
1991 ddbox->skew_fac[dim],
1992 dd->comm->cellsize_min[dim],
1993 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1997 if ((isDlbOn(dd->comm) && dd->ndim > 1) || ddbox->nboundeddim < DIM)
1999 /* Communicate the boundaries and update cell_ns_x0/1 */
2000 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
2001 if (isDlbOn(dd->comm) && dd->ndim > 1)
2003 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
2008 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
2010 /* Note that the cycles value can be incorrect, either 0 or some
2011 * extremely large value, when our thread migrated to another core
2012 * with an unsynchronized cycle counter. If this happens less often
2013 * that once per nstlist steps, this will not cause issues, since
2014 * we later subtract the maximum value from the sum over nstlist steps.
2015 * A zero count will slightly lower the total, but that's a small effect.
2016 * Note that the main purpose of the subtraction of the maximum value
2017 * is to avoid throwing off the load balancing when stalls occur due
2018 * e.g. system activity or network congestion.
2020 dd->comm->cycl[ddCycl] += cycles;
2021 dd->comm->cycl_n[ddCycl]++;
2022 if (cycles > dd->comm->cycl_max[ddCycl])
2024 dd->comm->cycl_max[ddCycl] = cycles;
2028 static double force_flop_count(t_nrnb *nrnb)
2035 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
2037 /* To get closer to the real timings, we half the count
2038 * for the normal loops and again half it for water loops.
2041 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
2043 sum += nrnb->n[i]*0.25*cost_nrnb(i);
2047 sum += nrnb->n[i]*0.50*cost_nrnb(i);
2050 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
2053 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
2055 sum += nrnb->n[i]*cost_nrnb(i);
2058 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
2060 sum += nrnb->n[i]*cost_nrnb(i);
2066 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
2068 if (dd->comm->eFlop)
2070 dd->comm->flop -= force_flop_count(nrnb);
2073 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
2075 if (dd->comm->eFlop)
2077 dd->comm->flop += force_flop_count(nrnb);
2082 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
2086 for (i = 0; i < ddCyclNr; i++)
2088 dd->comm->cycl[i] = 0;
2089 dd->comm->cycl_n[i] = 0;
2090 dd->comm->cycl_max[i] = 0;
2093 dd->comm->flop_n = 0;
2096 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
2098 gmx_domdec_comm_t *comm;
2099 domdec_load_t *load;
2100 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
2105 fprintf(debug, "get_load_distribution start\n");
2108 wallcycle_start(wcycle, ewcDDCOMMLOAD);
2112 bSepPME = (dd->pme_nodeid >= 0);
2114 if (dd->ndim == 0 && bSepPME)
2116 /* Without decomposition, but with PME nodes, we need the load */
2117 comm->load[0].mdf = comm->cycl[ddCyclPPduringPME];
2118 comm->load[0].pme = comm->cycl[ddCyclPME];
2121 for (int d = dd->ndim - 1; d >= 0; d--)
2123 const DDCellsizesWithDlb &cellsizes = comm->cellsizesWithDlb[d];
2124 const int dim = dd->dim[d];
2125 /* Check if we participate in the communication in this dimension */
2126 if (d == dd->ndim-1 ||
2127 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
2129 load = &comm->load[d];
2130 if (isDlbOn(dd->comm))
2132 cell_frac = cellsizes.fracUpper - cellsizes.fracLower;
2135 if (d == dd->ndim-1)
2137 sbuf[pos++] = dd_force_load(comm);
2138 sbuf[pos++] = sbuf[0];
2139 if (isDlbOn(dd->comm))
2141 sbuf[pos++] = sbuf[0];
2142 sbuf[pos++] = cell_frac;
2145 sbuf[pos++] = cellsizes.fracLowerMax;
2146 sbuf[pos++] = cellsizes.fracUpperMin;
2151 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
2152 sbuf[pos++] = comm->cycl[ddCyclPME];
2157 sbuf[pos++] = comm->load[d+1].sum;
2158 sbuf[pos++] = comm->load[d+1].max;
2159 if (isDlbOn(dd->comm))
2161 sbuf[pos++] = comm->load[d+1].sum_m;
2162 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
2163 sbuf[pos++] = comm->load[d+1].flags;
2166 sbuf[pos++] = cellsizes.fracLowerMax;
2167 sbuf[pos++] = cellsizes.fracUpperMin;
2172 sbuf[pos++] = comm->load[d+1].mdf;
2173 sbuf[pos++] = comm->load[d+1].pme;
2177 /* Communicate a row in DD direction d.
2178 * The communicators are setup such that the root always has rank 0.
2181 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
2182 load->load, load->nload*sizeof(float), MPI_BYTE,
2183 0, comm->mpi_comm_load[d]);
2185 if (dd->ci[dim] == dd->master_ci[dim])
2187 /* We are the master along this row, process this row */
2188 RowMaster *rowMaster = nullptr;
2192 rowMaster = cellsizes.rowMaster.get();
2202 for (int i = 0; i < dd->nc[dim]; i++)
2204 load->sum += load->load[pos++];
2205 load->max = std::max(load->max, load->load[pos]);
2207 if (isDlbOn(dd->comm))
2209 if (rowMaster->dlbIsLimited)
2211 /* This direction could not be load balanced properly,
2212 * therefore we need to use the maximum iso the average load.
2214 load->sum_m = std::max(load->sum_m, load->load[pos]);
2218 load->sum_m += load->load[pos];
2221 load->cvol_min = std::min(load->cvol_min, load->load[pos]);
2225 load->flags = (int)(load->load[pos++] + 0.5);
2229 rowMaster->bounds[i].cellFracLowerMax = load->load[pos++];
2230 rowMaster->bounds[i].cellFracUpperMin = load->load[pos++];
2235 load->mdf = std::max(load->mdf, load->load[pos]);
2237 load->pme = std::max(load->pme, load->load[pos]);
2241 if (isDlbOn(comm) && rowMaster->dlbIsLimited)
2243 load->sum_m *= dd->nc[dim];
2244 load->flags |= (1<<d);
2252 comm->nload += dd_load_count(comm);
2253 comm->load_step += comm->cycl[ddCyclStep];
2254 comm->load_sum += comm->load[0].sum;
2255 comm->load_max += comm->load[0].max;
2258 for (int d = 0; d < dd->ndim; d++)
2260 if (comm->load[0].flags & (1<<d))
2262 comm->load_lim[d]++;
2268 comm->load_mdf += comm->load[0].mdf;
2269 comm->load_pme += comm->load[0].pme;
2273 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
2277 fprintf(debug, "get_load_distribution finished\n");
2281 static float dd_force_load_fraction(gmx_domdec_t *dd)
2283 /* Return the relative performance loss on the total run time
2284 * due to the force calculation load imbalance.
2286 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
2288 return dd->comm->load_sum/(dd->comm->load_step*dd->nnodes);
2296 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
2298 /* Return the relative performance loss on the total run time
2299 * due to the force calculation load imbalance.
2301 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
2304 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
2305 (dd->comm->load_step*dd->nnodes);
2313 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
2315 gmx_domdec_comm_t *comm = dd->comm;
2317 /* Only the master rank prints loads and only if we measured loads */
2318 if (!DDMASTER(dd) || comm->nload == 0)
2324 int numPpRanks = dd->nnodes;
2325 int numPmeRanks = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
2326 int numRanks = numPpRanks + numPmeRanks;
2327 float lossFraction = 0;
2329 /* Print the average load imbalance and performance loss */
2330 if (dd->nnodes > 1 && comm->load_sum > 0)
2332 float imbalance = comm->load_max*numPpRanks/comm->load_sum - 1;
2333 lossFraction = dd_force_imb_perf_loss(dd);
2335 std::string msg = "\n Dynamic load balancing report:\n";
2336 std::string dlbStateStr;
2338 switch (dd->comm->dlbState)
2341 dlbStateStr = "DLB was off during the run per user request.";
2343 case edlbsOffForever:
2344 /* Currectly this can happen due to performance loss observed, cell size
2345 * limitations or incompatibility with other settings observed during
2346 * determineInitialDlbState(). */
2347 dlbStateStr = "DLB got disabled because it was unsuitable to use.";
2349 case edlbsOffCanTurnOn:
2350 dlbStateStr = "DLB was off during the run due to low measured imbalance.";
2352 case edlbsOffTemporarilyLocked:
2353 dlbStateStr = "DLB was locked at the end of the run due to unfinished PP-PME balancing.";
2355 case edlbsOnCanTurnOff:
2356 dlbStateStr = "DLB was turned on during the run due to measured imbalance.";
2359 dlbStateStr = "DLB was permanently on during the run per user request.";
2362 GMX_ASSERT(false, "Undocumented DLB state");
2365 msg += " " + dlbStateStr + "\n";
2366 msg += gmx::formatString(" Average load imbalance: %.1f%%.\n", imbalance*100);
2367 msg += gmx::formatString(" The balanceable part of the MD step is %d%%, load imbalance is computed from this.\n",
2368 static_cast<int>(dd_force_load_fraction(dd)*100 + 0.5));
2369 msg += gmx::formatString(" Part of the total run time spent waiting due to load imbalance: %.1f%%.\n",
2371 fprintf(fplog, "%s", msg.c_str());
2372 fprintf(stderr, "%s", msg.c_str());
2375 /* Print during what percentage of steps the load balancing was limited */
2376 bool dlbWasLimited = false;
2379 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
2380 for (int d = 0; d < dd->ndim; d++)
2382 int limitPercentage = (200*comm->load_lim[d] + 1)/(2*comm->nload);
2383 sprintf(buf+strlen(buf), " %c %d %%",
2384 dim2char(dd->dim[d]), limitPercentage);
2385 if (limitPercentage >= 50)
2387 dlbWasLimited = true;
2390 sprintf(buf + strlen(buf), "\n");
2391 fprintf(fplog, "%s", buf);
2392 fprintf(stderr, "%s", buf);
2395 /* Print the performance loss due to separate PME - PP rank imbalance */
2396 float lossFractionPme = 0;
2397 if (numPmeRanks > 0 && comm->load_mdf > 0 && comm->load_step > 0)
2399 float pmeForceRatio = comm->load_pme/comm->load_mdf;
2400 lossFractionPme = (comm->load_pme - comm->load_mdf)/comm->load_step;
2401 if (lossFractionPme <= 0)
2403 lossFractionPme *= numPmeRanks/static_cast<float>(numRanks);
2407 lossFractionPme *= numPpRanks/static_cast<float>(numRanks);
2409 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pmeForceRatio);
2410 fprintf(fplog, "%s", buf);
2411 fprintf(stderr, "%s", buf);
2412 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", std::fabs(lossFractionPme)*100);
2413 fprintf(fplog, "%s", buf);
2414 fprintf(stderr, "%s", buf);
2416 fprintf(fplog, "\n");
2417 fprintf(stderr, "\n");
2419 if (lossFraction >= DD_PERF_LOSS_WARN)
2422 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
2423 " in the domain decomposition.\n", lossFraction*100);
2426 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
2428 else if (dlbWasLimited)
2430 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
2432 fprintf(fplog, "%s\n", buf);
2433 fprintf(stderr, "%s\n", buf);
2435 if (numPmeRanks > 0 && std::fabs(lossFractionPme) >= DD_PERF_LOSS_WARN)
2438 "NOTE: %.1f %% performance was lost because the PME ranks\n"
2439 " had %s work to do than the PP ranks.\n"
2440 " You might want to %s the number of PME ranks\n"
2441 " or %s the cut-off and the grid spacing.\n",
2442 std::fabs(lossFractionPme*100),
2443 (lossFractionPme < 0) ? "less" : "more",
2444 (lossFractionPme < 0) ? "decrease" : "increase",
2445 (lossFractionPme < 0) ? "decrease" : "increase");
2446 fprintf(fplog, "%s\n", buf);
2447 fprintf(stderr, "%s\n", buf);
2451 static float dd_vol_min(gmx_domdec_t *dd)
2453 return dd->comm->load[0].cvol_min*dd->nnodes;
2456 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
2458 return dd->comm->load[0].flags;
2461 static float dd_f_imbal(gmx_domdec_t *dd)
2463 if (dd->comm->load[0].sum > 0)
2465 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1.0f;
2469 /* Something is wrong in the cycle counting, report no load imbalance */
2474 float dd_pme_f_ratio(gmx_domdec_t *dd)
2476 /* Should only be called on the DD master rank */
2477 assert(DDMASTER(dd));
2479 if (dd->comm->load[0].mdf > 0 && dd->comm->cycl_n[ddCyclPME] > 0)
2481 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
2489 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
2494 flags = dd_load_flags(dd);
2498 "DD load balancing is limited by minimum cell size in dimension");
2499 for (d = 0; d < dd->ndim; d++)
2503 fprintf(fplog, " %c", dim2char(dd->dim[d]));
2506 fprintf(fplog, "\n");
2508 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
2509 if (isDlbOn(dd->comm))
2511 fprintf(fplog, " vol min/aver %5.3f%c",
2512 dd_vol_min(dd), flags ? '!' : ' ');
2516 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
2518 if (dd->comm->cycl_n[ddCyclPME])
2520 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
2522 fprintf(fplog, "\n\n");
2525 static void dd_print_load_verbose(gmx_domdec_t *dd)
2527 if (isDlbOn(dd->comm))
2529 fprintf(stderr, "vol %4.2f%c ",
2530 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
2534 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
2536 if (dd->comm->cycl_n[ddCyclPME])
2538 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
2543 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
2548 gmx_bool bPartOfGroup = FALSE;
2550 dim = dd->dim[dim_ind];
2551 copy_ivec(loc, loc_c);
2552 for (i = 0; i < dd->nc[dim]; i++)
2555 rank = dd_index(dd->nc, loc_c);
2556 if (rank == dd->rank)
2558 /* This process is part of the group */
2559 bPartOfGroup = TRUE;
2562 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
2566 dd->comm->mpi_comm_load[dim_ind] = c_row;
2567 if (!isDlbDisabled(dd->comm))
2569 DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
2571 if (dd->ci[dim] == dd->master_ci[dim])
2573 /* This is the root process of this row */
2574 cellsizes.rowMaster = gmx::compat::make_unique<RowMaster>();
2576 RowMaster &rowMaster = *cellsizes.rowMaster;
2577 rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
2578 rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
2579 rowMaster.isCellMin.resize(dd->nc[dim]);
2582 rowMaster.bounds.resize(dd->nc[dim]);
2584 rowMaster.buf_ncd.resize(dd->nc[dim]);
2588 /* This is not a root process, we only need to receive cell_f */
2589 cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
2592 if (dd->ci[dim] == dd->master_ci[dim])
2594 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
2600 void dd_setup_dlb_resource_sharing(t_commrec *cr,
2604 int physicalnode_id_hash;
2606 MPI_Comm mpi_comm_pp_physicalnode;
2608 if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
2610 /* Only ranks with short-ranged tasks (currently) use GPUs.
2611 * If we don't have GPUs assigned, there are no resources to share.
2616 physicalnode_id_hash = gmx_physicalnode_id_hash();
2622 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
2623 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
2624 dd->rank, physicalnode_id_hash, gpu_id);
2626 /* Split the PP communicator over the physical nodes */
2627 /* TODO: See if we should store this (before), as it's also used for
2628 * for the nodecomm summation.
2630 // TODO PhysicalNodeCommunicator could be extended/used to handle
2631 // the need for per-node per-group communicators.
2632 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
2633 &mpi_comm_pp_physicalnode);
2634 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
2635 &dd->comm->mpi_comm_gpu_shared);
2636 MPI_Comm_free(&mpi_comm_pp_physicalnode);
2637 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
2641 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
2644 /* Note that some ranks could share a GPU, while others don't */
2646 if (dd->comm->nrank_gpu_shared == 1)
2648 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
2651 GMX_UNUSED_VALUE(cr);
2652 GMX_UNUSED_VALUE(gpu_id);
2656 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
2659 int dim0, dim1, i, j;
2664 fprintf(debug, "Making load communicators\n");
2667 snew(dd->comm->load, std::max(dd->ndim, 1));
2668 snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
2676 make_load_communicator(dd, 0, loc);
2680 for (i = 0; i < dd->nc[dim0]; i++)
2683 make_load_communicator(dd, 1, loc);
2689 for (i = 0; i < dd->nc[dim0]; i++)
2693 for (j = 0; j < dd->nc[dim1]; j++)
2696 make_load_communicator(dd, 2, loc);
2703 fprintf(debug, "Finished making load communicators\n");
2708 /*! \brief Sets up the relation between neighboring domains and zones */
2709 static void setup_neighbor_relations(gmx_domdec_t *dd)
2711 int d, dim, i, j, m;
2713 gmx_domdec_zones_t *zones;
2714 gmx_domdec_ns_ranges_t *izone;
2716 for (d = 0; d < dd->ndim; d++)
2719 copy_ivec(dd->ci, tmp);
2720 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
2721 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
2722 copy_ivec(dd->ci, tmp);
2723 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
2724 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
2727 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
2730 dd->neighbor[d][1]);
2734 int nzone = (1 << dd->ndim);
2735 int nizone = (1 << std::max(dd->ndim - 1, 0));
2736 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
2738 zones = &dd->comm->zones;
2740 for (i = 0; i < nzone; i++)
2743 clear_ivec(zones->shift[i]);
2744 for (d = 0; d < dd->ndim; d++)
2746 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
2751 for (i = 0; i < nzone; i++)
2753 for (d = 0; d < DIM; d++)
2755 s[d] = dd->ci[d] - zones->shift[i][d];
2760 else if (s[d] >= dd->nc[d])
2766 zones->nizone = nizone;
2767 for (i = 0; i < zones->nizone; i++)
2769 assert(ddNonbondedZonePairRanges[i][0] == i);
2771 izone = &zones->izone[i];
2772 /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
2773 * j-zones up to nzone.
2775 izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
2776 izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
2777 for (dim = 0; dim < DIM; dim++)
2779 if (dd->nc[dim] == 1)
2781 /* All shifts should be allowed */
2782 izone->shift0[dim] = -1;
2783 izone->shift1[dim] = 1;
2787 /* Determine the min/max j-zone shift wrt the i-zone */
2788 izone->shift0[dim] = 1;
2789 izone->shift1[dim] = -1;
2790 for (j = izone->j0; j < izone->j1; j++)
2792 int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
2793 if (shift_diff < izone->shift0[dim])
2795 izone->shift0[dim] = shift_diff;
2797 if (shift_diff > izone->shift1[dim])
2799 izone->shift1[dim] = shift_diff;
2806 if (!isDlbDisabled(dd->comm))
2808 dd->comm->cellsizesWithDlb.resize(dd->ndim);
2811 if (dd->comm->bRecordLoad)
2813 make_load_communicators(dd);
2817 static void make_pp_communicator(FILE *fplog,
2819 t_commrec gmx_unused *cr,
2820 int gmx_unused reorder)
2823 gmx_domdec_comm_t *comm;
2830 if (comm->bCartesianPP)
2832 /* Set up cartesian communication for the particle-particle part */
2835 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
2836 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
2839 for (int i = 0; i < DIM; i++)
2843 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
2845 /* We overwrite the old communicator with the new cartesian one */
2846 cr->mpi_comm_mygroup = comm_cart;
2849 dd->mpi_comm_all = cr->mpi_comm_mygroup;
2850 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
2852 if (comm->bCartesianPP_PME)
2854 /* Since we want to use the original cartesian setup for sim,
2855 * and not the one after split, we need to make an index.
2857 snew(comm->ddindex2ddnodeid, dd->nnodes);
2858 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
2859 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
2860 /* Get the rank of the DD master,
2861 * above we made sure that the master node is a PP node.
2871 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
2873 else if (comm->bCartesianPP)
2875 if (cr->npmenodes == 0)
2877 /* The PP communicator is also
2878 * the communicator for this simulation
2880 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
2882 cr->nodeid = dd->rank;
2884 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
2886 /* We need to make an index to go from the coordinates
2887 * to the nodeid of this simulation.
2889 snew(comm->ddindex2simnodeid, dd->nnodes);
2890 snew(buf, dd->nnodes);
2891 if (thisRankHasDuty(cr, DUTY_PP))
2893 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
2895 /* Communicate the ddindex to simulation nodeid index */
2896 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
2897 cr->mpi_comm_mysim);
2900 /* Determine the master coordinates and rank.
2901 * The DD master should be the same node as the master of this sim.
2903 for (int i = 0; i < dd->nnodes; i++)
2905 if (comm->ddindex2simnodeid[i] == 0)
2907 ddindex2xyz(dd->nc, i, dd->master_ci);
2908 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
2913 fprintf(debug, "The master rank is %d\n", dd->masterrank);
2918 /* No Cartesian communicators */
2919 /* We use the rank in dd->comm->all as DD index */
2920 ddindex2xyz(dd->nc, dd->rank, dd->ci);
2921 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
2923 clear_ivec(dd->master_ci);
2930 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
2931 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2936 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
2937 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2941 static void receive_ddindex2simnodeid(gmx_domdec_t *dd,
2945 gmx_domdec_comm_t *comm = dd->comm;
2947 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
2950 snew(comm->ddindex2simnodeid, dd->nnodes);
2951 snew(buf, dd->nnodes);
2952 if (thisRankHasDuty(cr, DUTY_PP))
2954 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
2956 /* Communicate the ddindex to simulation nodeid index */
2957 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
2958 cr->mpi_comm_mysim);
2962 GMX_UNUSED_VALUE(dd);
2963 GMX_UNUSED_VALUE(cr);
2967 static void split_communicator(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
2968 DdRankOrder gmx_unused rankOrder,
2969 int gmx_unused reorder)
2971 gmx_domdec_comm_t *comm;
2980 if (comm->bCartesianPP)
2982 for (i = 1; i < DIM; i++)
2984 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
2986 if (bDiv[YY] || bDiv[ZZ])
2988 comm->bCartesianPP_PME = TRUE;
2989 /* If we have 2D PME decomposition, which is always in x+y,
2990 * we stack the PME only nodes in z.
2991 * Otherwise we choose the direction that provides the thinnest slab
2992 * of PME only nodes as this will have the least effect
2993 * on the PP communication.
2994 * But for the PME communication the opposite might be better.
2996 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
2998 dd->nc[YY] > dd->nc[ZZ]))
3000 comm->cartpmedim = ZZ;
3004 comm->cartpmedim = YY;
3006 comm->ntot[comm->cartpmedim]
3007 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
3011 fprintf(fplog, "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
3013 "Will not use a Cartesian communicator for PP <-> PME\n\n");
3017 if (comm->bCartesianPP_PME)
3025 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
3028 for (i = 0; i < DIM; i++)
3032 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
3034 MPI_Comm_rank(comm_cart, &rank);
3035 if (MASTER(cr) && rank != 0)
3037 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
3040 /* With this assigment we loose the link to the original communicator
3041 * which will usually be MPI_COMM_WORLD, unless have multisim.
3043 cr->mpi_comm_mysim = comm_cart;
3044 cr->sim_nodeid = rank;
3046 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
3050 fprintf(fplog, "Cartesian rank %d, coordinates %d %d %d\n\n",
3051 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3054 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
3058 if (cr->npmenodes == 0 ||
3059 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
3061 cr->duty = DUTY_PME;
3064 /* Split the sim communicator into PP and PME only nodes */
3065 MPI_Comm_split(cr->mpi_comm_mysim,
3066 getThisRankDuties(cr),
3067 dd_index(comm->ntot, dd->ci),
3068 &cr->mpi_comm_mygroup);
3075 case DdRankOrder::pp_pme:
3078 fprintf(fplog, "Order of the ranks: PP first, PME last\n");
3081 case DdRankOrder::interleave:
3082 /* Interleave the PP-only and PME-only ranks */
3085 fprintf(fplog, "Interleaving PP and PME ranks\n");
3087 comm->pmenodes = dd_interleaved_pme_ranks(dd);
3089 case DdRankOrder::cartesian:
3092 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(rankOrder));
3095 if (dd_simnode2pmenode(dd, cr, cr->sim_nodeid) == -1)
3097 cr->duty = DUTY_PME;
3104 /* Split the sim communicator into PP and PME only nodes */
3105 MPI_Comm_split(cr->mpi_comm_mysim,
3106 getThisRankDuties(cr),
3108 &cr->mpi_comm_mygroup);
3109 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
3115 fprintf(fplog, "This rank does only %s work.\n\n",
3116 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
3120 /*! \brief Generates the MPI communicators for domain decomposition */
3121 static void make_dd_communicators(FILE *fplog, t_commrec *cr,
3122 gmx_domdec_t *dd, DdRankOrder ddRankOrder)
3124 gmx_domdec_comm_t *comm;
3129 copy_ivec(dd->nc, comm->ntot);
3131 comm->bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
3132 comm->bCartesianPP_PME = FALSE;
3134 /* Reorder the nodes by default. This might change the MPI ranks.
3135 * Real reordering is only supported on very few architectures,
3136 * Blue Gene is one of them.
3138 CartReorder = (getenv("GMX_NO_CART_REORDER") == nullptr);
3140 if (cr->npmenodes > 0)
3142 /* Split the communicator into a PP and PME part */
3143 split_communicator(fplog, cr, dd, ddRankOrder, CartReorder);
3144 if (comm->bCartesianPP_PME)
3146 /* We (possibly) reordered the nodes in split_communicator,
3147 * so it is no longer required in make_pp_communicator.
3149 CartReorder = FALSE;
3154 /* All nodes do PP and PME */
3156 /* We do not require separate communicators */
3157 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
3161 if (thisRankHasDuty(cr, DUTY_PP))
3163 /* Copy or make a new PP communicator */
3164 make_pp_communicator(fplog, dd, cr, CartReorder);
3168 receive_ddindex2simnodeid(dd, cr);
3171 if (!thisRankHasDuty(cr, DUTY_PME))
3173 /* Set up the commnuication to our PME node */
3174 dd->pme_nodeid = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
3175 dd->pme_receive_vir_ener = receive_vir_ener(dd, cr);
3178 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
3179 dd->pme_nodeid, dd->pme_receive_vir_ener);
3184 dd->pme_nodeid = -1;
3189 dd->ma = gmx::compat::make_unique<AtomDistribution>(dd->nc,
3191 comm->cgs_gl.index[comm->cgs_gl.nr]);
3195 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
3197 real *slb_frac, tot;
3202 if (nc > 1 && size_string != nullptr)
3206 fprintf(fplog, "Using static load balancing for the %s direction\n",
3211 for (i = 0; i < nc; i++)
3214 sscanf(size_string, "%20lf%n", &dbl, &n);
3217 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
3226 fprintf(fplog, "Relative cell sizes:");
3228 for (i = 0; i < nc; i++)
3233 fprintf(fplog, " %5.3f", slb_frac[i]);
3238 fprintf(fplog, "\n");
3245 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
3248 gmx_mtop_ilistloop_t iloop;
3252 iloop = gmx_mtop_ilistloop_init(mtop);
3253 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
3255 for (ftype = 0; ftype < F_NRE; ftype++)
3257 if ((interaction_function[ftype].flags & IF_BOND) &&
3260 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
3268 static int dd_getenv(FILE *fplog, const char *env_var, int def)
3274 val = getenv(env_var);
3277 if (sscanf(val, "%20d", &nst) <= 0)
3283 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
3291 static void dd_warning(const t_commrec *cr, FILE *fplog, const char *warn_string)
3295 fprintf(stderr, "\n%s\n", warn_string);
3299 fprintf(fplog, "\n%s\n", warn_string);
3303 static void check_dd_restrictions(t_commrec *cr, const gmx_domdec_t *dd,
3304 const t_inputrec *ir, FILE *fplog)
3306 if (ir->ePBC == epbcSCREW &&
3307 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
3309 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
3312 if (ir->ns_type == ensSIMPLE)
3314 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
3317 if (ir->nstlist == 0)
3319 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
3322 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
3324 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
3328 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3333 r = ddbox->box_size[XX];
3334 for (di = 0; di < dd->ndim; di++)
3337 /* Check using the initial average cell size */
3338 r = std::min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
3344 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
3346 static int forceDlbOffOrBail(int cmdlineDlbState,
3347 const std::string &reasonStr,
3351 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
3352 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
3354 if (cmdlineDlbState == edlbsOnUser)
3356 gmx_fatal(FARGS, (dlbNotSupportedErr + reasonStr).c_str());
3358 else if (cmdlineDlbState == edlbsOffCanTurnOn)
3360 dd_warning(cr, fplog, (dlbDisableNote + reasonStr + "\n").c_str());
3362 return edlbsOffForever;
3365 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
3367 * This function parses the parameters of "-dlb" command line option setting
3368 * corresponding state values. Then it checks the consistency of the determined
3369 * state with other run parameters and settings. As a result, the initial state
3370 * may be altered or an error may be thrown if incompatibility of options is detected.
3372 * \param [in] fplog Pointer to mdrun log file.
3373 * \param [in] cr Pointer to MPI communication object.
3374 * \param [in] dlbOption Enum value for the DLB option.
3375 * \param [in] bRecordLoad True if the load balancer is recording load information.
3376 * \param [in] mdrunOptions Options for mdrun.
3377 * \param [in] ir Pointer mdrun to input parameters.
3378 * \returns DLB initial/startup state.
3380 static int determineInitialDlbState(FILE *fplog, t_commrec *cr,
3381 DlbOption dlbOption, gmx_bool bRecordLoad,
3382 const MdrunOptions &mdrunOptions,
3383 const t_inputrec *ir)
3385 int dlbState = edlbsOffCanTurnOn;
3389 case DlbOption::turnOnWhenUseful: dlbState = edlbsOffCanTurnOn; break;
3390 case DlbOption::no: dlbState = edlbsOffUser; break;
3391 case DlbOption::yes: dlbState = edlbsOnUser; break;
3392 default: gmx_incons("Invalid dlbOption enum value");
3395 /* Reruns don't support DLB: bail or override auto mode */
3396 if (mdrunOptions.rerun)
3398 std::string reasonStr = "it is not supported in reruns.";
3399 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3402 /* Unsupported integrators */
3403 if (!EI_DYNAMICS(ir->eI))
3405 auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
3406 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3409 /* Without cycle counters we can't time work to balance on */
3412 std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
3413 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3416 if (mdrunOptions.reproducible)
3418 std::string reasonStr = "you started a reproducible run.";
3423 case edlbsOffForever:
3424 GMX_RELEASE_ASSERT(false, "edlbsOffForever is not a valid initial state");
3426 case edlbsOffCanTurnOn:
3427 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3429 case edlbsOnCanTurnOff:
3430 GMX_RELEASE_ASSERT(false, "edlbsOffCanTurnOff is not a valid initial state");
3433 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", cr, fplog);
3436 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", dlbState);
3444 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
3447 if (getenv("GMX_DD_ORDER_ZYX") != nullptr)
3449 /* Decomposition order z,y,x */
3452 fprintf(fplog, "Using domain decomposition order z, y, x\n");
3454 for (int dim = DIM-1; dim >= 0; dim--)
3456 if (dd->nc[dim] > 1)
3458 dd->dim[dd->ndim++] = dim;
3464 /* Decomposition order x,y,z */
3465 for (int dim = 0; dim < DIM; dim++)
3467 if (dd->nc[dim] > 1)
3469 dd->dim[dd->ndim++] = dim;
3476 /* Set dim[0] to avoid extra checks on ndim in several places */
3481 static gmx_domdec_comm_t *init_dd_comm()
3483 gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
3485 comm->n_load_have = 0;
3486 comm->n_load_collect = 0;
3488 comm->haveTurnedOffDlb = false;
3490 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
3492 comm->sum_nat[i] = 0;
3496 comm->load_step = 0;
3499 clear_ivec(comm->load_lim);
3503 /* This should be replaced by a unique pointer */
3504 comm->balanceRegion = ddBalanceRegionAllocate();
3509 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
3510 static void set_dd_limits_and_grid(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
3511 const DomdecOptions &options,
3512 const MdrunOptions &mdrunOptions,
3513 const gmx_mtop_t *mtop,
3514 const t_inputrec *ir,
3516 gmx::ArrayRef<const gmx::RVec> xGlobal,
3520 real r_bonded_limit = -1;
3521 const real tenPercentMargin = 1.1;
3522 gmx_domdec_comm_t *comm = dd->comm;
3524 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
3525 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
3527 dd->pme_recv_f_alloc = 0;
3528 dd->pme_recv_f_buf = nullptr;
3530 /* Initialize to GPU share count to 0, might change later */
3531 comm->nrank_gpu_shared = 0;
3533 comm->dlbState = determineInitialDlbState(fplog, cr, options.dlbOption, comm->bRecordLoad, mdrunOptions, ir);
3534 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
3535 /* To consider turning DLB on after 2*nstlist steps we need to check
3536 * at partitioning count 3. Thus we need to increase the first count by 2.
3538 comm->ddPartioningCountFirstDlbOff += 2;
3542 fprintf(fplog, "Dynamic load balancing: %s\n",
3543 edlbs_names[comm->dlbState]);
3545 comm->bPMELoadBalDLBLimits = FALSE;
3547 /* Allocate the charge group/atom sorting struct */
3548 comm->sort = gmx::compat::make_unique<gmx_domdec_sort_t>();
3550 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
3552 comm->bInterCGBondeds = ((ncg_mtop(mtop) > gmx_mtop_num_molecules(*mtop)) ||
3553 mtop->bIntermolecularInteractions);
3554 if (comm->bInterCGBondeds)
3556 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
3560 comm->bInterCGMultiBody = FALSE;
3563 dd->bInterCGcons = inter_charge_group_constraints(*mtop);
3564 dd->bInterCGsettles = inter_charge_group_settles(*mtop);
3568 /* Set the cut-off to some very large value,
3569 * so we don't need if statements everywhere in the code.
3570 * We use sqrt, since the cut-off is squared in some places.
3572 comm->cutoff = GMX_CUTOFF_INF;
3576 comm->cutoff = ir->rlist;
3578 comm->cutoff_mbody = 0;
3580 comm->cellsize_limit = 0;
3581 comm->bBondComm = FALSE;
3583 /* Atoms should be able to move by up to half the list buffer size (if > 0)
3584 * within nstlist steps. Since boundaries are allowed to displace by half
3585 * a cell size, DD cells should be at least the size of the list buffer.
3587 comm->cellsize_limit = std::max(comm->cellsize_limit,
3588 ir->rlist - std::max(ir->rvdw, ir->rcoulomb));
3590 if (comm->bInterCGBondeds)
3592 if (options.minimumCommunicationRange > 0)
3594 comm->cutoff_mbody = options.minimumCommunicationRange;
3595 if (options.useBondedCommunication)
3597 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
3601 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
3603 r_bonded_limit = comm->cutoff_mbody;
3605 else if (ir->bPeriodicMols)
3607 /* Can not easily determine the required cut-off */
3608 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
3609 comm->cutoff_mbody = comm->cutoff/2;
3610 r_bonded_limit = comm->cutoff_mbody;
3618 dd_bonded_cg_distance(fplog, mtop, ir, as_rvec_array(xGlobal.data()), box,
3619 options.checkBondedInteractions,
3622 gmx_bcast(sizeof(r_2b), &r_2b, cr);
3623 gmx_bcast(sizeof(r_mb), &r_mb, cr);
3625 /* We use an initial margin of 10% for the minimum cell size,
3626 * except when we are just below the non-bonded cut-off.
3628 if (options.useBondedCommunication)
3630 if (std::max(r_2b, r_mb) > comm->cutoff)
3632 r_bonded = std::max(r_2b, r_mb);
3633 r_bonded_limit = tenPercentMargin*r_bonded;
3634 comm->bBondComm = TRUE;
3639 r_bonded_limit = std::min(tenPercentMargin*r_bonded, comm->cutoff);
3641 /* We determine cutoff_mbody later */
3645 /* No special bonded communication,
3646 * simply increase the DD cut-off.
3648 r_bonded_limit = tenPercentMargin*std::max(r_2b, r_mb);
3649 comm->cutoff_mbody = r_bonded_limit;
3650 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
3656 "Minimum cell size due to bonded interactions: %.3f nm\n",
3659 comm->cellsize_limit = std::max(comm->cellsize_limit, r_bonded_limit);
3663 if (dd->bInterCGcons && options.constraintCommunicationRange <= 0)
3665 /* There is a cell size limit due to the constraints (P-LINCS) */
3666 rconstr = constr_r_max(fplog, mtop, ir);
3670 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
3672 if (rconstr > comm->cellsize_limit)
3674 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
3678 else if (options.constraintCommunicationRange > 0 && fplog)
3680 /* Here we do not check for dd->bInterCGcons,
3681 * because one can also set a cell size limit for virtual sites only
3682 * and at this point we don't know yet if there are intercg v-sites.
3685 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
3686 options.constraintCommunicationRange);
3687 rconstr = options.constraintCommunicationRange;
3689 comm->cellsize_limit = std::max(comm->cellsize_limit, rconstr);
3691 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
3693 if (options.numCells[XX] > 0)
3695 copy_ivec(options.numCells, dd->nc);
3696 set_dd_dim(fplog, dd);
3697 set_ddbox_cr(cr, &dd->nc, ir, box, xGlobal, ddbox);
3699 if (options.numPmeRanks >= 0)
3701 cr->npmenodes = options.numPmeRanks;
3705 /* When the DD grid is set explicitly and -npme is set to auto,
3706 * don't use PME ranks. We check later if the DD grid is
3707 * compatible with the total number of ranks.
3712 real acs = average_cellsize_min(dd, ddbox);
3713 if (acs < comm->cellsize_limit)
3717 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
3719 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3720 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
3721 acs, comm->cellsize_limit);
3726 set_ddbox_cr(cr, nullptr, ir, box, xGlobal, ddbox);
3728 /* We need to choose the optimal DD grid and possibly PME nodes */
3730 dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
3731 options.numPmeRanks,
3732 !isDlbDisabled(comm),
3734 comm->cellsize_limit, comm->cutoff,
3735 comm->bInterCGBondeds);
3737 if (dd->nc[XX] == 0)
3740 gmx_bool bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
3741 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
3742 !bC ? "-rdd" : "-rcon",
3743 comm->dlbState != edlbsOffUser ? " or -dds" : "",
3744 bC ? " or your LINCS settings" : "");
3746 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3747 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
3749 "Look in the log file for details on the domain decomposition",
3750 cr->nnodes-cr->npmenodes, limit, buf);
3752 set_dd_dim(fplog, dd);
3758 "Domain decomposition grid %d x %d x %d, separate PME ranks %d\n",
3759 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
3762 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
3763 if (cr->nnodes - dd->nnodes != cr->npmenodes)
3765 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3766 "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
3767 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
3769 if (cr->npmenodes > dd->nnodes)
3771 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3772 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
3774 if (cr->npmenodes > 0)
3776 comm->npmenodes = cr->npmenodes;
3780 comm->npmenodes = dd->nnodes;
3783 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
3785 /* The following choices should match those
3786 * in comm_cost_est in domdec_setup.c.
3787 * Note that here the checks have to take into account
3788 * that the decomposition might occur in a different order than xyz
3789 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
3790 * in which case they will not match those in comm_cost_est,
3791 * but since that is mainly for testing purposes that's fine.
3793 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
3794 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
3795 getenv("GMX_PMEONEDD") == nullptr)
3797 comm->npmedecompdim = 2;
3798 comm->npmenodes_x = dd->nc[XX];
3799 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
3803 /* In case nc is 1 in both x and y we could still choose to
3804 * decompose pme in y instead of x, but we use x for simplicity.
3806 comm->npmedecompdim = 1;
3807 if (dd->dim[0] == YY)
3809 comm->npmenodes_x = 1;
3810 comm->npmenodes_y = comm->npmenodes;
3814 comm->npmenodes_x = comm->npmenodes;
3815 comm->npmenodes_y = 1;
3820 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
3821 comm->npmenodes_x, comm->npmenodes_y, 1);
3826 comm->npmedecompdim = 0;
3827 comm->npmenodes_x = 0;
3828 comm->npmenodes_y = 0;
3831 snew(comm->slb_frac, DIM);
3832 if (isDlbDisabled(comm))
3834 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], options.cellSizeX);
3835 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], options.cellSizeY);
3836 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], options.cellSizeZ);
3839 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
3841 if (comm->bBondComm || !isDlbDisabled(comm))
3843 /* Set the bonded communication distance to halfway
3844 * the minimum and the maximum,
3845 * since the extra communication cost is nearly zero.
3847 real acs = average_cellsize_min(dd, ddbox);
3848 comm->cutoff_mbody = 0.5*(r_bonded + acs);
3849 if (!isDlbDisabled(comm))
3851 /* Check if this does not limit the scaling */
3852 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
3853 options.dlbScaling*acs);
3855 if (!comm->bBondComm)
3857 /* Without bBondComm do not go beyond the n.b. cut-off */
3858 comm->cutoff_mbody = std::min(comm->cutoff_mbody, comm->cutoff);
3859 if (comm->cellsize_limit >= comm->cutoff)
3861 /* We don't loose a lot of efficieny
3862 * when increasing it to the n.b. cut-off.
3863 * It can even be slightly faster, because we need
3864 * less checks for the communication setup.
3866 comm->cutoff_mbody = comm->cutoff;
3869 /* Check if we did not end up below our original limit */
3870 comm->cutoff_mbody = std::max(comm->cutoff_mbody, r_bonded_limit);
3872 if (comm->cutoff_mbody > comm->cellsize_limit)
3874 comm->cellsize_limit = comm->cutoff_mbody;
3877 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
3882 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
3883 "cellsize limit %f\n",
3884 comm->bBondComm, comm->cellsize_limit);
3889 check_dd_restrictions(cr, dd, ir, fplog);
3893 static void set_dlb_limits(gmx_domdec_t *dd)
3896 for (int d = 0; d < dd->ndim; d++)
3898 /* Set the number of pulses to the value for DLB */
3899 dd->comm->cd[d].ind.resize(dd->comm->cd[d].np_dlb);
3901 dd->comm->cellsize_min[dd->dim[d]] =
3902 dd->comm->cellsize_min_dlb[dd->dim[d]];
3907 static void turn_on_dlb(FILE *fplog, const t_commrec *cr, gmx_int64_t step)
3909 gmx_domdec_t *dd = cr->dd;
3910 gmx_domdec_comm_t *comm = dd->comm;
3912 real cellsize_min = comm->cellsize_min[dd->dim[0]];
3913 for (int d = 1; d < dd->ndim; d++)
3915 cellsize_min = std::min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
3918 /* Turn off DLB if we're too close to the cell size limit. */
3919 if (cellsize_min < comm->cellsize_limit*1.05)
3921 auto str = gmx::formatString("step %" GMX_PRId64 " Measured %.1f %% performance loss due to load imbalance, "
3922 "but the minimum cell size is smaller than 1.05 times the cell size limit."
3923 "Will no longer try dynamic load balancing.\n", step, dd_force_imb_perf_loss(dd)*100);
3924 dd_warning(cr, fplog, str.c_str());
3926 comm->dlbState = edlbsOffForever;
3931 sprintf(buf, "step %" GMX_PRId64 " Turning on dynamic load balancing, because the performance loss due to load imbalance is %.1f %%.\n", step, dd_force_imb_perf_loss(dd)*100);
3932 dd_warning(cr, fplog, buf);
3933 comm->dlbState = edlbsOnCanTurnOff;
3935 /* Store the non-DLB performance, so we can check if DLB actually
3936 * improves performance.
3938 GMX_RELEASE_ASSERT(comm->cycl_n[ddCyclStep] > 0, "When we turned on DLB, we should have measured cycles");
3939 comm->cyclesPerStepBeforeDLB = comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
3943 /* We can set the required cell size info here,
3944 * so we do not need to communicate this.
3945 * The grid is completely uniform.
3947 for (int d = 0; d < dd->ndim; d++)
3949 RowMaster *rowMaster = comm->cellsizesWithDlb[d].rowMaster.get();
3953 comm->load[d].sum_m = comm->load[d].sum;
3955 int nc = dd->nc[dd->dim[d]];
3956 for (int i = 0; i < nc; i++)
3958 rowMaster->cellFrac[i] = i/static_cast<real>(nc);
3961 rowMaster->bounds[i].cellFracLowerMax = i /static_cast<real>(nc);
3962 rowMaster->bounds[i].cellFracUpperMin = (i + 1)/static_cast<real>(nc);
3965 rowMaster->cellFrac[nc] = 1.0;
3970 static void turn_off_dlb(FILE *fplog, const t_commrec *cr, gmx_int64_t step)
3972 gmx_domdec_t *dd = cr->dd;
3975 sprintf(buf, "step %" GMX_PRId64 " Turning off dynamic load balancing, because it is degrading performance.\n", step);
3976 dd_warning(cr, fplog, buf);
3977 dd->comm->dlbState = edlbsOffCanTurnOn;
3978 dd->comm->haveTurnedOffDlb = true;
3979 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
3982 static void turn_off_dlb_forever(FILE *fplog, const t_commrec *cr, gmx_int64_t step)
3984 GMX_RELEASE_ASSERT(cr->dd->comm->dlbState == edlbsOffCanTurnOn, "Can only turn off DLB forever when it was in the can-turn-on state");
3986 sprintf(buf, "step %" GMX_PRId64 " Will no longer try dynamic load balancing, as it degraded performance.\n", step);
3987 dd_warning(cr, fplog, buf);
3988 cr->dd->comm->dlbState = edlbsOffForever;
3991 static char *init_bLocalCG(const gmx_mtop_t *mtop)
3996 ncg = ncg_mtop(mtop);
3997 snew(bLocalCG, ncg);
3998 for (cg = 0; cg < ncg; cg++)
4000 bLocalCG[cg] = FALSE;
4006 void dd_init_bondeds(FILE *fplog,
4008 const gmx_mtop_t *mtop,
4009 const gmx_vsite_t *vsite,
4010 const t_inputrec *ir,
4011 gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
4013 gmx_domdec_comm_t *comm;
4015 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
4019 if (comm->bBondComm)
4021 /* Communicate atoms beyond the cut-off for bonded interactions */
4024 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
4026 comm->bLocalCG = init_bLocalCG(mtop);
4030 /* Only communicate atoms based on cut-off */
4031 comm->cglink = nullptr;
4032 comm->bLocalCG = nullptr;
4036 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
4037 const gmx_mtop_t *mtop, const t_inputrec *ir,
4038 gmx_bool bDynLoadBal, real dlb_scale,
4039 const gmx_ddbox_t *ddbox)
4041 gmx_domdec_comm_t *comm;
4047 if (fplog == nullptr)
4056 fprintf(fplog, "The maximum number of communication pulses is:");
4057 for (d = 0; d < dd->ndim; d++)
4059 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
4061 fprintf(fplog, "\n");
4062 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
4063 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
4064 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
4065 for (d = 0; d < DIM; d++)
4069 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
4076 comm->cellsize_min_dlb[d]/
4077 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
4079 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
4082 fprintf(fplog, "\n");
4086 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
4087 fprintf(fplog, "The initial number of communication pulses is:");
4088 for (d = 0; d < dd->ndim; d++)
4090 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
4092 fprintf(fplog, "\n");
4093 fprintf(fplog, "The initial domain decomposition cell size is:");
4094 for (d = 0; d < DIM; d++)
4098 fprintf(fplog, " %c %.2f nm",
4099 dim2char(d), dd->comm->cellsize_min[d]);
4102 fprintf(fplog, "\n\n");
4105 gmx_bool bInterCGVsites = count_intercg_vsites(mtop);
4107 if (comm->bInterCGBondeds ||
4109 dd->bInterCGcons || dd->bInterCGsettles)
4111 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
4112 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4113 "non-bonded interactions", "", comm->cutoff);
4117 limit = dd->comm->cellsize_limit;
4121 if (dynamic_dd_box(ddbox, ir))
4123 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
4125 limit = dd->comm->cellsize_min[XX];
4126 for (d = 1; d < DIM; d++)
4128 limit = std::min(limit, dd->comm->cellsize_min[d]);
4132 if (comm->bInterCGBondeds)
4134 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4135 "two-body bonded interactions", "(-rdd)",
4136 std::max(comm->cutoff, comm->cutoff_mbody));
4137 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4138 "multi-body bonded interactions", "(-rdd)",
4139 (comm->bBondComm || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
4143 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4144 "virtual site constructions", "(-rcon)", limit);
4146 if (dd->bInterCGcons || dd->bInterCGsettles)
4148 sprintf(buf, "atoms separated by up to %d constraints",
4150 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4151 buf, "(-rcon)", limit);
4153 fprintf(fplog, "\n");
4159 static void set_cell_limits_dlb(gmx_domdec_t *dd,
4161 const t_inputrec *ir,
4162 const gmx_ddbox_t *ddbox)
4164 gmx_domdec_comm_t *comm;
4165 int d, dim, npulse, npulse_d_max, npulse_d;
4170 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
4172 /* Determine the maximum number of comm. pulses in one dimension */
4174 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
4176 /* Determine the maximum required number of grid pulses */
4177 if (comm->cellsize_limit >= comm->cutoff)
4179 /* Only a single pulse is required */
4182 else if (!bNoCutOff && comm->cellsize_limit > 0)
4184 /* We round down slightly here to avoid overhead due to the latency
4185 * of extra communication calls when the cut-off
4186 * would be only slightly longer than the cell size.
4187 * Later cellsize_limit is redetermined,
4188 * so we can not miss interactions due to this rounding.
4190 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
4194 /* There is no cell size limit */
4195 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
4198 if (!bNoCutOff && npulse > 1)
4200 /* See if we can do with less pulses, based on dlb_scale */
4202 for (d = 0; d < dd->ndim; d++)
4205 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
4206 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
4207 npulse_d_max = std::max(npulse_d_max, npulse_d);
4209 npulse = std::min(npulse, npulse_d_max);
4212 /* This env var can override npulse */
4213 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
4220 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
4221 for (d = 0; d < dd->ndim; d++)
4223 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
4224 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
4225 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
4227 comm->bVacDLBNoLimit = FALSE;
4231 /* cellsize_limit is set for LINCS in init_domain_decomposition */
4232 if (!comm->bVacDLBNoLimit)
4234 comm->cellsize_limit = std::max(comm->cellsize_limit,
4235 comm->cutoff/comm->maxpulse);
4237 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
4238 /* Set the minimum cell size for each DD dimension */
4239 for (d = 0; d < dd->ndim; d++)
4241 if (comm->bVacDLBNoLimit ||
4242 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
4244 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
4248 comm->cellsize_min_dlb[dd->dim[d]] =
4249 comm->cutoff/comm->cd[d].np_dlb;
4252 if (comm->cutoff_mbody <= 0)
4254 comm->cutoff_mbody = std::min(comm->cutoff, comm->cellsize_limit);
4262 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
4264 /* If each molecule is a single charge group
4265 * or we use domain decomposition for each periodic dimension,
4266 * we do not need to take pbc into account for the bonded interactions.
4268 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
4271 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
4274 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
4275 static void set_ddgrid_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
4276 const gmx_mtop_t *mtop, const t_inputrec *ir,
4277 const gmx_ddbox_t *ddbox)
4279 gmx_domdec_comm_t *comm;
4285 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
4287 init_ddpme(dd, &comm->ddpme[0], 0);
4288 if (comm->npmedecompdim >= 2)
4290 init_ddpme(dd, &comm->ddpme[1], 1);
4295 comm->npmenodes = 0;
4296 if (dd->pme_nodeid >= 0)
4298 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
4299 "Can not have separate PME ranks without PME electrostatics");
4305 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
4307 if (!isDlbDisabled(comm))
4309 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
4312 print_dd_settings(fplog, dd, mtop, ir, isDlbOn(comm), dlb_scale, ddbox);
4313 if (comm->dlbState == edlbsOffCanTurnOn)
4317 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
4319 print_dd_settings(fplog, dd, mtop, ir, TRUE, dlb_scale, ddbox);
4322 if (ir->ePBC == epbcNONE)
4324 vol_frac = 1 - 1/(double)dd->nnodes;
4329 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
4333 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
4335 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
4337 dd->ga2la = ga2la_init(natoms_tot, static_cast<int>(vol_frac*natoms_tot));
4340 /*! \brief Set some important DD parameters that can be modified by env.vars */
4341 static void set_dd_envvar_options(FILE *fplog, gmx_domdec_t *dd, int rank_mysim)
4343 gmx_domdec_comm_t *comm = dd->comm;
4345 dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
4346 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
4347 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
4348 int recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
4349 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
4350 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
4351 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
4353 if (dd->bSendRecv2 && fplog)
4355 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
4362 fprintf(fplog, "Will load balance based on FLOP count\n");
4364 if (comm->eFlop > 1)
4366 srand(1 + rank_mysim);
4368 comm->bRecordLoad = TRUE;
4372 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
4376 DomdecOptions::DomdecOptions() :
4377 checkBondedInteractions(TRUE),
4378 useBondedCommunication(TRUE),
4380 rankOrder(DdRankOrder::pp_pme),
4381 minimumCommunicationRange(0),
4382 constraintCommunicationRange(0),
4383 dlbOption(DlbOption::turnOnWhenUseful),
4389 clear_ivec(numCells);
4392 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
4393 const DomdecOptions &options,
4394 const MdrunOptions &mdrunOptions,
4395 const gmx_mtop_t *mtop,
4396 const t_inputrec *ir,
4398 gmx::ArrayRef<const gmx::RVec> xGlobal)
4405 "\nInitializing Domain Decomposition on %d ranks\n", cr->nnodes);
4408 dd = new gmx_domdec_t;
4410 dd->comm = init_dd_comm();
4412 /* Initialize DD paritioning counters */
4413 dd->comm->partition_step = INT_MIN;
4416 set_dd_envvar_options(fplog, dd, cr->nodeid);
4418 gmx_ddbox_t ddbox = {0};
4419 set_dd_limits_and_grid(fplog, cr, dd, options, mdrunOptions,
4424 make_dd_communicators(fplog, cr, dd, options.rankOrder);
4426 if (thisRankHasDuty(cr, DUTY_PP))
4428 set_ddgrid_parameters(fplog, dd, options.dlbScaling, mtop, ir, &ddbox);
4430 setup_neighbor_relations(dd);
4433 /* Set overallocation to avoid frequent reallocation of arrays */
4434 set_over_alloc_dd(TRUE);
4436 clear_dd_cycle_counts(dd);
4441 static gmx_bool test_dd_cutoff(t_commrec *cr,
4442 t_state *state, const t_inputrec *ir,
4453 set_ddbox(dd, false, ir, state->box, true, state->x, &ddbox);
4457 for (d = 0; d < dd->ndim; d++)
4461 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
4462 if (dynamic_dd_box(&ddbox, ir))
4464 inv_cell_size *= DD_PRES_SCALE_MARGIN;
4467 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
4469 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
4471 if (np > dd->comm->cd[d].np_dlb)
4476 /* If a current local cell size is smaller than the requested
4477 * cut-off, we could still fix it, but this gets very complicated.
4478 * Without fixing here, we might actually need more checks.
4480 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
4487 if (!isDlbDisabled(dd->comm))
4489 /* If DLB is not active yet, we don't need to check the grid jumps.
4490 * Actually we shouldn't, because then the grid jump data is not set.
4492 if (isDlbOn(dd->comm) &&
4493 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
4498 gmx_sumi(1, &LocallyLimited, cr);
4500 if (LocallyLimited > 0)
4509 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, const t_inputrec *ir,
4512 gmx_bool bCutoffAllowed;
4514 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
4518 cr->dd->comm->cutoff = cutoff_req;
4521 return bCutoffAllowed;
4524 void set_dd_dlb_max_cutoff(t_commrec *cr, real cutoff)
4526 gmx_domdec_comm_t *comm;
4528 comm = cr->dd->comm;
4530 /* Turn on the DLB limiting (might have been on already) */
4531 comm->bPMELoadBalDLBLimits = TRUE;
4533 /* Change the cut-off limit */
4534 comm->PMELoadBal_max_cutoff = cutoff;
4538 fprintf(debug, "PME load balancing set a limit to the DLB staggering such that a %f cut-off will continue to fit\n",
4539 comm->PMELoadBal_max_cutoff);
4543 /* Sets whether we should later check the load imbalance data, so that
4544 * we can trigger dynamic load balancing if enough imbalance has
4547 * Used after PME load balancing unlocks DLB, so that the check
4548 * whether DLB will be useful can happen immediately.
4550 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue)
4552 if (dd->comm->dlbState == edlbsOffCanTurnOn)
4554 dd->comm->bCheckWhetherToTurnDlbOn = bValue;
4558 /* Store the DD partitioning count, so we can ignore cycle counts
4559 * over the next nstlist steps, which are often slower.
4561 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
4566 /* Returns if we should check whether there has been enough load
4567 * imbalance to trigger dynamic load balancing.
4569 static gmx_bool dd_dlb_get_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd)
4571 if (dd->comm->dlbState != edlbsOffCanTurnOn)
4576 if (dd->ddp_count <= dd->comm->ddPartioningCountFirstDlbOff)
4578 /* We ignore the first nstlist steps at the start of the run
4579 * or after PME load balancing or after turning DLB off, since
4580 * these often have extra allocation or cache miss overhead.
4585 if (dd->comm->cycl_n[ddCyclStep] == 0)
4587 /* We can have zero timed steps when dd_partition_system is called
4588 * more than once at the same step, e.g. with replica exchange.
4589 * Turning on DLB would trigger an assertion failure later, but is
4590 * also useless right after exchanging replicas.
4595 /* We should check whether we should use DLB directly after
4597 if (dd->comm->bCheckWhetherToTurnDlbOn)
4599 /* This flag was set when the PME load-balancing routines
4600 unlocked DLB, and should now be cleared. */
4601 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, FALSE);
4604 /* We check whether we should use DLB every c_checkTurnDlbOnInterval
4605 * partitionings (we do not do this every partioning, so that we
4606 * avoid excessive communication). */
4607 if (dd->comm->n_load_have % c_checkTurnDlbOnInterval == c_checkTurnDlbOnInterval - 1)
4615 gmx_bool dd_dlb_is_on(const gmx_domdec_t *dd)
4617 return isDlbOn(dd->comm);
4620 gmx_bool dd_dlb_is_locked(const gmx_domdec_t *dd)
4622 return (dd->comm->dlbState == edlbsOffTemporarilyLocked);
4625 void dd_dlb_lock(gmx_domdec_t *dd)
4627 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
4628 if (dd->comm->dlbState == edlbsOffCanTurnOn)
4630 dd->comm->dlbState = edlbsOffTemporarilyLocked;
4634 void dd_dlb_unlock(gmx_domdec_t *dd)
4636 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
4637 if (dd->comm->dlbState == edlbsOffTemporarilyLocked)
4639 dd->comm->dlbState = edlbsOffCanTurnOn;
4640 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
4644 static void merge_cg_buffers(int ncell,
4645 gmx_domdec_comm_dim_t *cd, int pulse,
4647 gmx::ArrayRef<int> index_gl,
4649 rvec *cg_cm, rvec *recv_vr,
4650 gmx::ArrayRef<int> cgindex,
4651 cginfo_mb_t *cginfo_mb, int *cginfo)
4653 gmx_domdec_ind_t *ind, *ind_p;
4654 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
4655 int shift, shift_at;
4657 ind = &cd->ind[pulse];
4659 /* First correct the already stored data */
4660 shift = ind->nrecv[ncell];
4661 for (cell = ncell-1; cell >= 0; cell--)
4663 shift -= ind->nrecv[cell];
4666 /* Move the cg's present from previous grid pulses */
4667 cg0 = ncg_cell[ncell+cell];
4668 cg1 = ncg_cell[ncell+cell+1];
4669 cgindex[cg1+shift] = cgindex[cg1];
4670 for (cg = cg1-1; cg >= cg0; cg--)
4672 index_gl[cg+shift] = index_gl[cg];
4673 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
4674 cgindex[cg+shift] = cgindex[cg];
4675 cginfo[cg+shift] = cginfo[cg];
4677 /* Correct the already stored send indices for the shift */
4678 for (p = 1; p <= pulse; p++)
4680 ind_p = &cd->ind[p];
4682 for (c = 0; c < cell; c++)
4684 cg0 += ind_p->nsend[c];
4686 cg1 = cg0 + ind_p->nsend[cell];
4687 for (cg = cg0; cg < cg1; cg++)
4689 ind_p->index[cg] += shift;
4695 /* Merge in the communicated buffers */
4699 for (cell = 0; cell < ncell; cell++)
4701 cg1 = ncg_cell[ncell+cell+1] + shift;
4704 /* Correct the old cg indices */
4705 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
4707 cgindex[cg+1] += shift_at;
4710 for (cg = 0; cg < ind->nrecv[cell]; cg++)
4712 /* Copy this charge group from the buffer */
4713 index_gl[cg1] = recv_i[cg0];
4714 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
4715 /* Add it to the cgindex */
4716 cg_gl = index_gl[cg1];
4717 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
4718 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
4719 cgindex[cg1+1] = cgindex[cg1] + nat;
4724 shift += ind->nrecv[cell];
4725 ncg_cell[ncell+cell+1] = cg1;
4729 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
4732 const RangePartitioning &atomGroups)
4734 /* Store the atom block boundaries for easy copying of communication buffers
4736 int g = atomGroupStart;
4737 for (int zone = 0; zone < nzone; zone++)
4739 for (gmx_domdec_ind_t &ind : cd->ind)
4741 const auto range = atomGroups.subRange(g, g + ind.nrecv[zone]);
4742 ind.cell2at0[zone] = range.begin();
4743 ind.cell2at1[zone] = range.end();
4744 g += ind.nrecv[zone];
4749 static gmx_bool missing_link(t_blocka *link, int cg_gl, const char *bLocalCG)
4755 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
4757 if (!bLocalCG[link->a[i]])
4766 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
4768 real c[DIM][4]; /* the corners for the non-bonded communication */
4769 real cr0; /* corner for rounding */
4770 real cr1[4]; /* corners for rounding */
4771 real bc[DIM]; /* corners for bounded communication */
4772 real bcr1; /* corner for rounding for bonded communication */
4775 /* Determine the corners of the domain(s) we are communicating with */
4777 set_dd_corners(const gmx_domdec_t *dd,
4778 int dim0, int dim1, int dim2,
4782 const gmx_domdec_comm_t *comm;
4783 const gmx_domdec_zones_t *zones;
4788 zones = &comm->zones;
4790 /* Keep the compiler happy */
4794 /* The first dimension is equal for all cells */
4795 c->c[0][0] = comm->cell_x0[dim0];
4798 c->bc[0] = c->c[0][0];
4803 /* This cell row is only seen from the first row */
4804 c->c[1][0] = comm->cell_x0[dim1];
4805 /* All rows can see this row */
4806 c->c[1][1] = comm->cell_x0[dim1];
4807 if (isDlbOn(dd->comm))
4809 c->c[1][1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
4812 /* For the multi-body distance we need the maximum */
4813 c->bc[1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
4816 /* Set the upper-right corner for rounding */
4817 c->cr0 = comm->cell_x1[dim0];
4822 for (j = 0; j < 4; j++)
4824 c->c[2][j] = comm->cell_x0[dim2];
4826 if (isDlbOn(dd->comm))
4828 /* Use the maximum of the i-cells that see a j-cell */
4829 for (i = 0; i < zones->nizone; i++)
4831 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
4836 std::max(c->c[2][j-4],
4837 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
4843 /* For the multi-body distance we need the maximum */
4844 c->bc[2] = comm->cell_x0[dim2];
4845 for (i = 0; i < 2; i++)
4847 for (j = 0; j < 2; j++)
4849 c->bc[2] = std::max(c->bc[2], comm->zone_d2[i][j].p1_0);
4855 /* Set the upper-right corner for rounding */
4856 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
4857 * Only cell (0,0,0) can see cell 7 (1,1,1)
4859 c->cr1[0] = comm->cell_x1[dim1];
4860 c->cr1[3] = comm->cell_x1[dim1];
4861 if (isDlbOn(dd->comm))
4863 c->cr1[0] = std::max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
4866 /* For the multi-body distance we need the maximum */
4867 c->bcr1 = std::max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
4874 /* Add the atom groups we need to send in this pulse from this zone to
4875 * \p localAtomGroups and \p work
4878 get_zone_pulse_cgs(gmx_domdec_t *dd,
4879 int zonei, int zone,
4881 gmx::ArrayRef<const int> globalAtomGroupIndices,
4882 const gmx::RangePartitioning &atomGroups,
4883 int dim, int dim_ind,
4884 int dim0, int dim1, int dim2,
4885 real r_comm2, real r_bcomm2,
4887 bool distanceIsTriclinic,
4889 real skew_fac2_d, real skew_fac_01,
4890 rvec *v_d, rvec *v_0, rvec *v_1,
4891 const dd_corners_t *c,
4892 const rvec sf2_round,
4893 gmx_bool bDistBonded,
4899 std::vector<int> *localAtomGroups,
4900 dd_comm_setup_work_t *work)
4902 gmx_domdec_comm_t *comm;
4904 gmx_bool bDistMB_pulse;
4906 real r2, rb2, r, tric_sh;
4913 bScrew = (dd->bScrewPBC && dim == XX);
4915 bDistMB_pulse = (bDistMB && bDistBonded);
4917 /* Unpack the work data */
4918 std::vector<int> &ibuf = work->atomGroupBuffer;
4919 std::vector<gmx::RVec> &vbuf = work->positionBuffer;
4923 for (cg = cg0; cg < cg1; cg++)
4927 if (!distanceIsTriclinic)
4929 /* Rectangular direction, easy */
4930 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
4937 r = cg_cm[cg][dim] - c->bc[dim_ind];
4943 /* Rounding gives at most a 16% reduction
4944 * in communicated atoms
4946 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
4948 r = cg_cm[cg][dim0] - c->cr0;
4949 /* This is the first dimension, so always r >= 0 */
4956 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
4958 r = cg_cm[cg][dim1] - c->cr1[zone];
4965 r = cg_cm[cg][dim1] - c->bcr1;
4975 /* Triclinic direction, more complicated */
4978 /* Rounding, conservative as the skew_fac multiplication
4979 * will slightly underestimate the distance.
4981 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
4983 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
4984 for (i = dim0+1; i < DIM; i++)
4986 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
4988 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
4991 rb[dim0] = rn[dim0];
4994 /* Take care that the cell planes along dim0 might not
4995 * be orthogonal to those along dim1 and dim2.
4997 for (i = 1; i <= dim_ind; i++)
5000 if (normal[dim0][dimd] > 0)
5002 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
5005 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
5010 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
5012 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
5014 for (i = dim1+1; i < DIM; i++)
5016 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
5018 rn[dim1] += tric_sh;
5021 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
5022 /* Take care of coupling of the distances
5023 * to the planes along dim0 and dim1 through dim2.
5025 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
5026 /* Take care that the cell planes along dim1
5027 * might not be orthogonal to that along dim2.
5029 if (normal[dim1][dim2] > 0)
5031 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
5037 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
5040 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
5041 /* Take care of coupling of the distances
5042 * to the planes along dim0 and dim1 through dim2.
5044 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
5045 /* Take care that the cell planes along dim1
5046 * might not be orthogonal to that along dim2.
5048 if (normal[dim1][dim2] > 0)
5050 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
5055 /* The distance along the communication direction */
5056 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
5058 for (i = dim+1; i < DIM; i++)
5060 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
5065 r2 += rn[dim]*rn[dim]*skew_fac2_d;
5066 /* Take care of coupling of the distances
5067 * to the planes along dim0 and dim1 through dim2.
5069 if (dim_ind == 1 && zonei == 1)
5071 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
5077 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
5080 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
5081 /* Take care of coupling of the distances
5082 * to the planes along dim0 and dim1 through dim2.
5084 if (dim_ind == 1 && zonei == 1)
5086 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
5094 ((bDistMB && rb2 < r_bcomm2) ||
5095 (bDist2B && r2 < r_bcomm2)) &&
5097 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
5098 missing_link(comm->cglink, globalAtomGroupIndices[cg],
5101 /* Store the local and global atom group indices and position */
5102 localAtomGroups->push_back(cg);
5103 ibuf.push_back(globalAtomGroupIndices[cg]);
5107 if (dd->ci[dim] == 0)
5109 /* Correct cg_cm for pbc */
5110 rvec_add(cg_cm[cg], box[dim], posPbc);
5113 posPbc[YY] = box[YY][YY] - posPbc[YY];
5114 posPbc[ZZ] = box[ZZ][ZZ] - posPbc[ZZ];
5119 copy_rvec(cg_cm[cg], posPbc);
5121 vbuf.emplace_back(posPbc[XX], posPbc[YY], posPbc[ZZ]);
5123 nat += atomGroups.block(cg).size();
5128 work->nsend_zone = nsend_z;
5131 static void clearCommSetupData(dd_comm_setup_work_t *work)
5133 work->localAtomGroupBuffer.clear();
5134 work->atomGroupBuffer.clear();
5135 work->positionBuffer.clear();
5137 work->nsend_zone = 0;
5140 static void setup_dd_communication(gmx_domdec_t *dd,
5141 matrix box, gmx_ddbox_t *ddbox,
5143 t_state *state, PaddedRVecVector *f)
5145 int dim_ind, dim, dim0, dim1, dim2, dimd, nat_tot;
5146 int nzone, nzone_send, zone, zonei, cg0, cg1;
5147 int c, i, cg, cg_gl, nrcg;
5148 int *zone_cg_range, pos_cg;
5149 gmx_domdec_comm_t *comm;
5150 gmx_domdec_zones_t *zones;
5151 gmx_domdec_comm_dim_t *cd;
5152 cginfo_mb_t *cginfo_mb;
5153 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
5154 real r_comm2, r_bcomm2;
5155 dd_corners_t corners;
5156 rvec *cg_cm, *normal, *v_d, *v_0 = nullptr, *v_1 = nullptr;
5157 real skew_fac2_d, skew_fac_01;
5162 fprintf(debug, "Setting up DD communication\n");
5167 if (comm->dth.empty())
5169 /* Initialize the thread data.
5170 * This can not be done in init_domain_decomposition,
5171 * as the numbers of threads is determined later.
5173 int numThreads = gmx_omp_nthreads_get(emntDomdec);
5174 comm->dth.resize(numThreads);
5177 switch (fr->cutoff_scheme)
5183 cg_cm = as_rvec_array(state->x.data());
5186 gmx_incons("unimplemented");
5190 bBondComm = comm->bBondComm;
5192 /* Do we need to determine extra distances for multi-body bondeds? */
5193 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
5195 /* Do we need to determine extra distances for only two-body bondeds? */
5196 bDist2B = (bBondComm && !bDistMB);
5198 r_comm2 = gmx::square(comm->cutoff);
5199 r_bcomm2 = gmx::square(comm->cutoff_mbody);
5203 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, std::sqrt(r_bcomm2));
5206 zones = &comm->zones;
5209 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
5210 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
5212 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
5214 /* Triclinic stuff */
5215 normal = ddbox->normal;
5219 v_0 = ddbox->v[dim0];
5220 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
5222 /* Determine the coupling coefficient for the distances
5223 * to the cell planes along dim0 and dim1 through dim2.
5224 * This is required for correct rounding.
5227 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
5230 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
5236 v_1 = ddbox->v[dim1];
5239 zone_cg_range = zones->cg_range;
5240 cginfo_mb = fr->cginfo_mb;
5242 zone_cg_range[0] = 0;
5243 zone_cg_range[1] = dd->ncg_home;
5244 comm->zone_ncg1[0] = dd->ncg_home;
5245 pos_cg = dd->ncg_home;
5247 nat_tot = comm->atomRanges.numHomeAtoms();
5249 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
5251 dim = dd->dim[dim_ind];
5252 cd = &comm->cd[dim_ind];
5254 /* Check if we need to compute triclinic distances along this dim */
5255 bool distanceIsTriclinic = false;
5256 for (i = 0; i <= dim_ind; i++)
5258 if (ddbox->tric_dir[dd->dim[i]])
5260 distanceIsTriclinic = true;
5264 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
5266 /* No pbc in this dimension, the first node should not comm. */
5274 v_d = ddbox->v[dim];
5275 skew_fac2_d = gmx::square(ddbox->skew_fac[dim]);
5277 cd->receiveInPlace = true;
5278 for (int p = 0; p < cd->numPulses(); p++)
5280 /* Only atoms communicated in the first pulse are used
5281 * for multi-body bonded interactions or for bBondComm.
5283 bDistBonded = ((bDistMB || bDist2B) && p == 0);
5285 gmx_domdec_ind_t *ind = &cd->ind[p];
5287 /* Thread 0 writes in the global index array */
5289 clearCommSetupData(&comm->dth[0]);
5291 for (zone = 0; zone < nzone_send; zone++)
5293 if (dim_ind > 0 && distanceIsTriclinic)
5295 /* Determine slightly more optimized skew_fac's
5297 * This reduces the number of communicated atoms
5298 * by about 10% for 3D DD of rhombic dodecahedra.
5300 for (dimd = 0; dimd < dim; dimd++)
5302 sf2_round[dimd] = 1;
5303 if (ddbox->tric_dir[dimd])
5305 for (i = dd->dim[dimd]+1; i < DIM; i++)
5307 /* If we are shifted in dimension i
5308 * and the cell plane is tilted forward
5309 * in dimension i, skip this coupling.
5311 if (!(zones->shift[nzone+zone][i] &&
5312 ddbox->v[dimd][i][dimd] >= 0))
5315 gmx::square(ddbox->v[dimd][i][dimd]);
5318 sf2_round[dimd] = 1/sf2_round[dimd];
5323 zonei = zone_perm[dim_ind][zone];
5326 /* Here we permutate the zones to obtain a convenient order
5327 * for neighbor searching
5329 cg0 = zone_cg_range[zonei];
5330 cg1 = zone_cg_range[zonei+1];
5334 /* Look only at the cg's received in the previous grid pulse
5336 cg1 = zone_cg_range[nzone+zone+1];
5337 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
5340 const int numThreads = static_cast<int>(comm->dth.size());
5341 #pragma omp parallel for num_threads(numThreads) schedule(static)
5342 for (int th = 0; th < numThreads; th++)
5346 dd_comm_setup_work_t &work = comm->dth[th];
5348 /* Retain data accumulated into buffers of thread 0 */
5351 clearCommSetupData(&work);
5354 int cg0_th = cg0 + ((cg1 - cg0)* th )/numThreads;
5355 int cg1_th = cg0 + ((cg1 - cg0)*(th+1))/numThreads;
5357 /* Get the cg's for this pulse in this zone */
5358 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
5359 dd->globalAtomGroupIndices,
5361 dim, dim_ind, dim0, dim1, dim2,
5363 box, distanceIsTriclinic,
5364 normal, skew_fac2_d, skew_fac_01,
5365 v_d, v_0, v_1, &corners, sf2_round,
5366 bDistBonded, bBondComm,
5369 th == 0 ? &ind->index : &work.localAtomGroupBuffer,
5372 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
5375 std::vector<int> &atomGroups = comm->dth[0].atomGroupBuffer;
5376 std::vector<gmx::RVec> &positions = comm->dth[0].positionBuffer;
5377 ind->nsend[zone] = comm->dth[0].nsend_zone;
5378 /* Append data of threads>=1 to the communication buffers */
5379 for (int th = 1; th < numThreads; th++)
5381 const dd_comm_setup_work_t &dth = comm->dth[th];
5383 ind->index.insert(ind->index.end(), dth.localAtomGroupBuffer.begin(), dth.localAtomGroupBuffer.end());
5384 atomGroups.insert(atomGroups.end(), dth.atomGroupBuffer.begin(), dth.atomGroupBuffer.end());
5385 positions.insert(positions.end(), dth.positionBuffer.begin(), dth.positionBuffer.end());
5386 comm->dth[0].nat += dth.nat;
5387 ind->nsend[zone] += dth.nsend_zone;
5390 /* Clear the counts in case we do not have pbc */
5391 for (zone = nzone_send; zone < nzone; zone++)
5393 ind->nsend[zone] = 0;
5395 ind->nsend[nzone] = ind->index.size();
5396 ind->nsend[nzone + 1] = comm->dth[0].nat;
5397 /* Communicate the number of cg's and atoms to receive */
5398 ddSendrecv(dd, dim_ind, dddirBackward,
5399 ind->nsend, nzone+2,
5400 ind->nrecv, nzone+2);
5404 /* We can receive in place if only the last zone is not empty */
5405 for (zone = 0; zone < nzone-1; zone++)
5407 if (ind->nrecv[zone] > 0)
5409 cd->receiveInPlace = false;
5414 int receiveBufferSize = 0;
5415 if (!cd->receiveInPlace)
5417 receiveBufferSize = ind->nrecv[nzone];
5419 /* These buffer are actually only needed with in-place */
5420 DDBufferAccess<int> globalAtomGroupBuffer(comm->intBuffer, receiveBufferSize);
5421 DDBufferAccess<gmx::RVec> rvecBuffer(comm->rvecBuffer, receiveBufferSize);
5423 dd_comm_setup_work_t &work = comm->dth[0];
5425 /* Make space for the global cg indices */
5426 int numAtomGroupsNew = pos_cg + ind->nrecv[nzone];
5427 dd->globalAtomGroupIndices.resize(numAtomGroupsNew);
5428 /* Communicate the global cg indices */
5429 gmx::ArrayRef<int> integerBufferRef;
5430 if (cd->receiveInPlace)
5432 integerBufferRef = gmx::arrayRefFromArray(dd->globalAtomGroupIndices.data() + pos_cg, ind->nrecv[nzone]);
5436 integerBufferRef = globalAtomGroupBuffer.buffer;
5438 ddSendrecv<int>(dd, dim_ind, dddirBackward,
5439 work.atomGroupBuffer, integerBufferRef);
5441 /* Make space for cg_cm */
5442 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
5443 if (fr->cutoff_scheme == ecutsGROUP)
5449 cg_cm = as_rvec_array(state->x.data());
5451 /* Communicate cg_cm */
5452 gmx::ArrayRef<gmx::RVec> rvecBufferRef;
5453 if (cd->receiveInPlace)
5455 rvecBufferRef = gmx::arrayRefFromArray(reinterpret_cast<gmx::RVec *>(cg_cm + pos_cg), ind->nrecv[nzone]);
5459 rvecBufferRef = rvecBuffer.buffer;
5461 ddSendrecv<gmx::RVec>(dd, dim_ind, dddirBackward,
5462 work.positionBuffer, rvecBufferRef);
5464 /* Make the charge group index */
5465 if (cd->receiveInPlace)
5467 zone = (p == 0 ? 0 : nzone - 1);
5468 while (zone < nzone)
5470 for (cg = 0; cg < ind->nrecv[zone]; cg++)
5472 cg_gl = dd->globalAtomGroupIndices[pos_cg];
5473 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
5474 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
5475 dd->atomGrouping_.appendBlock(nrcg);
5478 /* Update the charge group presence,
5479 * so we can use it in the next pass of the loop.
5481 comm->bLocalCG[cg_gl] = TRUE;
5487 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
5490 zone_cg_range[nzone+zone] = pos_cg;
5495 /* This part of the code is never executed with bBondComm. */
5496 std::vector<int> &atomGroupsIndex = dd->atomGrouping_.rawIndex();
5497 atomGroupsIndex.resize(numAtomGroupsNew + 1);
5499 merge_cg_buffers(nzone, cd, p, zone_cg_range,
5500 dd->globalAtomGroupIndices, integerBufferRef.data(),
5501 cg_cm, as_rvec_array(rvecBufferRef.data()),
5503 fr->cginfo_mb, fr->cginfo);
5504 pos_cg += ind->nrecv[nzone];
5506 nat_tot += ind->nrecv[nzone+1];
5508 if (!cd->receiveInPlace)
5510 /* Store the atom block for easy copying of communication buffers */
5511 make_cell2at_index(cd, nzone, zone_cg_range[nzone], dd->atomGrouping());
5516 comm->atomRanges.setEnd(DDAtomRanges::Type::Zones, nat_tot);
5520 /* We don't need to update cginfo, since that was alrady done above.
5521 * So we pass NULL for the forcerec.
5523 dd_set_cginfo(dd->globalAtomGroupIndices,
5524 dd->ncg_home, dd->globalAtomGroupIndices.size(),
5525 nullptr, comm->bLocalCG);
5530 fprintf(debug, "Finished setting up DD communication, zones:");
5531 for (c = 0; c < zones->n; c++)
5533 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
5535 fprintf(debug, "\n");
5539 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
5543 for (c = 0; c < zones->nizone; c++)
5545 zones->izone[c].cg1 = zones->cg_range[c+1];
5546 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
5547 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
5551 /* \brief Set zone dimensions for zones \p zone_start to \p zone_end-1
5553 * Also sets the atom density for the home zone when \p zone_start=0.
5554 * For this \p numMovedChargeGroupsInHomeZone needs to be passed to tell
5555 * how many charge groups will move but are still part of the current range.
5556 * \todo When converting domdec to use proper classes, all these variables
5557 * should be private and a method should return the correct count
5558 * depending on an internal state.
5560 * \param[in,out] dd The domain decomposition struct
5561 * \param[in] box The box
5562 * \param[in] ddbox The domain decomposition box struct
5563 * \param[in] zone_start The start of the zone range to set sizes for
5564 * \param[in] zone_end The end of the zone range to set sizes for
5565 * \param[in] numMovedChargeGroupsInHomeZone The number of charge groups in the home zone that should moved but are still present in dd->comm->zones.cg_range
5567 static void set_zones_size(gmx_domdec_t *dd,
5568 matrix box, const gmx_ddbox_t *ddbox,
5569 int zone_start, int zone_end,
5570 int numMovedChargeGroupsInHomeZone)
5572 gmx_domdec_comm_t *comm;
5573 gmx_domdec_zones_t *zones;
5582 zones = &comm->zones;
5584 /* Do we need to determine extra distances for multi-body bondeds? */
5585 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
5587 for (z = zone_start; z < zone_end; z++)
5589 /* Copy cell limits to zone limits.
5590 * Valid for non-DD dims and non-shifted dims.
5592 copy_rvec(comm->cell_x0, zones->size[z].x0);
5593 copy_rvec(comm->cell_x1, zones->size[z].x1);
5596 for (d = 0; d < dd->ndim; d++)
5600 for (z = 0; z < zones->n; z++)
5602 /* With a staggered grid we have different sizes
5603 * for non-shifted dimensions.
5605 if (isDlbOn(dd->comm) && zones->shift[z][dim] == 0)
5609 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
5610 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
5614 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
5615 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
5621 rcmbs = comm->cutoff_mbody;
5622 if (ddbox->tric_dir[dim])
5624 rcs /= ddbox->skew_fac[dim];
5625 rcmbs /= ddbox->skew_fac[dim];
5628 /* Set the lower limit for the shifted zone dimensions */
5629 for (z = zone_start; z < zone_end; z++)
5631 if (zones->shift[z][dim] > 0)
5634 if (!isDlbOn(dd->comm) || d == 0)
5636 zones->size[z].x0[dim] = comm->cell_x1[dim];
5637 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
5641 /* Here we take the lower limit of the zone from
5642 * the lowest domain of the zone below.
5646 zones->size[z].x0[dim] =
5647 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
5653 zones->size[z].x0[dim] =
5654 zones->size[zone_perm[2][z-4]].x0[dim];
5658 zones->size[z].x0[dim] =
5659 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
5662 /* A temporary limit, is updated below */
5663 zones->size[z].x1[dim] = zones->size[z].x0[dim];
5667 for (zi = 0; zi < zones->nizone; zi++)
5669 if (zones->shift[zi][dim] == 0)
5671 /* This takes the whole zone into account.
5672 * With multiple pulses this will lead
5673 * to a larger zone then strictly necessary.
5675 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
5676 zones->size[zi].x1[dim]+rcmbs);
5684 /* Loop over the i-zones to set the upper limit of each
5687 for (zi = 0; zi < zones->nizone; zi++)
5689 if (zones->shift[zi][dim] == 0)
5691 /* We should only use zones up to zone_end */
5692 int jZoneEnd = std::min(zones->izone[zi].j1, zone_end);
5693 for (z = zones->izone[zi].j0; z < jZoneEnd; z++)
5695 if (zones->shift[z][dim] > 0)
5697 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
5698 zones->size[zi].x1[dim]+rcs);
5705 for (z = zone_start; z < zone_end; z++)
5707 /* Initialization only required to keep the compiler happy */
5708 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
5711 /* To determine the bounding box for a zone we need to find
5712 * the extreme corners of 4, 2 or 1 corners.
5714 nc = 1 << (ddbox->nboundeddim - 1);
5716 for (c = 0; c < nc; c++)
5718 /* Set up a zone corner at x=0, ignoring trilinic couplings */
5722 corner[YY] = zones->size[z].x0[YY];
5726 corner[YY] = zones->size[z].x1[YY];
5730 corner[ZZ] = zones->size[z].x0[ZZ];
5734 corner[ZZ] = zones->size[z].x1[ZZ];
5736 if (dd->ndim == 1 && dd->dim[0] < ZZ && ZZ < dd->npbcdim &&
5737 box[ZZ][1 - dd->dim[0]] != 0)
5739 /* With 1D domain decomposition the cg's are not in
5740 * the triclinic box, but triclinic x-y and rectangular y/x-z.
5741 * Shift the corner of the z-vector back to along the box
5742 * vector of dimension d, so it will later end up at 0 along d.
5743 * This can affect the location of this corner along dd->dim[0]
5744 * through the matrix operation below if box[d][dd->dim[0]]!=0.
5746 int d = 1 - dd->dim[0];
5748 corner[d] -= corner[ZZ]*box[ZZ][d]/box[ZZ][ZZ];
5750 /* Apply the triclinic couplings */
5751 assert(ddbox->npbcdim <= DIM);
5752 for (i = YY; i < ddbox->npbcdim; i++)
5754 for (j = XX; j < i; j++)
5756 corner[j] += corner[i]*box[i][j]/box[i][i];
5761 copy_rvec(corner, corner_min);
5762 copy_rvec(corner, corner_max);
5766 for (i = 0; i < DIM; i++)
5768 corner_min[i] = std::min(corner_min[i], corner[i]);
5769 corner_max[i] = std::max(corner_max[i], corner[i]);
5773 /* Copy the extreme cornes without offset along x */
5774 for (i = 0; i < DIM; i++)
5776 zones->size[z].bb_x0[i] = corner_min[i];
5777 zones->size[z].bb_x1[i] = corner_max[i];
5779 /* Add the offset along x */
5780 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
5781 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
5784 if (zone_start == 0)
5787 for (dim = 0; dim < DIM; dim++)
5789 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
5791 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0] - numMovedChargeGroupsInHomeZone)/vol;
5796 for (z = zone_start; z < zone_end; z++)
5798 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
5800 zones->size[z].x0[XX], zones->size[z].x1[XX],
5801 zones->size[z].x0[YY], zones->size[z].x1[YY],
5802 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
5803 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
5805 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
5806 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
5807 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
5812 static int comp_cgsort(const void *a, const void *b)
5816 gmx_cgsort_t *cga, *cgb;
5817 cga = (gmx_cgsort_t *)a;
5818 cgb = (gmx_cgsort_t *)b;
5820 comp = cga->nsc - cgb->nsc;
5823 comp = cga->ind_gl - cgb->ind_gl;
5829 /* Order data in \p dataToSort according to \p sort
5831 * Note: both buffers should have at least \p sort.size() elements.
5833 template <typename T>
5835 orderVector(gmx::ArrayRef<const gmx_cgsort_t> sort,
5836 gmx::ArrayRef<T> dataToSort,
5837 gmx::ArrayRef<T> sortBuffer)
5839 GMX_ASSERT(dataToSort.size() >= sort.size(), "The vector needs to be sufficiently large");
5840 GMX_ASSERT(sortBuffer.size() >= sort.size(), "The sorting buffer needs to be sufficiently large");
5842 /* Order the data into the temporary buffer */
5844 for (const gmx_cgsort_t &entry : sort)
5846 sortBuffer[i++] = dataToSort[entry.ind];
5849 /* Copy back to the original array */
5850 std::copy(sortBuffer.begin(), sortBuffer.begin() + sort.size(),
5851 dataToSort.begin());
5854 /* Order data in \p dataToSort according to \p sort
5856 * Note: \p vectorToSort should have at least \p sort.size() elements,
5857 * \p workVector is resized when it is too small.
5859 template <typename T>
5861 orderVector(gmx::ArrayRef<const gmx_cgsort_t> sort,
5862 gmx::ArrayRef<T> vectorToSort,
5863 std::vector<T> *workVector)
5865 if (gmx::index(workVector->size()) < sort.size())
5867 workVector->resize(sort.size());
5869 orderVector<T>(sort, vectorToSort, *workVector);
5872 static void order_vec_atom(const gmx::RangePartitioning *atomGroups,
5873 gmx::ArrayRef<const gmx_cgsort_t> sort,
5874 gmx::ArrayRef<gmx::RVec> v,
5875 gmx::ArrayRef<gmx::RVec> buf)
5877 if (atomGroups == nullptr)
5879 /* Avoid the useless loop of the atoms within a cg */
5880 orderVector(sort, v, buf);
5885 /* Order the data */
5887 for (const gmx_cgsort_t &entry : sort)
5889 for (int i : atomGroups->block(entry.ind))
5891 copy_rvec(v[i], buf[a]);
5897 /* Copy back to the original array */
5898 for (int a = 0; a < atot; a++)
5900 copy_rvec(buf[a], v[a]);
5904 /* Returns whether a < b */
5905 static bool compareCgsort(const gmx_cgsort_t &a,
5906 const gmx_cgsort_t &b)
5908 return (a.nsc < b.nsc ||
5909 (a.nsc == b.nsc && a.ind_gl < b.ind_gl));
5912 static void orderedSort(gmx::ArrayRef<const gmx_cgsort_t> stationary,
5913 gmx::ArrayRef<gmx_cgsort_t> moved,
5914 std::vector<gmx_cgsort_t> *sort1)
5916 /* The new indices are not very ordered, so we qsort them */
5917 gmx_qsort_threadsafe(moved.data(), moved.size(), sizeof(moved[0]), comp_cgsort);
5919 /* stationary is already ordered, so now we can merge the two arrays */
5920 sort1->resize(stationary.size() + moved.size());
5921 std::merge(stationary.begin(), stationary.end(),
5922 moved.begin(), moved.end(),
5927 /* Set the sorting order for systems with charge groups, returned in sort->sort.
5928 * The order is according to the global charge group index.
5929 * This adds and removes charge groups that moved between domains.
5931 static void dd_sort_order(const gmx_domdec_t *dd,
5932 const t_forcerec *fr,
5934 gmx_domdec_sort_t *sort)
5936 const int *a = fr->ns->grid->cell_index;
5938 const int movedValue = NSGRID_SIGNAL_MOVED_FAC*fr->ns->grid->ncells;
5940 if (ncg_home_old >= 0)
5942 std::vector<gmx_cgsort_t> &stationary = sort->stationary;
5943 std::vector<gmx_cgsort_t> &moved = sort->moved;
5945 /* The charge groups that remained in the same ns grid cell
5946 * are completely ordered. So we can sort efficiently by sorting
5947 * the charge groups that did move into the stationary list.
5948 * Note: push_back() seems to be slightly slower than direct access.
5952 for (int i = 0; i < dd->ncg_home; i++)
5954 /* Check if this cg did not move to another node */
5955 if (a[i] < movedValue)
5959 entry.ind_gl = dd->globalAtomGroupIndices[i];
5962 if (i >= ncg_home_old || a[i] != sort->sorted[i].nsc)
5964 /* This cg is new on this node or moved ns grid cell */
5965 moved.push_back(entry);
5969 /* This cg did not move */
5970 stationary.push_back(entry);
5977 fprintf(debug, "ordered sort cgs: stationary %zu moved %zu\n",
5978 stationary.size(), moved.size());
5980 /* Sort efficiently */
5981 orderedSort(stationary, moved, &sort->sorted);
5985 std::vector<gmx_cgsort_t> &cgsort = sort->sorted;
5987 cgsort.reserve(dd->ncg_home);
5989 for (int i = 0; i < dd->ncg_home; i++)
5991 /* Sort on the ns grid cell indices
5992 * and the global topology index
5996 entry.ind_gl = dd->globalAtomGroupIndices[i];
5998 cgsort.push_back(entry);
5999 if (cgsort[i].nsc < movedValue)
6006 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, numCGNew);
6008 /* Determine the order of the charge groups using qsort */
6009 gmx_qsort_threadsafe(cgsort.data(), dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
6011 /* Remove the charge groups which are no longer at home here */
6012 cgsort.resize(numCGNew);
6016 /* Returns the sorting order for atoms based on the nbnxn grid order in sort */
6017 static void dd_sort_order_nbnxn(const t_forcerec *fr,
6018 std::vector<gmx_cgsort_t> *sort)
6020 gmx::ArrayRef<const int> atomOrder = nbnxn_get_atomorder(fr->nbv->nbs.get());
6022 /* Using push_back() instead of this resize results in much slower code */
6023 sort->resize(atomOrder.size());
6024 gmx::ArrayRef<gmx_cgsort_t> buffer = *sort;
6025 size_t numSorted = 0;
6026 for (int i : atomOrder)
6030 /* The values of nsc and ind_gl are not used in this case */
6031 buffer[numSorted++].ind = i;
6034 sort->resize(numSorted);
6037 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
6040 gmx_domdec_sort_t *sort = dd->comm->sort.get();
6042 switch (fr->cutoff_scheme)
6045 dd_sort_order(dd, fr, ncg_home_old, sort);
6048 dd_sort_order_nbnxn(fr, &sort->sorted);
6051 gmx_incons("unimplemented");
6054 const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
6056 /* We alloc with the old size, since cgindex is still old */
6057 GMX_ASSERT(atomGrouping.numBlocks() == dd->ncg_home, "atomGroups and dd should be consistent");
6058 DDBufferAccess<gmx::RVec> rvecBuffer(dd->comm->rvecBuffer, atomGrouping.fullRange().end());
6060 const gmx::RangePartitioning *atomGroupsPtr = (dd->comm->bCGs ? &atomGrouping : nullptr);
6062 /* Set the new home atom/charge group count */
6063 dd->ncg_home = sort->sorted.size();
6066 fprintf(debug, "Set the new home charge group count to %d\n",
6070 /* Reorder the state */
6071 gmx::ArrayRef<const gmx_cgsort_t> cgsort = sort->sorted;
6072 GMX_RELEASE_ASSERT(cgsort.size() == dd->ncg_home, "We should sort all the home atom groups");
6074 if (state->flags & (1 << estX))
6076 order_vec_atom(atomGroupsPtr, cgsort, state->x, rvecBuffer.buffer);
6078 if (state->flags & (1 << estV))
6080 order_vec_atom(atomGroupsPtr, cgsort, state->v, rvecBuffer.buffer);
6082 if (state->flags & (1 << estCGP))
6084 order_vec_atom(atomGroupsPtr, cgsort, state->cg_p, rvecBuffer.buffer);
6087 if (fr->cutoff_scheme == ecutsGROUP)
6090 gmx::ArrayRef<gmx::RVec> cgcmRef = gmx::arrayRefFromArray(reinterpret_cast<gmx::RVec *>(cgcm[0]), cgsort.size());
6091 orderVector(cgsort, cgcmRef, rvecBuffer.buffer);
6094 /* Reorder the global cg index */
6095 orderVector<int>(cgsort, dd->globalAtomGroupIndices, &sort->intBuffer);
6096 /* Reorder the cginfo */
6097 orderVector<int>(cgsort, gmx::arrayRefFromArray(fr->cginfo, cgsort.size()), &sort->intBuffer);
6098 /* Rebuild the local cg index */
6101 /* We make a new, ordered atomGroups object and assign it to
6102 * the old one. This causes some allocation overhead, but saves
6103 * a copy back of the whole index.
6105 gmx::RangePartitioning ordered;
6106 for (const gmx_cgsort_t &entry : cgsort)
6108 ordered.appendBlock(atomGrouping.block(entry.ind).size());
6110 dd->atomGrouping_ = ordered;
6114 dd->atomGrouping_.setAllBlocksSizeOne(dd->ncg_home);
6116 /* Set the home atom number */
6117 dd->comm->atomRanges.setEnd(DDAtomRanges::Type::Home, dd->atomGrouping().fullRange().end());
6119 if (fr->cutoff_scheme == ecutsVERLET)
6121 /* The atoms are now exactly in grid order, update the grid order */
6122 nbnxn_set_atomorder(fr->nbv->nbs.get());
6126 /* Copy the sorted ns cell indices back to the ns grid struct */
6127 for (gmx::index i = 0; i < cgsort.size(); i++)
6129 fr->ns->grid->cell_index[i] = cgsort[i].nsc;
6131 fr->ns->grid->nr = cgsort.size();
6135 static void add_dd_statistics(gmx_domdec_t *dd)
6137 gmx_domdec_comm_t *comm = dd->comm;
6139 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6141 auto range = static_cast<DDAtomRanges::Type>(i);
6143 comm->atomRanges.end(range) - comm->atomRanges.start(range);
6148 void reset_dd_statistics_counters(gmx_domdec_t *dd)
6150 gmx_domdec_comm_t *comm = dd->comm;
6152 /* Reset all the statistics and counters for total run counting */
6153 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6155 comm->sum_nat[i] = 0;
6159 comm->load_step = 0;
6162 clear_ivec(comm->load_lim);
6167 void print_dd_statistics(const t_commrec *cr, const t_inputrec *ir, FILE *fplog)
6169 gmx_domdec_comm_t *comm = cr->dd->comm;
6171 const int numRanges = static_cast<int>(DDAtomRanges::Type::Number);
6172 gmx_sumd(numRanges, comm->sum_nat, cr);
6174 if (fplog == nullptr)
6179 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
6181 for (int i = static_cast<int>(DDAtomRanges::Type::Zones); i < numRanges; i++)
6183 auto range = static_cast<DDAtomRanges::Type>(i);
6184 double av = comm->sum_nat[i]/comm->ndecomp;
6187 case DDAtomRanges::Type::Zones:
6189 " av. #atoms communicated per step for force: %d x %.1f\n",
6192 case DDAtomRanges::Type::Vsites:
6193 if (cr->dd->vsite_comm)
6196 " av. #atoms communicated per step for vsites: %d x %.1f\n",
6197 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
6201 case DDAtomRanges::Type::Constraints:
6202 if (cr->dd->constraint_comm)
6205 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
6206 1 + ir->nLincsIter, av);
6210 gmx_incons(" Unknown type for DD statistics");
6213 fprintf(fplog, "\n");
6215 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
6217 print_dd_load_av(fplog, cr->dd);
6221 void dd_partition_system(FILE *fplog,
6223 const t_commrec *cr,
6224 gmx_bool bMasterState,
6226 t_state *state_global,
6227 const gmx_mtop_t *top_global,
6228 const t_inputrec *ir,
6229 t_state *state_local,
6230 PaddedRVecVector *f,
6231 gmx::MDAtoms *mdAtoms,
6232 gmx_localtop_t *top_local,
6235 gmx::Constraints *constr,
6237 gmx_wallcycle *wcycle,
6241 gmx_domdec_comm_t *comm;
6242 gmx_ddbox_t ddbox = {0};
6244 gmx_int64_t step_pcoupl;
6245 rvec cell_ns_x0, cell_ns_x1;
6246 int ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
6247 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckWhetherToTurnDlbOn, bLogLoad;
6248 gmx_bool bRedist, bSortCG, bResortAll;
6249 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
6253 wallcycle_start(wcycle, ewcDOMDEC);
6258 // TODO if the update code becomes accessible here, use
6259 // upd->deform for this logic.
6260 bBoxChanged = (bMasterState || inputrecDeform(ir));
6261 if (ir->epc != epcNO)
6263 /* With nstpcouple > 1 pressure coupling happens.
6264 * one step after calculating the pressure.
6265 * Box scaling happens at the end of the MD step,
6266 * after the DD partitioning.
6267 * We therefore have to do DLB in the first partitioning
6268 * after an MD step where P-coupling occurred.
6269 * We need to determine the last step in which p-coupling occurred.
6270 * MRS -- need to validate this for vv?
6272 int n = ir->nstpcouple;
6275 step_pcoupl = step - 1;
6279 step_pcoupl = ((step - 1)/n)*n + 1;
6281 if (step_pcoupl >= comm->partition_step)
6287 bNStGlobalComm = (step % nstglobalcomm == 0);
6295 /* Should we do dynamic load balacing this step?
6296 * Since it requires (possibly expensive) global communication,
6297 * we might want to do DLB less frequently.
6299 if (bBoxChanged || ir->epc != epcNO)
6301 bDoDLB = bBoxChanged;
6305 bDoDLB = bNStGlobalComm;
6309 /* Check if we have recorded loads on the nodes */
6310 if (comm->bRecordLoad && dd_load_count(comm) > 0)
6312 bCheckWhetherToTurnDlbOn = dd_dlb_get_should_check_whether_to_turn_dlb_on(dd);
6314 /* Print load every nstlog, first and last step to the log file */
6315 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
6316 comm->n_load_collect == 0 ||
6318 (step + ir->nstlist > ir->init_step + ir->nsteps)));
6320 /* Avoid extra communication due to verbose screen output
6321 * when nstglobalcomm is set.
6323 if (bDoDLB || bLogLoad || bCheckWhetherToTurnDlbOn ||
6324 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
6326 get_load_distribution(dd, wcycle);
6331 dd_print_load(fplog, dd, step-1);
6335 dd_print_load_verbose(dd);
6338 comm->n_load_collect++;
6344 /* Add the measured cycles to the running average */
6345 const float averageFactor = 0.1f;
6346 comm->cyclesPerStepDlbExpAverage =
6347 (1 - averageFactor)*comm->cyclesPerStepDlbExpAverage +
6348 averageFactor*comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
6350 if (comm->dlbState == edlbsOnCanTurnOff &&
6351 dd->comm->n_load_have % c_checkTurnDlbOffInterval == c_checkTurnDlbOffInterval - 1)
6353 gmx_bool turnOffDlb;
6356 /* If the running averaged cycles with DLB are more
6357 * than before we turned on DLB, turn off DLB.
6358 * We will again run and check the cycles without DLB
6359 * and we can then decide if to turn off DLB forever.
6361 turnOffDlb = (comm->cyclesPerStepDlbExpAverage >
6362 comm->cyclesPerStepBeforeDLB);
6364 dd_bcast(dd, sizeof(turnOffDlb), &turnOffDlb);
6367 /* To turn off DLB, we need to redistribute the atoms */
6368 dd_collect_state(dd, state_local, state_global);
6369 bMasterState = TRUE;
6370 turn_off_dlb(fplog, cr, step);
6374 else if (bCheckWhetherToTurnDlbOn)
6376 gmx_bool turnOffDlbForever = FALSE;
6377 gmx_bool turnOnDlb = FALSE;
6379 /* Since the timings are node dependent, the master decides */
6382 /* If we recently turned off DLB, we want to check if
6383 * performance is better without DLB. We want to do this
6384 * ASAP to minimize the chance that external factors
6385 * slowed down the DLB step are gone here and we
6386 * incorrectly conclude that DLB was causing the slowdown.
6387 * So we measure one nstlist block, no running average.
6389 if (comm->haveTurnedOffDlb &&
6390 comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep] <
6391 comm->cyclesPerStepDlbExpAverage)
6393 /* After turning off DLB we ran nstlist steps in fewer
6394 * cycles than with DLB. This likely means that DLB
6395 * in not benefical, but this could be due to a one
6396 * time unlucky fluctuation, so we require two such
6397 * observations in close succession to turn off DLB
6400 if (comm->dlbSlowerPartitioningCount > 0 &&
6401 dd->ddp_count < comm->dlbSlowerPartitioningCount + 10*c_checkTurnDlbOnInterval)
6403 turnOffDlbForever = TRUE;
6405 comm->haveTurnedOffDlb = false;
6406 /* Register when we last measured DLB slowdown */
6407 comm->dlbSlowerPartitioningCount = dd->ddp_count;
6411 /* Here we check if the max PME rank load is more than 0.98
6412 * the max PP force load. If so, PP DLB will not help,
6413 * since we are (almost) limited by PME. Furthermore,
6414 * DLB will cause a significant extra x/f redistribution
6415 * cost on the PME ranks, which will then surely result
6416 * in lower total performance.
6418 if (cr->npmenodes > 0 &&
6419 dd_pme_f_ratio(dd) > 1 - DD_PERF_LOSS_DLB_ON)
6425 turnOnDlb = (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
6431 gmx_bool turnOffDlbForever;
6435 turnOffDlbForever, turnOnDlb
6437 dd_bcast(dd, sizeof(bools), &bools);
6438 if (bools.turnOffDlbForever)
6440 turn_off_dlb_forever(fplog, cr, step);
6442 else if (bools.turnOnDlb)
6444 turn_on_dlb(fplog, cr, step);
6449 comm->n_load_have++;
6452 cgs_gl = &comm->cgs_gl;
6457 /* Clear the old state */
6458 clearDDStateIndices(dd, 0, 0);
6461 auto xGlobal = positionsFromStatePointer(state_global);
6463 set_ddbox(dd, true, ir,
6464 DDMASTER(dd) ? state_global->box : nullptr,
6468 distributeState(fplog, dd, state_global, ddbox, state_local, f);
6470 dd_make_local_cgs(dd, &top_local->cgs);
6472 /* Ensure that we have space for the new distribution */
6473 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
6475 if (fr->cutoff_scheme == ecutsGROUP)
6477 calc_cgcm(fplog, 0, dd->ncg_home,
6478 &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
6481 inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());
6483 dd_set_cginfo(dd->globalAtomGroupIndices, 0, dd->ncg_home, fr, comm->bLocalCG);
6485 else if (state_local->ddp_count != dd->ddp_count)
6487 if (state_local->ddp_count > dd->ddp_count)
6489 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
6492 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
6494 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
6497 /* Clear the old state */
6498 clearDDStateIndices(dd, 0, 0);
6500 /* Restore the atom group indices from state_local */
6501 restoreAtomGroups(dd, cgs_gl->index, state_local);
6502 make_dd_indices(dd, cgs_gl->index, 0);
6503 ncgindex_set = dd->ncg_home;
6505 if (fr->cutoff_scheme == ecutsGROUP)
6507 /* Redetermine the cg COMs */
6508 calc_cgcm(fplog, 0, dd->ncg_home,
6509 &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
6512 inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());
6514 dd_set_cginfo(dd->globalAtomGroupIndices, 0, dd->ncg_home, fr, comm->bLocalCG);
6516 set_ddbox(dd, bMasterState, ir, state_local->box,
6517 true, state_local->x, &ddbox);
6519 bRedist = isDlbOn(comm);
6523 /* We have the full state, only redistribute the cgs */
6525 /* Clear the non-home indices */
6526 clearDDStateIndices(dd, dd->ncg_home, comm->atomRanges.numHomeAtoms());
6529 /* Avoid global communication for dim's without pbc and -gcom */
6530 if (!bNStGlobalComm)
6532 copy_rvec(comm->box0, ddbox.box0 );
6533 copy_rvec(comm->box_size, ddbox.box_size);
6535 set_ddbox(dd, bMasterState, ir, state_local->box,
6536 bNStGlobalComm, state_local->x, &ddbox);
6541 /* For dim's without pbc and -gcom */
6542 copy_rvec(ddbox.box0, comm->box0 );
6543 copy_rvec(ddbox.box_size, comm->box_size);
6545 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
6548 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
6550 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
6553 /* Check if we should sort the charge groups */
6554 bSortCG = (bMasterState || bRedist);
6556 ncg_home_old = dd->ncg_home;
6558 /* When repartitioning we mark charge groups that will move to neighboring
6559 * DD cells, but we do not move them right away for performance reasons.
6560 * Thus we need to keep track of how many charge groups will move for
6561 * obtaining correct local charge group / atom counts.
6566 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
6568 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
6570 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
6572 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
6575 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
6577 &comm->cell_x0, &comm->cell_x1,
6578 dd->ncg_home, fr->cg_cm,
6579 cell_ns_x0, cell_ns_x1, &grid_density);
6583 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
6586 switch (fr->cutoff_scheme)
6589 copy_ivec(fr->ns->grid->n, ncells_old);
6590 grid_first(fplog, fr->ns->grid, dd, &ddbox,
6591 state_local->box, cell_ns_x0, cell_ns_x1,
6592 fr->rlist, grid_density);
6595 nbnxn_get_ncells(fr->nbv->nbs.get(), &ncells_old[XX], &ncells_old[YY]);
6598 gmx_incons("unimplemented");
6600 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
6601 copy_ivec(ddbox.tric_dir, comm->tric_dir);
6605 wallcycle_sub_start(wcycle, ewcsDD_GRID);
6607 /* Sort the state on charge group position.
6608 * This enables exact restarts from this step.
6609 * It also improves performance by about 15% with larger numbers
6610 * of atoms per node.
6613 /* Fill the ns grid with the home cell,
6614 * so we can sort with the indices.
6616 set_zones_ncg_home(dd);
6618 switch (fr->cutoff_scheme)
6621 set_zones_size(dd, state_local->box, &ddbox, 0, 1, ncg_moved);
6623 nbnxn_put_on_grid(fr->nbv->nbs.get(), fr->ePBC, state_local->box,
6625 comm->zones.size[0].bb_x0,
6626 comm->zones.size[0].bb_x1,
6628 comm->zones.dens_zone0,
6630 as_rvec_array(state_local->x.data()),
6631 ncg_moved, bRedist ? comm->movedBuffer.data() : nullptr,
6632 fr->nbv->grp[eintLocal].kernel_type,
6635 nbnxn_get_ncells(fr->nbv->nbs.get(), &ncells_new[XX], &ncells_new[YY]);
6638 fill_grid(&comm->zones, fr->ns->grid, dd->ncg_home,
6639 0, dd->ncg_home, fr->cg_cm);
6641 copy_ivec(fr->ns->grid->n, ncells_new);
6644 gmx_incons("unimplemented");
6647 bResortAll = bMasterState;
6649 /* Check if we can user the old order and ns grid cell indices
6650 * of the charge groups to sort the charge groups efficiently.
6652 if (ncells_new[XX] != ncells_old[XX] ||
6653 ncells_new[YY] != ncells_old[YY] ||
6654 ncells_new[ZZ] != ncells_old[ZZ])
6661 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
6662 gmx_step_str(step, sbuf), dd->ncg_home);
6664 dd_sort_state(dd, fr->cg_cm, fr, state_local,
6665 bResortAll ? -1 : ncg_home_old);
6667 /* After sorting and compacting we set the correct size */
6668 dd_resize_state(state_local, f, comm->atomRanges.numHomeAtoms());
6670 /* Rebuild all the indices */
6671 ga2la_clear(dd->ga2la);
6674 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
6677 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
6679 /* Setup up the communication and communicate the coordinates */
6680 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
6682 /* Set the indices */
6683 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
6685 /* Set the charge group boundaries for neighbor searching */
6686 set_cg_boundaries(&comm->zones);
6688 if (fr->cutoff_scheme == ecutsVERLET)
6690 /* When bSortCG=true, we have already set the size for zone 0 */
6691 set_zones_size(dd, state_local->box, &ddbox,
6692 bSortCG ? 1 : 0, comm->zones.n,
6696 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
6699 write_dd_pdb("dd_home",step,"dump",top_global,cr,
6700 -1,as_rvec_array(state_local->x.data()),state_local->box);
6703 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
6705 /* Extract a local topology from the global topology */
6706 for (int i = 0; i < dd->ndim; i++)
6708 np[dd->dim[i]] = comm->cd[i].numPulses();
6710 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
6711 comm->cellsize_min, np,
6713 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : as_rvec_array(state_local->x.data()),
6714 vsite, top_global, top_local);
6716 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
6718 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
6720 /* Set up the special atom communication */
6721 int n = comm->atomRanges.end(DDAtomRanges::Type::Zones);
6722 for (int i = static_cast<int>(DDAtomRanges::Type::Zones) + 1; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6724 auto range = static_cast<DDAtomRanges::Type>(i);
6727 case DDAtomRanges::Type::Vsites:
6728 if (vsite && vsite->n_intercg_vsite)
6730 n = dd_make_local_vsites(dd, n, top_local->idef.il);
6733 case DDAtomRanges::Type::Constraints:
6734 if (dd->bInterCGcons || dd->bInterCGsettles)
6736 /* Only for inter-cg constraints we need special code */
6737 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
6738 constr, ir->nProjOrder,
6739 top_local->idef.il);
6743 gmx_incons("Unknown special atom type setup");
6745 comm->atomRanges.setEnd(range, n);
6748 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
6750 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
6752 /* Make space for the extra coordinates for virtual site
6753 * or constraint communication.
6755 state_local->natoms = comm->atomRanges.numAtomsTotal();
6757 dd_resize_state(state_local, f, state_local->natoms);
6759 if (fr->haveDirectVirialContributions)
6761 if (vsite && vsite->n_intercg_vsite)
6763 nat_f_novirsum = comm->atomRanges.end(DDAtomRanges::Type::Vsites);
6767 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
6769 nat_f_novirsum = comm->atomRanges.end(DDAtomRanges::Type::Zones);
6773 nat_f_novirsum = comm->atomRanges.numHomeAtoms();
6782 /* Set the number of atoms required for the force calculation.
6783 * Forces need to be constrained when doing energy
6784 * minimization. For simple simulations we could avoid some
6785 * allocation, zeroing and copying, but this is probably not worth
6786 * the complications and checking.
6788 forcerec_set_ranges(fr, dd->ncg_home, dd->globalAtomGroupIndices.size(),
6789 comm->atomRanges.end(DDAtomRanges::Type::Zones),
6790 comm->atomRanges.end(DDAtomRanges::Type::Constraints),
6793 /* Update atom data for mdatoms and several algorithms */
6794 mdAlgorithmsSetupAtomData(cr, ir, top_global, top_local, fr,
6795 nullptr, mdAtoms, constr, vsite, nullptr);
6797 auto mdatoms = mdAtoms->mdatoms();
6798 if (!thisRankHasDuty(cr, DUTY_PME))
6800 /* Send the charges and/or c6/sigmas to our PME only node */
6801 gmx_pme_send_parameters(cr,
6803 mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
6804 mdatoms->chargeA, mdatoms->chargeB,
6805 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
6806 mdatoms->sigmaA, mdatoms->sigmaB,
6807 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
6812 /* Update the local pull groups */
6813 dd_make_local_pull_groups(cr, ir->pull_work, mdatoms);
6818 /* Update the local rotation groups */
6819 dd_make_local_rotation_groups(dd, ir->rot);
6822 if (ir->eSwapCoords != eswapNO)
6824 /* Update the local groups needed for ion swapping */
6825 dd_make_local_swap_groups(dd, ir->swap);
6828 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
6829 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
6831 add_dd_statistics(dd);
6833 /* Make sure we only count the cycles for this DD partitioning */
6834 clear_dd_cycle_counts(dd);
6836 /* Because the order of the atoms might have changed since
6837 * the last vsite construction, we need to communicate the constructing
6838 * atom coordinates again (for spreading the forces this MD step).
6840 dd_move_x_vsites(dd, state_local->box, as_rvec_array(state_local->x.data()));
6842 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
6844 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
6846 dd_move_x(dd, state_local->box, state_local->x, nullWallcycle);
6847 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
6848 -1, as_rvec_array(state_local->x.data()), state_local->box);
6851 /* Store the partitioning step */
6852 comm->partition_step = step;
6854 /* Increase the DD partitioning counter */
6856 /* The state currently matches this DD partitioning count, store it */
6857 state_local->ddp_count = dd->ddp_count;
6860 /* The DD master node knows the complete cg distribution,
6861 * store the count so we can possibly skip the cg info communication.
6863 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
6866 if (comm->DD_debug > 0)
6868 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
6869 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
6870 "after partitioning");
6873 wallcycle_stop(wcycle, ewcDOMDEC);
6876 /*! \brief Check whether bonded interactions are missing, if appropriate */
6877 void checkNumberOfBondedInteractions(FILE *fplog,
6879 int totalNumberOfBondedInteractions,
6880 const gmx_mtop_t *top_global,
6881 const gmx_localtop_t *top_local,
6882 const t_state *state,
6883 bool *shouldCheckNumberOfBondedInteractions)
6885 if (*shouldCheckNumberOfBondedInteractions)
6887 if (totalNumberOfBondedInteractions != cr->dd->nbonded_global)
6889 dd_print_missing_interactions(fplog, cr, totalNumberOfBondedInteractions, top_global, top_local, state); // Does not return
6891 *shouldCheckNumberOfBondedInteractions = false;